* QEMU System Emulator block driver
*
* Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2020 Virtuozzo International GmbH.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
#include "qapi/qobject-output-visitor.h"
#include "qapi/qapi-visit-block-core.h"
#include "sysemu/block-backend.h"
-#include "sysemu/sysemu.h"
#include "qemu/notify.h"
#include "qemu/option.h"
#include "qemu/coroutine.h"
#include "qemu/timer.h"
#include "qemu/cutils.h"
#include "qemu/id.h"
-#include "qemu/transactions.h"
+#include "qemu/range.h"
+#include "qemu/rcu.h"
#include "block/coroutines.h"
#ifdef CONFIG_BSD
#include <sys/ioctl.h>
#include <sys/queue.h>
-#ifndef __DragonFly__
+#if defined(HAVE_SYS_DISK_H)
#include <sys/disk.h>
#endif
#endif
BdrvChildRole child_role,
Error **errp);
-static void bdrv_replace_child_noperm(BdrvChild *child,
- BlockDriverState *new_bs);
+static bool bdrv_recurse_has_child(BlockDriverState *bs,
+ BlockDriverState *child);
+
+static void bdrv_child_free(BdrvChild *child);
+static void bdrv_replace_child_noperm(BdrvChild **child,
+ BlockDriverState *new_bs,
+ bool free_empty_child);
+static void bdrv_remove_file_or_backing_child(BlockDriverState *bs,
+ BdrvChild *child,
+ Transaction *tran);
+static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs,
+ Transaction *tran);
-static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue
- *queue, Error **errp);
+static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
+ BlockReopenQueue *queue,
+ Transaction *change_child_tran, Error **errp);
static void bdrv_reopen_commit(BDRVReopenState *reopen_state);
static void bdrv_reopen_abort(BDRVReopenState *reopen_state);
+static bool bdrv_backing_overridden(BlockDriverState *bs);
+
/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
* image is inactivated. */
bool bdrv_is_read_only(BlockDriverState *bs)
{
- return bs->read_only;
+ return !(bs->open_flags & BDRV_O_RDWR);
}
int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
goto fail;
}
- bs->read_only = true;
bs->open_flags &= ~BDRV_O_RDWR;
return 0;
for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
QLIST_INIT(&bs->op_blockers[i]);
}
- notifier_with_return_list_init(&bs->before_write_notifiers);
qemu_co_mutex_init(&bs->reqs_lock);
qemu_mutex_init(&bs->dirty_bitmap_mutex);
bs->refcnt = 1;
qemu_co_queue_init(&bs->flush_queue);
+ qemu_co_mutex_init(&bs->bsc_modify_lock);
+ bs->block_status_cache = g_new0(BdrvBlockStatusCache, 1);
+
for (i = 0; i < bdrv_drain_all_count; i++) {
bdrv_drained_begin(bs);
}
static char *bdrv_child_get_parent_desc(BdrvChild *c)
{
BlockDriverState *parent = c->opaque;
- return g_strdup(bdrv_get_device_or_node_name(parent));
+ return g_strdup_printf("node '%s'", bdrv_get_node_name(parent));
}
static void bdrv_child_cb_drained_begin(BdrvChild *child)
{
BlockDriverState *bs = child->opaque;
+ QLIST_INSERT_HEAD(&bs->children, child, next);
+
if (child->role & BDRV_CHILD_COW) {
bdrv_backing_attach(child);
}
}
bdrv_unapply_subtree_drain(child, bs);
+
+ QLIST_REMOVE(child, next);
}
static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base,
return 0;
}
-static AioContext *bdrv_child_cb_get_parent_aio_context(BdrvChild *c)
+AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c)
{
BlockDriverState *bs = c->opaque;
.can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx,
.set_aio_ctx = bdrv_child_cb_set_aio_ctx,
.update_filename = bdrv_child_cb_update_filename,
- .get_parent_aio_context = bdrv_child_cb_get_parent_aio_context,
+ .get_parent_aio_context = child_of_bds_get_parent_aio_context,
};
AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c)
}
bs->drv = drv;
- bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
bs->opaque = g_malloc0(drv->instance_size);
if (drv->bdrv_file_open) {
return ret;
}
- bdrv_refresh_limits(bs, &local_err);
+ bdrv_refresh_limits(bs, NULL, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return -EINVAL;
return ret;
}
-BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
- int flags, Error **errp)
+/*
+ * Create and open a block node.
+ *
+ * @options is a QDict of options to pass to the block drivers, or NULL for an
+ * empty set of options. The reference to the QDict belongs to the block layer
+ * after the call (even on failure), so if the caller intends to reuse the
+ * dictionary, it needs to use qobject_ref() before calling bdrv_open.
+ */
+BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv,
+ const char *node_name,
+ QDict *options, int flags,
+ Error **errp)
{
BlockDriverState *bs;
int ret;
bs = bdrv_new();
bs->open_flags = flags;
- bs->explicit_options = qdict_new();
- bs->options = qdict_new();
+ bs->options = options ?: qdict_new();
+ bs->explicit_options = qdict_clone_shallow(bs->options);
bs->opaque = NULL;
update_options_from_flags(bs->options, flags);
return bs;
}
+/* Create and open a block node. */
+BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
+ int flags, Error **errp)
+{
+ return bdrv_new_open_driver_opts(drv, node_name, NULL, flags, errp);
+}
+
QemuOptsList bdrv_runtime_opts = {
.name = "bdrv_common",
.head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
QemuOpts *opts;
BlockDriver *drv;
Error *local_err = NULL;
+ bool ro;
assert(bs->file == NULL);
assert(options != NULL && bs->options != options);
trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
drv->format_name);
- bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
+ ro = bdrv_is_read_only(bs);
- if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
- if (!bs->read_only && bdrv_is_whitelisted(drv, true)) {
+ if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, ro)) {
+ if (!ro && bdrv_is_whitelisted(drv, true)) {
ret = bdrv_apply_auto_read_only(bs, NULL, NULL);
} else {
ret = -ENOTSUP;
}
if (ret < 0) {
error_setg(errp,
- !bs->read_only && bdrv_is_whitelisted(drv, true)
+ !ro && bdrv_is_whitelisted(drv, true)
? "Driver '%s' can only be used for read-only devices"
: "Driver '%s' is not whitelisted",
drv->format_name);
assert(qatomic_read(&bs->copy_on_read) == 0);
if (bs->open_flags & BDRV_O_COPY_ON_READ) {
- if (!bs->read_only) {
+ if (!ro) {
bdrv_enable_copy_on_read(bs);
} else {
error_setg(errp, "Can't use copy-on-read on read-only device");
return 0;
}
-static int bdrv_check_update_perm(BlockDriverState *bs, BlockReopenQueue *q,
- uint64_t new_used_perm,
- uint64_t new_shared_perm,
- GSList *ignore_children,
- Error **errp);
-
typedef struct BlockReopenQueueEntry {
bool prepared;
bool perms_checked;
static char *bdrv_child_user_desc(BdrvChild *c)
{
- if (c->klass->get_parent_desc) {
- return c->klass->get_parent_desc(c);
- }
-
- return g_strdup("another user");
+ return c->klass->get_parent_desc(c);
}
+/*
+ * Check that @a allows everything that @b needs. @a and @b must reference same
+ * child node.
+ */
static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp)
{
- g_autofree char *user = NULL;
- g_autofree char *perm_names = NULL;
+ const char *child_bs_name;
+ g_autofree char *a_user = NULL;
+ g_autofree char *b_user = NULL;
+ g_autofree char *perms = NULL;
+
+ assert(a->bs);
+ assert(a->bs == b->bs);
if ((b->perm & a->shared_perm) == b->perm) {
return true;
}
- perm_names = bdrv_perm_names(b->perm & ~a->shared_perm);
- user = bdrv_child_user_desc(a);
- error_setg(errp, "Conflicts with use by %s as '%s', which does not "
- "allow '%s' on %s",
- user, a->name, perm_names, bdrv_get_node_name(b->bs));
+ child_bs_name = bdrv_get_node_name(b->bs);
+ a_user = bdrv_child_user_desc(a);
+ b_user = bdrv_child_user_desc(b);
+ perms = bdrv_perm_names(b->perm & ~a->shared_perm);
+
+ error_setg(errp, "Permission conflict on node '%s': permissions '%s' are "
+ "both required by %s (uses node '%s' as '%s' child) and "
+ "unshared by %s (uses node '%s' as '%s' child).",
+ child_bs_name, perms,
+ b_user, child_bs_name, b->name,
+ a_user, child_bs_name, a->name);
return false;
}
-static bool bdrv_parent_perms_conflict(BlockDriverState *bs,
- GSList *ignore_children,
- Error **errp)
+static bool bdrv_parent_perms_conflict(BlockDriverState *bs, Error **errp)
{
BdrvChild *a, *b;
* directions.
*/
QLIST_FOREACH(a, &bs->parents, next_parent) {
- if (g_slist_find(ignore_children, a)) {
- continue;
- }
-
QLIST_FOREACH(b, &bs->parents, next_parent) {
- if (a == b || g_slist_find(ignore_children, b)) {
+ if (a == b) {
continue;
}
return g_slist_prepend(list, bs);
}
-static void bdrv_child_set_perm_commit(void *opaque)
-{
- BdrvChild *c = opaque;
-
- c->has_backup_perm = false;
-}
+typedef struct BdrvChildSetPermState {
+ BdrvChild *child;
+ uint64_t old_perm;
+ uint64_t old_shared_perm;
+} BdrvChildSetPermState;
static void bdrv_child_set_perm_abort(void *opaque)
{
- BdrvChild *c = opaque;
- /*
- * We may have child->has_backup_perm unset at this point, as in case of
- * _check_ stage of permission update failure we may _check_ not the whole
- * subtree. Still, _abort_ is called on the whole subtree anyway.
- */
- if (c->has_backup_perm) {
- c->perm = c->backup_perm;
- c->shared_perm = c->backup_shared_perm;
- c->has_backup_perm = false;
- }
+ BdrvChildSetPermState *s = opaque;
+
+ s->child->perm = s->old_perm;
+ s->child->shared_perm = s->old_shared_perm;
}
static TransactionActionDrv bdrv_child_set_pem_drv = {
.abort = bdrv_child_set_perm_abort,
- .commit = bdrv_child_set_perm_commit,
+ .clean = g_free,
};
-/*
- * With tran=NULL needs to be followed by direct call to either
- * bdrv_child_set_perm_commit() or bdrv_child_set_perm_abort().
- *
- * With non-NULL tran needs to be followed by tran_abort() or tran_commit()
- * instead.
- */
-static void bdrv_child_set_perm_safe(BdrvChild *c, uint64_t perm,
- uint64_t shared, Transaction *tran)
+static void bdrv_child_set_perm(BdrvChild *c, uint64_t perm,
+ uint64_t shared, Transaction *tran)
{
- if (!c->has_backup_perm) {
- c->has_backup_perm = true;
- c->backup_perm = c->perm;
- c->backup_shared_perm = c->shared_perm;
- }
- /*
- * Note: it's OK if c->has_backup_perm was already set, as we can find the
- * same c twice during check_perm procedure
- */
+ BdrvChildSetPermState *s = g_new(BdrvChildSetPermState, 1);
+
+ *s = (BdrvChildSetPermState) {
+ .child = c,
+ .old_perm = c->perm,
+ .old_shared_perm = c->shared_perm,
+ };
c->perm = perm;
c->shared_perm = shared;
- if (tran) {
- tran_add(tran, &bdrv_child_set_pem_drv, c);
- }
+ tran_add(tran, &bdrv_child_set_pem_drv, s);
}
static void bdrv_drv_set_perm_commit(void *opaque)
typedef struct BdrvReplaceChildState {
BdrvChild *child;
+ BdrvChild **childp;
BlockDriverState *old_bs;
+ bool free_empty_child;
} BdrvReplaceChildState;
static void bdrv_replace_child_commit(void *opaque)
{
BdrvReplaceChildState *s = opaque;
+ if (s->free_empty_child && !s->child->bs) {
+ bdrv_child_free(s->child);
+ }
bdrv_unref(s->old_bs);
}
BdrvReplaceChildState *s = opaque;
BlockDriverState *new_bs = s->child->bs;
- /* old_bs reference is transparently moved from @s to @s->child */
- bdrv_replace_child_noperm(s->child, s->old_bs);
+ /*
+ * old_bs reference is transparently moved from @s to s->child.
+ *
+ * Pass &s->child here instead of s->childp, because:
+ * (1) s->old_bs must be non-NULL, so bdrv_replace_child_noperm() will not
+ * modify the BdrvChild * pointer we indirectly pass to it, i.e. it
+ * will not modify s->child. From that perspective, it does not matter
+ * whether we pass s->childp or &s->child.
+ * (2) If new_bs is not NULL, s->childp will be NULL. We then cannot use
+ * it here.
+ * (3) If new_bs is NULL, *s->childp will have been NULLed by
+ * bdrv_replace_child_tran()'s bdrv_replace_child_noperm() call, and we
+ * must not pass a NULL *s->childp here.
+ *
+ * So whether new_bs was NULL or not, we cannot pass s->childp here; and in
+ * any case, there is no reason to pass it anyway.
+ */
+ bdrv_replace_child_noperm(&s->child, s->old_bs, true);
+ /*
+ * The child was pre-existing, so s->old_bs must be non-NULL, and
+ * s->child thus must not have been freed
+ */
+ assert(s->child != NULL);
+ if (!new_bs) {
+ /* As described above, *s->childp was cleared, so restore it */
+ assert(s->childp != NULL);
+ *s->childp = s->child;
+ }
bdrv_unref(new_bs);
}
};
/*
- * bdrv_replace_child_safe
+ * bdrv_replace_child_tran
*
* Note: real unref of old_bs is done only on commit.
+ *
+ * The function doesn't update permissions, caller is responsible for this.
+ *
+ * (*childp)->bs must not be NULL.
+ *
+ * Note that if new_bs == NULL, @childp is stored in a state object attached
+ * to @tran, so that the old child can be reinstated in the abort handler.
+ * Therefore, if @new_bs can be NULL, @childp must stay valid until the
+ * transaction is committed or aborted.
+ *
+ * If @free_empty_child is true and @new_bs is NULL, the BdrvChild is
+ * freed (on commit). @free_empty_child should only be false if the
+ * caller will free the BDrvChild themselves (which may be important
+ * if this is in turn called in another transactional context).
*/
-__attribute__((unused))
-static void bdrv_replace_child_safe(BdrvChild *child, BlockDriverState *new_bs,
- Transaction *tran)
+static void bdrv_replace_child_tran(BdrvChild **childp,
+ BlockDriverState *new_bs,
+ Transaction *tran,
+ bool free_empty_child)
{
BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1);
*s = (BdrvReplaceChildState) {
- .child = child,
- .old_bs = child->bs,
+ .child = *childp,
+ .childp = new_bs == NULL ? childp : NULL,
+ .old_bs = (*childp)->bs,
+ .free_empty_child = free_empty_child,
};
tran_add(tran, &bdrv_replace_child_drv, s);
+ /* The abort handler relies on this */
+ assert(s->old_bs != NULL);
+
if (new_bs) {
bdrv_ref(new_bs);
}
- bdrv_replace_child_noperm(child, new_bs);
- /* old_bs reference is transparently moved from @child to @s */
+ /*
+ * Pass free_empty_child=false, we will free the child (if
+ * necessary) in bdrv_replace_child_commit() (if our
+ * @free_empty_child parameter was true).
+ */
+ bdrv_replace_child_noperm(childp, new_bs, false);
+ /* old_bs reference is transparently moved from *childp to @s */
}
/*
- * Check whether permissions on this node can be changed in a way that
- * @cumulative_perms and @cumulative_shared_perms are the new cumulative
- * permissions of all its parents. This involves checking whether all necessary
- * permission changes to child nodes can be performed.
- *
- * A call to this function must always be followed by a call to bdrv_set_perm()
- * or bdrv_abort_perm_update().
+ * Refresh permissions in @bs subtree. The function is intended to be called
+ * after some graph modification that was done without permission update.
*/
-static int bdrv_node_check_perm(BlockDriverState *bs, BlockReopenQueue *q,
- uint64_t cumulative_perms,
- uint64_t cumulative_shared_perms,
- GSList *ignore_children,
- Transaction *tran, Error **errp)
+static int bdrv_node_refresh_perm(BlockDriverState *bs, BlockReopenQueue *q,
+ Transaction *tran, Error **errp)
{
BlockDriver *drv = bs->drv;
BdrvChild *c;
int ret;
+ uint64_t cumulative_perms, cumulative_shared_perms;
+
+ bdrv_get_cumulative_perm(bs, &cumulative_perms, &cumulative_shared_perms);
/* Write permissions never work with read-only images */
if ((cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) &&
if (!bdrv_is_writable_after_reopen(bs, NULL)) {
error_setg(errp, "Block node is read-only");
} else {
- uint64_t current_perms, current_shared;
- bdrv_get_cumulative_perm(bs, ¤t_perms, ¤t_shared);
- if (current_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) {
- error_setg(errp, "Cannot make block node read-only, there is "
- "a writer on it");
- } else {
- error_setg(errp, "Cannot make block node read-only and create "
- "a writer on it");
- }
+ error_setg(errp, "Read-only block node '%s' cannot support "
+ "read-write users", bdrv_get_node_name(bs));
}
return -EPERM;
bdrv_child_perm(bs, c->bs, c, c->role, q,
cumulative_perms, cumulative_shared_perms,
&cur_perm, &cur_shared);
- bdrv_child_set_perm_safe(c, cur_perm, cur_shared, tran);
+ bdrv_child_set_perm(c, cur_perm, cur_shared, tran);
}
return 0;
}
-/*
- * If use_cumulative_perms is true, use cumulative_perms and
- * cumulative_shared_perms for first element of the list. Otherwise just refresh
- * all permissions.
- */
-static int bdrv_check_perm_common(GSList *list, BlockReopenQueue *q,
- bool use_cumulative_perms,
- uint64_t cumulative_perms,
- uint64_t cumulative_shared_perms,
- GSList *ignore_children,
- Transaction *tran, Error **errp)
+static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q,
+ Transaction *tran, Error **errp)
{
int ret;
BlockDriverState *bs;
- if (use_cumulative_perms) {
- bs = list->data;
-
- ret = bdrv_node_check_perm(bs, q, cumulative_perms,
- cumulative_shared_perms,
- ignore_children, tran, errp);
- if (ret < 0) {
- return ret;
- }
-
- list = list->next;
- }
-
for ( ; list; list = list->next) {
bs = list->data;
- if (bdrv_parent_perms_conflict(bs, ignore_children, errp)) {
+ if (bdrv_parent_perms_conflict(bs, errp)) {
return -EINVAL;
}
- bdrv_get_cumulative_perm(bs, &cumulative_perms,
- &cumulative_shared_perms);
-
- ret = bdrv_node_check_perm(bs, q, cumulative_perms,
- cumulative_shared_perms,
- ignore_children, tran, errp);
+ ret = bdrv_node_refresh_perm(bs, q, tran, errp);
if (ret < 0) {
return ret;
}
return 0;
}
-static int bdrv_check_perm(BlockDriverState *bs, BlockReopenQueue *q,
- uint64_t cumulative_perms,
- uint64_t cumulative_shared_perms,
- GSList *ignore_children, Error **errp)
-{
- g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs);
- return bdrv_check_perm_common(list, q, true, cumulative_perms,
- cumulative_shared_perms, ignore_children,
- NULL, errp);
-}
-
-static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q,
- Transaction *tran, Error **errp)
-{
- return bdrv_check_perm_common(list, q, false, 0, 0, NULL, tran, errp);
-}
-
-/*
- * Notifies drivers that after a previous bdrv_check_perm() call, the
- * permission update is not performed and any preparations made for it (e.g.
- * taken file locks) need to be undone.
- */
-static void bdrv_node_abort_perm_update(BlockDriverState *bs)
-{
- BlockDriver *drv = bs->drv;
- BdrvChild *c;
-
- if (!drv) {
- return;
- }
-
- bdrv_drv_set_perm_abort(bs);
-
- QLIST_FOREACH(c, &bs->children, next) {
- bdrv_child_set_perm_abort(c);
- }
-}
-
-static void bdrv_list_abort_perm_update(GSList *list)
-{
- for ( ; list; list = list->next) {
- bdrv_node_abort_perm_update((BlockDriverState *)list->data);
- }
-}
-
-static void bdrv_abort_perm_update(BlockDriverState *bs)
-{
- g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs);
- return bdrv_list_abort_perm_update(list);
-}
-
-static void bdrv_node_set_perm(BlockDriverState *bs)
-{
- BlockDriver *drv = bs->drv;
- BdrvChild *c;
-
- if (!drv) {
- return;
- }
-
- bdrv_drv_set_perm_commit(bs);
-
- /* Drivers that never have children can omit .bdrv_child_perm() */
- if (!drv->bdrv_child_perm) {
- assert(QLIST_EMPTY(&bs->children));
- return;
- }
-
- /* Update all children */
- QLIST_FOREACH(c, &bs->children, next) {
- bdrv_child_set_perm_commit(c);
- }
-}
-
-static void bdrv_list_set_perm(GSList *list)
-{
- for ( ; list; list = list->next) {
- bdrv_node_set_perm((BlockDriverState *)list->data);
- }
-}
-
-static void bdrv_set_perm(BlockDriverState *bs)
-{
- g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs);
- return bdrv_list_set_perm(list);
-}
-
void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
uint64_t *shared_perm)
{
{ BLK_PERM_WRITE, "write" },
{ BLK_PERM_WRITE_UNCHANGED, "write unchanged" },
{ BLK_PERM_RESIZE, "resize" },
- { BLK_PERM_GRAPH_MOD, "change children" },
{ 0, NULL }
};
return g_string_free(result, FALSE);
}
-/*
- * Checks whether a new reference to @bs can be added if the new user requires
- * @new_used_perm/@new_shared_perm as its permissions. If @ignore_children is
- * set, the BdrvChild objects in this list are ignored in the calculations;
- * this allows checking permission updates for an existing reference.
- *
- * Needs to be followed by a call to either bdrv_set_perm() or
- * bdrv_abort_perm_update(). */
-static int bdrv_check_update_perm(BlockDriverState *bs, BlockReopenQueue *q,
- uint64_t new_used_perm,
- uint64_t new_shared_perm,
- GSList *ignore_children,
- Error **errp)
-{
- BdrvChild *c;
- uint64_t cumulative_perms = new_used_perm;
- uint64_t cumulative_shared_perms = new_shared_perm;
-
-
- /* There is no reason why anyone couldn't tolerate write_unchanged */
- assert(new_shared_perm & BLK_PERM_WRITE_UNCHANGED);
-
- QLIST_FOREACH(c, &bs->parents, next_parent) {
- if (g_slist_find(ignore_children, c)) {
- continue;
- }
-
- if ((new_used_perm & c->shared_perm) != new_used_perm) {
- char *user = bdrv_child_user_desc(c);
- char *perm_names = bdrv_perm_names(new_used_perm & ~c->shared_perm);
-
- error_setg(errp, "Conflicts with use by %s as '%s', which does not "
- "allow '%s' on %s",
- user, c->name, perm_names, bdrv_get_node_name(c->bs));
- g_free(user);
- g_free(perm_names);
- return -EPERM;
- }
-
- if ((c->perm & new_shared_perm) != c->perm) {
- char *user = bdrv_child_user_desc(c);
- char *perm_names = bdrv_perm_names(c->perm & ~new_shared_perm);
-
- error_setg(errp, "Conflicts with use by %s as '%s', which uses "
- "'%s' on %s",
- user, c->name, perm_names, bdrv_get_node_name(c->bs));
- g_free(user);
- g_free(perm_names);
- return -EPERM;
- }
-
- cumulative_perms |= c->perm;
- cumulative_shared_perms &= c->shared_perm;
- }
-
- return bdrv_check_perm(bs, q, cumulative_perms, cumulative_shared_perms,
- ignore_children, errp);
-}
static int bdrv_refresh_perms(BlockDriverState *bs, Error **errp)
{
Transaction *tran = tran_new();
int ret;
- bdrv_child_set_perm_safe(c, perm, shared, tran);
+ bdrv_child_set_perm(c, perm, shared, tran);
ret = bdrv_refresh_perms(c->bs, &local_err);
shared = 0;
}
- shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_GRAPH_MOD |
- BLK_PERM_WRITE_UNCHANGED;
+ shared |= BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
if (bs->open_flags & BDRV_O_INACTIVE) {
shared |= BLK_PERM_WRITE | BLK_PERM_RESIZE;
[BLOCK_PERMISSION_WRITE] = BLK_PERM_WRITE,
[BLOCK_PERMISSION_WRITE_UNCHANGED] = BLK_PERM_WRITE_UNCHANGED,
[BLOCK_PERMISSION_RESIZE] = BLK_PERM_RESIZE,
- [BLOCK_PERMISSION_GRAPH_MOD] = BLK_PERM_GRAPH_MOD,
};
QEMU_BUILD_BUG_ON(ARRAY_SIZE(permissions) != BLOCK_PERMISSION__MAX);
return permissions[qapi_perm];
}
-static void bdrv_replace_child_noperm(BdrvChild *child,
- BlockDriverState *new_bs)
+/**
+ * Replace (*childp)->bs by @new_bs.
+ *
+ * If @new_bs is NULL, *childp will be set to NULL, too: BDS parents
+ * generally cannot handle a BdrvChild with .bs == NULL, so clearing
+ * BdrvChild.bs should generally immediately be followed by the
+ * BdrvChild pointer being cleared as well.
+ *
+ * If @free_empty_child is true and @new_bs is NULL, the BdrvChild is
+ * freed. @free_empty_child should only be false if the caller will
+ * free the BdrvChild themselves (this may be important in a
+ * transactional context, where it may only be freed on commit).
+ */
+static void bdrv_replace_child_noperm(BdrvChild **childp,
+ BlockDriverState *new_bs,
+ bool free_empty_child)
{
+ BdrvChild *child = *childp;
BlockDriverState *old_bs = child->bs;
int new_bs_quiesce_counter;
int drain_saldo;
assert(!child->frozen);
+ assert(old_bs != new_bs);
if (old_bs && new_bs) {
assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
}
child->bs = new_bs;
+ if (!new_bs) {
+ *childp = NULL;
+ }
if (new_bs) {
QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
bdrv_parent_drained_end_single(child);
drain_saldo++;
}
+
+ if (free_empty_child && !child->bs) {
+ bdrv_child_free(child);
+ }
}
-/*
- * Updates @child to change its reference to point to @new_bs, including
- * checking and applying the necessary permission updates both to the old node
- * and to @new_bs.
- *
- * NULL is passed as @new_bs for removing the reference before freeing @child.
- *
- * If @new_bs is not NULL, bdrv_check_perm() must be called beforehand, as this
- * function uses bdrv_set_perm() to update the permissions according to the new
- * reference that @new_bs gets.
+/**
+ * Free the given @child.
*
- * Callers must ensure that child->frozen is false.
+ * The child must be empty (i.e. `child->bs == NULL`) and it must be
+ * unused (i.e. not in a children list).
*/
-static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
+static void bdrv_child_free(BdrvChild *child)
{
- BlockDriverState *old_bs = child->bs;
+ assert(!child->bs);
+ assert(!child->next.le_prev); /* not in children list */
+
+ g_free(child->name);
+ g_free(child);
+}
+
+typedef struct BdrvAttachChildCommonState {
+ BdrvChild **child;
+ AioContext *old_parent_ctx;
+ AioContext *old_child_ctx;
+} BdrvAttachChildCommonState;
- /* Asserts that child->frozen == false */
- bdrv_replace_child_noperm(child, new_bs);
+static void bdrv_attach_child_common_abort(void *opaque)
+{
+ BdrvAttachChildCommonState *s = opaque;
+ BdrvChild *child = *s->child;
+ BlockDriverState *bs = child->bs;
/*
- * Start with the new node's permissions. If @new_bs is a (direct
- * or indirect) child of @old_bs, we must complete the permission
- * update on @new_bs before we loosen the restrictions on @old_bs.
- * Otherwise, bdrv_check_perm() on @old_bs would re-initiate
- * updating the permissions of @new_bs, and thus not purely loosen
- * restrictions.
+ * Pass free_empty_child=false, because we still need the child
+ * for the AioContext operations on the parent below; those
+ * BdrvChildClass methods all work on a BdrvChild object, so we
+ * need to keep it as an empty shell (after this function, it will
+ * not be attached to any parent, and it will not have a .bs).
*/
- if (new_bs) {
- bdrv_set_perm(new_bs);
+ bdrv_replace_child_noperm(s->child, NULL, false);
+
+ if (bdrv_get_aio_context(bs) != s->old_child_ctx) {
+ bdrv_try_set_aio_context(bs, s->old_child_ctx, &error_abort);
}
- if (old_bs) {
- /*
- * Update permissions for old node. We're just taking a parent away, so
- * we're loosening restrictions. Errors of permission update are not
- * fatal in this case, ignore them.
- */
- bdrv_refresh_perms(old_bs, NULL);
+ if (bdrv_child_get_parent_aio_context(child) != s->old_parent_ctx) {
+ GSList *ignore;
- /* When the parent requiring a non-default AioContext is removed, the
- * node moves back to the main AioContext */
- bdrv_try_set_aio_context(old_bs, qemu_get_aio_context(), NULL);
+ /* No need to ignore `child`, because it has been detached already */
+ ignore = NULL;
+ child->klass->can_set_aio_ctx(child, s->old_parent_ctx, &ignore,
+ &error_abort);
+ g_slist_free(ignore);
+
+ ignore = NULL;
+ child->klass->set_aio_ctx(child, s->old_parent_ctx, &ignore);
+ g_slist_free(ignore);
}
+
+ bdrv_unref(bs);
+ bdrv_child_free(child);
}
+static TransactionActionDrv bdrv_attach_child_common_drv = {
+ .abort = bdrv_attach_child_common_abort,
+ .clean = g_free,
+};
+
/*
- * This function steals the reference to child_bs from the caller.
- * That reference is later dropped by bdrv_root_unref_child().
+ * Common part of attaching bdrv child to bs or to blk or to job
*
- * On failure NULL is returned, errp is set and the reference to
- * child_bs is also dropped.
+ * Resulting new child is returned through @child.
+ * At start *@child must be NULL.
+ * @child is saved to a new entry of @tran, so that *@child could be reverted to
+ * NULL on abort(). So referenced variable must live at least until transaction
+ * end.
*
- * The caller must hold the AioContext lock @child_bs, but not that of @ctx
- * (unless @child_bs is already in @ctx).
+ * Function doesn't update permissions, caller is responsible for this.
*/
-BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
- const char *child_name,
- const BdrvChildClass *child_class,
- BdrvChildRole child_role,
- uint64_t perm, uint64_t shared_perm,
- void *opaque, Error **errp)
-{
- BdrvChild *child;
- Error *local_err = NULL;
- int ret;
- AioContext *ctx;
-
- ret = bdrv_check_update_perm(child_bs, NULL, perm, shared_perm, NULL, errp);
- if (ret < 0) {
- bdrv_abort_perm_update(child_bs);
- bdrv_unref(child_bs);
- return NULL;
- }
-
- child = g_new(BdrvChild, 1);
- *child = (BdrvChild) {
+static int bdrv_attach_child_common(BlockDriverState *child_bs,
+ const char *child_name,
+ const BdrvChildClass *child_class,
+ BdrvChildRole child_role,
+ uint64_t perm, uint64_t shared_perm,
+ void *opaque, BdrvChild **child,
+ Transaction *tran, Error **errp)
+{
+ BdrvChild *new_child;
+ AioContext *parent_ctx;
+ AioContext *child_ctx = bdrv_get_aio_context(child_bs);
+
+ assert(child);
+ assert(*child == NULL);
+ assert(child_class->get_parent_desc);
+
+ new_child = g_new(BdrvChild, 1);
+ *new_child = (BdrvChild) {
.bs = NULL,
.name = g_strdup(child_name),
.klass = child_class,
.opaque = opaque,
};
- ctx = bdrv_child_get_parent_aio_context(child);
-
- /* If the AioContexts don't match, first try to move the subtree of
+ /*
+ * If the AioContexts don't match, first try to move the subtree of
* child_bs into the AioContext of the new parent. If this doesn't work,
- * try moving the parent into the AioContext of child_bs instead. */
- if (bdrv_get_aio_context(child_bs) != ctx) {
- ret = bdrv_try_set_aio_context(child_bs, ctx, &local_err);
+ * try moving the parent into the AioContext of child_bs instead.
+ */
+ parent_ctx = bdrv_child_get_parent_aio_context(new_child);
+ if (child_ctx != parent_ctx) {
+ Error *local_err = NULL;
+ int ret = bdrv_try_set_aio_context(child_bs, parent_ctx, &local_err);
+
if (ret < 0 && child_class->can_set_aio_ctx) {
- GSList *ignore = g_slist_prepend(NULL, child);
- ctx = bdrv_get_aio_context(child_bs);
- if (child_class->can_set_aio_ctx(child, ctx, &ignore, NULL)) {
+ GSList *ignore = g_slist_prepend(NULL, new_child);
+ if (child_class->can_set_aio_ctx(new_child, child_ctx, &ignore,
+ NULL))
+ {
error_free(local_err);
ret = 0;
g_slist_free(ignore);
- ignore = g_slist_prepend(NULL, child);
- child_class->set_aio_ctx(child, ctx, &ignore);
+ ignore = g_slist_prepend(NULL, new_child);
+ child_class->set_aio_ctx(new_child, child_ctx, &ignore);
}
g_slist_free(ignore);
}
+
if (ret < 0) {
error_propagate(errp, local_err);
- g_free(child);
- bdrv_abort_perm_update(child_bs);
- bdrv_unref(child_bs);
- return NULL;
+ bdrv_child_free(new_child);
+ return ret;
}
}
- /* This performs the matching bdrv_set_perm() for the above check. */
- bdrv_replace_child(child, child_bs);
+ bdrv_ref(child_bs);
+ bdrv_replace_child_noperm(&new_child, child_bs, true);
+ /* child_bs was non-NULL, so new_child must not have been freed */
+ assert(new_child != NULL);
- return child;
+ *child = new_child;
+
+ BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1);
+ *s = (BdrvAttachChildCommonState) {
+ .child = child,
+ .old_parent_ctx = parent_ctx,
+ .old_child_ctx = child_ctx,
+ };
+ tran_add(tran, &bdrv_attach_child_common_drv, s);
+
+ return 0;
}
/*
- * This function transfers the reference to child_bs from the caller
- * to parent_bs. That reference is later dropped by parent_bs on
- * bdrv_close() or if someone calls bdrv_unref_child().
- *
- * On failure NULL is returned, errp is set and the reference to
- * child_bs is also dropped.
+ * Variable referenced by @child must live at least until transaction end.
+ * (see bdrv_attach_child_common() doc for details)
*
- * If @parent_bs and @child_bs are in different AioContexts, the caller must
- * hold the AioContext lock for @child_bs, but not for @parent_bs.
+ * Function doesn't update permissions, caller is responsible for this.
*/
-BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
- BlockDriverState *child_bs,
- const char *child_name,
- const BdrvChildClass *child_class,
- BdrvChildRole child_role,
- Error **errp)
+static int bdrv_attach_child_noperm(BlockDriverState *parent_bs,
+ BlockDriverState *child_bs,
+ const char *child_name,
+ const BdrvChildClass *child_class,
+ BdrvChildRole child_role,
+ BdrvChild **child,
+ Transaction *tran,
+ Error **errp)
{
- BdrvChild *child;
+ int ret;
uint64_t perm, shared_perm;
- bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm);
-
assert(parent_bs->drv);
+
+ if (bdrv_recurse_has_child(child_bs, parent_bs)) {
+ error_setg(errp, "Making '%s' a %s child of '%s' would create a cycle",
+ child_bs->node_name, child_name, parent_bs->node_name);
+ return -EINVAL;
+ }
+
+ bdrv_get_cumulative_perm(parent_bs, &perm, &shared_perm);
bdrv_child_perm(parent_bs, child_bs, NULL, child_role, NULL,
perm, shared_perm, &perm, &shared_perm);
- child = bdrv_root_attach_child(child_bs, child_name, child_class,
+ ret = bdrv_attach_child_common(child_bs, child_name, child_class,
child_role, perm, shared_perm, parent_bs,
- errp);
- if (child == NULL) {
- return NULL;
+ child, tran, errp);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return 0;
+}
+
+static void bdrv_detach_child(BdrvChild **childp)
+{
+ BlockDriverState *old_bs = (*childp)->bs;
+
+ bdrv_replace_child_noperm(childp, NULL, true);
+
+ if (old_bs) {
+ /*
+ * Update permissions for old node. We're just taking a parent away, so
+ * we're loosening restrictions. Errors of permission update are not
+ * fatal in this case, ignore them.
+ */
+ bdrv_refresh_perms(old_bs, NULL);
+
+ /*
+ * When the parent requiring a non-default AioContext is removed, the
+ * node moves back to the main AioContext
+ */
+ bdrv_try_set_aio_context(old_bs, qemu_get_aio_context(), NULL);
+ }
+}
+
+/*
+ * This function steals the reference to child_bs from the caller.
+ * That reference is later dropped by bdrv_root_unref_child().
+ *
+ * On failure NULL is returned, errp is set and the reference to
+ * child_bs is also dropped.
+ *
+ * The caller must hold the AioContext lock @child_bs, but not that of @ctx
+ * (unless @child_bs is already in @ctx).
+ */
+BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
+ const char *child_name,
+ const BdrvChildClass *child_class,
+ BdrvChildRole child_role,
+ uint64_t perm, uint64_t shared_perm,
+ void *opaque, Error **errp)
+{
+ int ret;
+ BdrvChild *child = NULL;
+ Transaction *tran = tran_new();
+
+ ret = bdrv_attach_child_common(child_bs, child_name, child_class,
+ child_role, perm, shared_perm, opaque,
+ &child, tran, errp);
+ if (ret < 0) {
+ goto out;
}
- QLIST_INSERT_HEAD(&parent_bs->children, child, next);
+ ret = bdrv_refresh_perms(child_bs, errp);
+
+out:
+ tran_finalize(tran, ret);
+ /* child is unset on failure by bdrv_attach_child_common_abort() */
+ assert((ret < 0) == !child);
+
+ bdrv_unref(child_bs);
return child;
}
-static void bdrv_detach_child(BdrvChild *child)
+/*
+ * This function transfers the reference to child_bs from the caller
+ * to parent_bs. That reference is later dropped by parent_bs on
+ * bdrv_close() or if someone calls bdrv_unref_child().
+ *
+ * On failure NULL is returned, errp is set and the reference to
+ * child_bs is also dropped.
+ *
+ * If @parent_bs and @child_bs are in different AioContexts, the caller must
+ * hold the AioContext lock for @child_bs, but not for @parent_bs.
+ */
+BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
+ BlockDriverState *child_bs,
+ const char *child_name,
+ const BdrvChildClass *child_class,
+ BdrvChildRole child_role,
+ Error **errp)
{
- QLIST_SAFE_REMOVE(child, next);
+ int ret;
+ BdrvChild *child = NULL;
+ Transaction *tran = tran_new();
- bdrv_replace_child(child, NULL);
+ ret = bdrv_attach_child_noperm(parent_bs, child_bs, child_name, child_class,
+ child_role, &child, tran, errp);
+ if (ret < 0) {
+ goto out;
+ }
- g_free(child->name);
- g_free(child);
+ ret = bdrv_refresh_perms(parent_bs, errp);
+ if (ret < 0) {
+ goto out;
+ }
+
+out:
+ tran_finalize(tran, ret);
+ /* child is unset on failure by bdrv_attach_child_common_abort() */
+ assert((ret < 0) == !child);
+
+ bdrv_unref(child_bs);
+
+ return child;
}
/* Callers must ensure that child->frozen is false. */
BlockDriverState *child_bs;
child_bs = child->bs;
- bdrv_detach_child(child);
+ bdrv_detach_child(&child);
bdrv_unref(child_bs);
}
+typedef struct BdrvSetInheritsFrom {
+ BlockDriverState *bs;
+ BlockDriverState *old_inherits_from;
+} BdrvSetInheritsFrom;
+
+static void bdrv_set_inherits_from_abort(void *opaque)
+{
+ BdrvSetInheritsFrom *s = opaque;
+
+ s->bs->inherits_from = s->old_inherits_from;
+}
+
+static TransactionActionDrv bdrv_set_inherits_from_drv = {
+ .abort = bdrv_set_inherits_from_abort,
+ .clean = g_free,
+};
+
+/* @tran is allowed to be NULL. In this case no rollback is possible */
+static void bdrv_set_inherits_from(BlockDriverState *bs,
+ BlockDriverState *new_inherits_from,
+ Transaction *tran)
+{
+ if (tran) {
+ BdrvSetInheritsFrom *s = g_new(BdrvSetInheritsFrom, 1);
+
+ *s = (BdrvSetInheritsFrom) {
+ .bs = bs,
+ .old_inherits_from = bs->inherits_from,
+ };
+
+ tran_add(tran, &bdrv_set_inherits_from_drv, s);
+ }
+
+ bs->inherits_from = new_inherits_from;
+}
+
/**
* Clear all inherits_from pointers from children and grandchildren of
* @root that point to @root, where necessary.
+ * @tran is allowed to be NULL. In this case no rollback is possible
*/
-static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child)
+static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child,
+ Transaction *tran)
{
BdrvChild *c;
}
}
if (c == NULL) {
- child->bs->inherits_from = NULL;
+ bdrv_set_inherits_from(child->bs, NULL, tran);
}
}
QLIST_FOREACH(c, &child->bs->children, next) {
- bdrv_unset_inherits_from(root, c);
+ bdrv_unset_inherits_from(root, c, tran);
}
}
return;
}
- bdrv_unset_inherits_from(parent, child);
+ bdrv_unset_inherits_from(parent, child, NULL);
bdrv_root_unref_child(child);
}
}
/*
- * Sets the bs->backing link of a BDS. A new reference is created; callers
- * which don't need their own reference any more must call bdrv_unref().
+ * Sets the bs->backing or bs->file link of a BDS. A new reference is created;
+ * callers which don't need their own reference any more must call bdrv_unref().
+ *
+ * Function doesn't update permissions, caller is responsible for this.
*/
-int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
- Error **errp)
+static int bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs,
+ BlockDriverState *child_bs,
+ bool is_backing,
+ Transaction *tran, Error **errp)
{
int ret = 0;
- bool update_inherits_from = bdrv_chain_contains(bs, backing_hd) &&
- bdrv_inherits_from_recursive(backing_hd, bs);
+ bool update_inherits_from =
+ bdrv_inherits_from_recursive(child_bs, parent_bs);
+ BdrvChild *child = is_backing ? parent_bs->backing : parent_bs->file;
+ BdrvChildRole role;
- if (bdrv_is_backing_chain_frozen(bs, child_bs(bs->backing), errp)) {
+ if (!parent_bs->drv) {
+ /*
+ * Node without drv is an object without a class :/. TODO: finally fix
+ * qcow2 driver to never clear bs->drv and implement format corruption
+ * handling in other way.
+ */
+ error_setg(errp, "Node corrupted");
+ return -EINVAL;
+ }
+
+ if (child && child->frozen) {
+ error_setg(errp, "Cannot change frozen '%s' link from '%s' to '%s'",
+ child->name, parent_bs->node_name, child->bs->node_name);
return -EPERM;
}
- if (backing_hd) {
- bdrv_ref(backing_hd);
+ if (is_backing && !parent_bs->drv->is_filter &&
+ !parent_bs->drv->supports_backing)
+ {
+ error_setg(errp, "Driver '%s' of node '%s' does not support backing "
+ "files", parent_bs->drv->format_name, parent_bs->node_name);
+ return -EINVAL;
}
- if (bs->backing) {
- /* Cannot be frozen, we checked that above */
- bdrv_unref_child(bs, bs->backing);
- bs->backing = NULL;
+ if (parent_bs->drv->is_filter) {
+ role = BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY;
+ } else if (is_backing) {
+ role = BDRV_CHILD_COW;
+ } else {
+ /*
+ * We only can use same role as it is in existing child. We don't have
+ * infrastructure to determine role of file child in generic way
+ */
+ if (!child) {
+ error_setg(errp, "Cannot set file child to format node without "
+ "file child");
+ return -EINVAL;
+ }
+ role = child->role;
}
- if (!backing_hd) {
- goto out;
+ if (child) {
+ bdrv_unset_inherits_from(parent_bs, child, tran);
+ bdrv_remove_file_or_backing_child(parent_bs, child, tran);
}
- bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_of_bds,
- bdrv_backing_role(bs), errp);
- if (!bs->backing) {
- ret = -EPERM;
+ if (!child_bs) {
goto out;
}
- /* If backing_hd was already part of bs's backing chain, and
- * inherits_from pointed recursively to bs then let's update it to
- * point directly to bs (else it will become NULL). */
+ ret = bdrv_attach_child_noperm(parent_bs, child_bs,
+ is_backing ? "backing" : "file",
+ &child_of_bds, role,
+ is_backing ? &parent_bs->backing :
+ &parent_bs->file,
+ tran, errp);
+ if (ret < 0) {
+ return ret;
+ }
+
+
+ /*
+ * If inherits_from pointed recursively to bs then let's update it to
+ * point directly to bs (else it will become NULL).
+ */
if (update_inherits_from) {
- backing_hd->inherits_from = bs;
+ bdrv_set_inherits_from(child_bs, parent_bs, tran);
+ }
+
+out:
+ bdrv_refresh_limits(parent_bs, tran, NULL);
+
+ return 0;
+}
+
+static int bdrv_set_backing_noperm(BlockDriverState *bs,
+ BlockDriverState *backing_hd,
+ Transaction *tran, Error **errp)
+{
+ return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp);
+}
+
+int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
+ Error **errp)
+{
+ int ret;
+ Transaction *tran = tran_new();
+
+ ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp);
+ if (ret < 0) {
+ goto out;
}
+ ret = bdrv_refresh_perms(bs, errp);
out:
- bdrv_refresh_limits(bs, NULL);
+ tran_finalize(tran, ret);
return ret;
}
bs_entry->state.explicit_options = explicit_options;
bs_entry->state.flags = flags;
- /* This needs to be overwritten in bdrv_reopen_prepare() */
- bs_entry->state.perm = UINT64_MAX;
- bs_entry->state.shared_perm = 0;
-
/*
* If keep_old_opts is false then it means that unspecified
* options must be reset to their original value. We don't allow
NULL, 0, keep_old_opts);
}
+void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue)
+{
+ if (bs_queue) {
+ BlockReopenQueueEntry *bs_entry, *next;
+ QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
+ qobject_unref(bs_entry->state.explicit_options);
+ qobject_unref(bs_entry->state.options);
+ g_free(bs_entry);
+ }
+ g_free(bs_queue);
+ }
+}
+
/*
* Reopen multiple BlockDriverStates atomically & transactionally.
*
*
* All affected nodes must be drained between bdrv_reopen_queue() and
* bdrv_reopen_multiple().
+ *
+ * To be called from the main thread, with all other AioContexts unlocked.
*/
int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
{
int ret = -1;
BlockReopenQueueEntry *bs_entry, *next;
+ AioContext *ctx;
+ Transaction *tran = tran_new();
+ g_autoptr(GHashTable) found = NULL;
+ g_autoptr(GSList) refresh_list = NULL;
+ assert(qemu_get_current_aio_context() == qemu_get_aio_context());
assert(bs_queue != NULL);
+ QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
+ ctx = bdrv_get_aio_context(bs_entry->state.bs);
+ aio_context_acquire(ctx);
+ ret = bdrv_flush(bs_entry->state.bs);
+ aio_context_release(ctx);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Error flushing drive");
+ goto abort;
+ }
+ }
+
QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
assert(bs_entry->state.bs->quiesce_counter > 0);
- if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, errp)) {
- goto cleanup;
+ ctx = bdrv_get_aio_context(bs_entry->state.bs);
+ aio_context_acquire(ctx);
+ ret = bdrv_reopen_prepare(&bs_entry->state, bs_queue, tran, errp);
+ aio_context_release(ctx);
+ if (ret < 0) {
+ goto abort;
}
bs_entry->prepared = true;
}
+ found = g_hash_table_new(NULL, NULL);
QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
BDRVReopenState *state = &bs_entry->state;
- ret = bdrv_check_perm(state->bs, bs_queue, state->perm,
- state->shared_perm, NULL, errp);
- if (ret < 0) {
- goto cleanup_perm;
- }
- /* Check if new_backing_bs would accept the new permissions */
- if (state->replace_backing_bs && state->new_backing_bs) {
- uint64_t nperm, nshared;
- bdrv_child_perm(state->bs, state->new_backing_bs,
- NULL, bdrv_backing_role(state->bs),
- bs_queue, state->perm, state->shared_perm,
- &nperm, &nshared);
- ret = bdrv_check_update_perm(state->new_backing_bs, NULL,
- nperm, nshared, NULL, errp);
- if (ret < 0) {
- goto cleanup_perm;
- }
+
+ refresh_list = bdrv_topological_dfs(refresh_list, found, state->bs);
+ if (state->old_backing_bs) {
+ refresh_list = bdrv_topological_dfs(refresh_list, found,
+ state->old_backing_bs);
+ }
+ if (state->old_file_bs) {
+ refresh_list = bdrv_topological_dfs(refresh_list, found,
+ state->old_file_bs);
}
- bs_entry->perms_checked = true;
+ }
+
+ /*
+ * Note that file-posix driver rely on permission update done during reopen
+ * (even if no permission changed), because it wants "new" permissions for
+ * reconfiguring the fd and that's why it does it in raw_check_perm(), not
+ * in raw_reopen_prepare() which is called with "old" permissions.
+ */
+ ret = bdrv_list_refresh_perms(refresh_list, bs_queue, tran, errp);
+ if (ret < 0) {
+ goto abort;
}
/*
* to first element.
*/
QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) {
+ ctx = bdrv_get_aio_context(bs_entry->state.bs);
+ aio_context_acquire(ctx);
bdrv_reopen_commit(&bs_entry->state);
+ aio_context_release(ctx);
}
- ret = 0;
-cleanup_perm:
- QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
- BDRVReopenState *state = &bs_entry->state;
-
- if (!bs_entry->perms_checked) {
- continue;
- }
-
- if (ret == 0) {
- uint64_t perm, shared;
+ tran_commit(tran);
- bdrv_get_cumulative_perm(state->bs, &perm, &shared);
- assert(perm == state->perm);
- assert(shared == state->shared_perm);
+ QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) {
+ BlockDriverState *bs = bs_entry->state.bs;
- bdrv_set_perm(state->bs);
- } else {
- bdrv_abort_perm_update(state->bs);
- if (state->replace_backing_bs && state->new_backing_bs) {
- bdrv_abort_perm_update(state->new_backing_bs);
- }
+ if (bs->drv->bdrv_reopen_commit_post) {
+ ctx = bdrv_get_aio_context(bs);
+ aio_context_acquire(ctx);
+ bs->drv->bdrv_reopen_commit_post(&bs_entry->state);
+ aio_context_release(ctx);
}
}
- if (ret == 0) {
- QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) {
- BlockDriverState *bs = bs_entry->state.bs;
+ ret = 0;
+ goto cleanup;
- if (bs->drv->bdrv_reopen_commit_post)
- bs->drv->bdrv_reopen_commit_post(&bs_entry->state);
- }
- }
-cleanup:
+abort:
+ tran_abort(tran);
QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
- if (ret) {
- if (bs_entry->prepared) {
- bdrv_reopen_abort(&bs_entry->state);
- }
- qobject_unref(bs_entry->state.explicit_options);
- qobject_unref(bs_entry->state.options);
- }
- if (bs_entry->state.new_backing_bs) {
- bdrv_unref(bs_entry->state.new_backing_bs);
+ if (bs_entry->prepared) {
+ ctx = bdrv_get_aio_context(bs_entry->state.bs);
+ aio_context_acquire(ctx);
+ bdrv_reopen_abort(&bs_entry->state);
+ aio_context_release(ctx);
}
- g_free(bs_entry);
}
- g_free(bs_queue);
+
+cleanup:
+ bdrv_reopen_queue_free(bs_queue);
return ret;
}
-int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
- Error **errp)
+int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
+ Error **errp)
{
- int ret;
+ AioContext *ctx = bdrv_get_aio_context(bs);
BlockReopenQueue *queue;
- QDict *opts = qdict_new();
-
- qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only);
+ int ret;
bdrv_subtree_drained_begin(bs);
- queue = bdrv_reopen_queue(NULL, bs, opts, true);
- ret = bdrv_reopen_multiple(queue, errp);
- bdrv_subtree_drained_end(bs);
-
- return ret;
-}
-
-static BlockReopenQueueEntry *find_parent_in_reopen_queue(BlockReopenQueue *q,
- BdrvChild *c)
-{
- BlockReopenQueueEntry *entry;
-
- QTAILQ_FOREACH(entry, q, entry) {
- BlockDriverState *bs = entry->state.bs;
- BdrvChild *child;
-
- QLIST_FOREACH(child, &bs->children, next) {
- if (child == c) {
- return entry;
- }
- }
+ if (ctx != qemu_get_aio_context()) {
+ aio_context_release(ctx);
}
- return NULL;
-}
-
-static void bdrv_reopen_perm(BlockReopenQueue *q, BlockDriverState *bs,
- uint64_t *perm, uint64_t *shared)
-{
- BdrvChild *c;
- BlockReopenQueueEntry *parent;
- uint64_t cumulative_perms = 0;
- uint64_t cumulative_shared_perms = BLK_PERM_ALL;
-
- QLIST_FOREACH(c, &bs->parents, next_parent) {
- parent = find_parent_in_reopen_queue(q, c);
- if (!parent) {
- cumulative_perms |= c->perm;
- cumulative_shared_perms &= c->shared_perm;
- } else {
- uint64_t nperm, nshared;
-
- bdrv_child_perm(parent->state.bs, bs, c, c->role, q,
- parent->state.perm, parent->state.shared_perm,
- &nperm, &nshared);
+ queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts);
+ ret = bdrv_reopen_multiple(queue, errp);
- cumulative_perms |= nperm;
- cumulative_shared_perms &= nshared;
- }
+ if (ctx != qemu_get_aio_context()) {
+ aio_context_acquire(ctx);
}
- *perm = cumulative_perms;
- *shared = cumulative_shared_perms;
+ bdrv_subtree_drained_end(bs);
+
+ return ret;
}
-static bool bdrv_reopen_can_attach(BlockDriverState *parent,
- BdrvChild *child,
- BlockDriverState *new_child,
- Error **errp)
+int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
+ Error **errp)
{
- AioContext *parent_ctx = bdrv_get_aio_context(parent);
- AioContext *child_ctx = bdrv_get_aio_context(new_child);
- GSList *ignore;
- bool ret;
+ QDict *opts = qdict_new();
- ignore = g_slist_prepend(NULL, child);
- ret = bdrv_can_set_aio_context(new_child, parent_ctx, &ignore, NULL);
- g_slist_free(ignore);
- if (ret) {
- return ret;
- }
+ qdict_put_bool(opts, BDRV_OPT_READ_ONLY, read_only);
- ignore = g_slist_prepend(NULL, child);
- ret = bdrv_can_set_aio_context(parent, child_ctx, &ignore, errp);
- g_slist_free(ignore);
- return ret;
+ return bdrv_reopen(bs, opts, true, errp);
}
/*
*
* Return 0 on success, otherwise return < 0 and set @errp.
*/
-static int bdrv_reopen_parse_backing(BDRVReopenState *reopen_state,
- Error **errp)
+static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
+ bool is_backing, Transaction *tran,
+ Error **errp)
{
BlockDriverState *bs = reopen_state->bs;
- BlockDriverState *overlay_bs, *below_bs, *new_backing_bs;
+ BlockDriverState *new_child_bs;
+ BlockDriverState *old_child_bs = is_backing ? child_bs(bs->backing) :
+ child_bs(bs->file);
+ const char *child_name = is_backing ? "backing" : "file";
QObject *value;
const char *str;
- value = qdict_get(reopen_state->options, "backing");
+ value = qdict_get(reopen_state->options, child_name);
if (value == NULL) {
return 0;
}
switch (qobject_type(value)) {
case QTYPE_QNULL:
- new_backing_bs = NULL;
+ assert(is_backing); /* The 'file' option does not allow a null value */
+ new_child_bs = NULL;
break;
case QTYPE_QSTRING:
str = qstring_get_str(qobject_to(QString, value));
- new_backing_bs = bdrv_lookup_bs(NULL, str, errp);
- if (new_backing_bs == NULL) {
+ new_child_bs = bdrv_lookup_bs(NULL, str, errp);
+ if (new_child_bs == NULL) {
return -EINVAL;
- } else if (bdrv_recurse_has_child(new_backing_bs, bs)) {
- error_setg(errp, "Making '%s' a backing file of '%s' "
- "would create a cycle", str, bs->node_name);
+ } else if (bdrv_recurse_has_child(new_child_bs, bs)) {
+ error_setg(errp, "Making '%s' a %s child of '%s' would create a "
+ "cycle", str, child_name, bs->node_name);
return -EINVAL;
}
break;
default:
- /* 'backing' does not allow any other data type */
+ /*
+ * The options QDict has been flattened, so 'backing' and 'file'
+ * do not allow any other data type here.
+ */
g_assert_not_reached();
}
- /*
- * Check AioContext compatibility so that the bdrv_set_backing_hd() call in
- * bdrv_reopen_commit() won't fail.
- */
- if (new_backing_bs) {
- if (!bdrv_reopen_can_attach(bs, bs->backing, new_backing_bs, errp)) {
- return -EINVAL;
- }
- }
-
- /*
- * Ensure that @bs can really handle backing files, because we are
- * about to give it one (or swap the existing one)
- */
- if (bs->drv->is_filter) {
- /* Filters always have a file or a backing child */
- if (!bs->backing) {
- error_setg(errp, "'%s' is a %s filter node that does not support a "
- "backing child", bs->node_name, bs->drv->format_name);
- return -EINVAL;
- }
- } else if (!bs->drv->supports_backing) {
- error_setg(errp, "Driver '%s' of node '%s' does not support backing "
- "files", bs->drv->format_name, bs->node_name);
- return -EINVAL;
+ if (old_child_bs == new_child_bs) {
+ return 0;
}
- /*
- * Find the "actual" backing file by skipping all links that point
- * to an implicit node, if any (e.g. a commit filter node).
- * We cannot use any of the bdrv_skip_*() functions here because
- * those return the first explicit node, while we are looking for
- * its overlay here.
- */
- overlay_bs = bs;
- for (below_bs = bdrv_filter_or_cow_bs(overlay_bs);
- below_bs && below_bs->implicit;
- below_bs = bdrv_filter_or_cow_bs(overlay_bs))
- {
- overlay_bs = below_bs;
- }
+ if (old_child_bs) {
+ if (bdrv_skip_implicit_filters(old_child_bs) == new_child_bs) {
+ return 0;
+ }
- /* If we want to replace the backing file we need some extra checks */
- if (new_backing_bs != bdrv_filter_or_cow_bs(overlay_bs)) {
- /* Check for implicit nodes between bs and its backing file */
- if (bs != overlay_bs) {
- error_setg(errp, "Cannot change backing link if '%s' has "
- "an implicit backing file", bs->node_name);
+ if (old_child_bs->implicit) {
+ error_setg(errp, "Cannot replace implicit %s child of %s",
+ child_name, bs->node_name);
return -EPERM;
}
+ }
+
+ if (bs->drv->is_filter && !old_child_bs) {
/*
- * Check if the backing link that we want to replace is frozen.
- * Note that
- * bdrv_filter_or_cow_child(overlay_bs) == overlay_bs->backing,
- * because we know that overlay_bs == bs, and that @bs
- * either is a filter that uses ->backing or a COW format BDS
- * with bs->drv->supports_backing == true.
+ * Filters always have a file or a backing child, so we are trying to
+ * change wrong child
*/
- if (bdrv_is_backing_chain_frozen(overlay_bs,
- child_bs(overlay_bs->backing), errp))
- {
- return -EPERM;
- }
- reopen_state->replace_backing_bs = true;
- if (new_backing_bs) {
- bdrv_ref(new_backing_bs);
- reopen_state->new_backing_bs = new_backing_bs;
- }
+ error_setg(errp, "'%s' is a %s filter node that does not support a "
+ "%s child", bs->node_name, bs->drv->format_name, child_name);
+ return -EINVAL;
}
- return 0;
+ if (is_backing) {
+ reopen_state->old_backing_bs = old_child_bs;
+ } else {
+ reopen_state->old_file_bs = old_child_bs;
+ }
+
+ return bdrv_set_file_or_backing_noperm(bs, new_child_bs, is_backing,
+ tran, errp);
}
/*
*
*/
static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
- BlockReopenQueue *queue, Error **errp)
+ BlockReopenQueue *queue,
+ Transaction *change_child_tran, Error **errp)
{
int ret = -1;
int old_flags;
goto error;
}
- /* Calculate required permissions after reopening */
- bdrv_reopen_perm(queue, reopen_state->bs,
- &reopen_state->perm, &reopen_state->shared_perm);
-
- ret = bdrv_flush(reopen_state->bs);
- if (ret) {
- error_setg_errno(errp, -ret, "Error flushing drive");
- goto error;
- }
-
if (drv->bdrv_reopen_prepare) {
/*
* If a driver-specific option is missing, it means that we
* either a reference to an existing node (using its node name)
* or NULL to simply detach the current backing file.
*/
- ret = bdrv_reopen_parse_backing(reopen_state, errp);
+ ret = bdrv_reopen_parse_file_or_backing(reopen_state, true,
+ change_child_tran, errp);
if (ret < 0) {
goto error;
}
qdict_del(reopen_state->options, "backing");
+ /* Allow changing the 'file' option. In this case NULL is not allowed */
+ ret = bdrv_reopen_parse_file_or_backing(reopen_state, false,
+ change_child_tran, errp);
+ if (ret < 0) {
+ goto error;
+ }
+ qdict_del(reopen_state->options, "file");
+
/* Options that are not handled are only okay if they are unchanged
* compared to the old state. It is expected that some options are only
* used for the initial open, but not reopen (e.g. filename) */
/* set BDS specific flags now */
qobject_unref(bs->explicit_options);
qobject_unref(bs->options);
+ qobject_ref(reopen_state->explicit_options);
+ qobject_ref(reopen_state->options);
bs->explicit_options = reopen_state->explicit_options;
bs->options = reopen_state->options;
bs->open_flags = reopen_state->flags;
- bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
bs->detect_zeroes = reopen_state->detect_zeroes;
- if (reopen_state->replace_backing_bs) {
- qdict_del(bs->explicit_options, "backing");
- qdict_del(bs->options, "backing");
- }
-
/* Remove child references from bs->options and bs->explicit_options.
* Child options were already removed in bdrv_reopen_queue_child() */
QLIST_FOREACH(child, &bs->children, next) {
qdict_del(bs->explicit_options, child->name);
qdict_del(bs->options, child->name);
}
+ /* backing is probably removed, so it's not handled by previous loop */
+ qdict_del(bs->explicit_options, "backing");
+ qdict_del(bs->options, "backing");
- /*
- * Change the backing file if a new one was specified. We do this
- * after updating bs->options, so bdrv_refresh_filename() (called
- * from bdrv_set_backing_hd()) has the new values.
- */
- if (reopen_state->replace_backing_bs) {
- BlockDriverState *old_backing_bs = child_bs(bs->backing);
- assert(!old_backing_bs || !old_backing_bs->implicit);
- /* Abort the permission update on the backing bs we're detaching */
- if (old_backing_bs) {
- bdrv_abort_perm_update(old_backing_bs);
- }
- bdrv_set_backing_hd(bs, reopen_state->new_backing_bs, &error_abort);
- }
-
- bdrv_refresh_limits(bs, NULL);
+ bdrv_refresh_limits(bs, NULL, NULL);
}
/*
bs->explicit_options = NULL;
qobject_unref(bs->full_open_options);
bs->full_open_options = NULL;
+ g_free(bs->block_status_cache);
+ bs->block_status_cache = NULL;
bdrv_release_named_dirty_bitmaps(bs);
assert(QLIST_EMPTY(&bs->dirty_bitmaps));
return ret;
}
+typedef struct BdrvRemoveFilterOrCowChild {
+ BdrvChild *child;
+ BlockDriverState *bs;
+ bool is_backing;
+} BdrvRemoveFilterOrCowChild;
+
+static void bdrv_remove_filter_or_cow_child_abort(void *opaque)
+{
+ BdrvRemoveFilterOrCowChild *s = opaque;
+ BlockDriverState *parent_bs = s->child->opaque;
+
+ if (s->is_backing) {
+ parent_bs->backing = s->child;
+ } else {
+ parent_bs->file = s->child;
+ }
+
+ /*
+ * We don't have to restore child->bs here to undo bdrv_replace_child_tran()
+ * because that function is transactionable and it registered own completion
+ * entries in @tran, so .abort() for bdrv_replace_child_safe() will be
+ * called automatically.
+ */
+}
+
+static void bdrv_remove_filter_or_cow_child_commit(void *opaque)
+{
+ BdrvRemoveFilterOrCowChild *s = opaque;
+
+ bdrv_child_free(s->child);
+}
+
+static void bdrv_remove_filter_or_cow_child_clean(void *opaque)
+{
+ BdrvRemoveFilterOrCowChild *s = opaque;
+
+ /* Drop the bs reference after the transaction is done */
+ bdrv_unref(s->bs);
+ g_free(s);
+}
+
+static TransactionActionDrv bdrv_remove_filter_or_cow_child_drv = {
+ .abort = bdrv_remove_filter_or_cow_child_abort,
+ .commit = bdrv_remove_filter_or_cow_child_commit,
+ .clean = bdrv_remove_filter_or_cow_child_clean,
+};
+
/*
- * With auto_skip=true bdrv_replace_node_common skips updating from parents
- * if it creates a parent-child relation loop or if parent is block-job.
- *
- * With auto_skip=false the error is returned if from has a parent which should
- * not be updated.
+ * A function to remove backing or file child of @bs.
+ * Function doesn't update permissions, caller is responsible for this.
*/
-static int bdrv_replace_node_common(BlockDriverState *from,
+static void bdrv_remove_file_or_backing_child(BlockDriverState *bs,
+ BdrvChild *child,
+ Transaction *tran)
+{
+ BdrvChild **childp;
+ BdrvRemoveFilterOrCowChild *s;
+
+ if (!child) {
+ return;
+ }
+
+ /*
+ * Keep a reference to @bs so @childp will stay valid throughout the
+ * transaction (required by bdrv_replace_child_tran())
+ */
+ bdrv_ref(bs);
+ if (child == bs->backing) {
+ childp = &bs->backing;
+ } else if (child == bs->file) {
+ childp = &bs->file;
+ } else {
+ g_assert_not_reached();
+ }
+
+ if (child->bs) {
+ /*
+ * Pass free_empty_child=false, we will free the child in
+ * bdrv_remove_filter_or_cow_child_commit()
+ */
+ bdrv_replace_child_tran(childp, NULL, tran, false);
+ }
+
+ s = g_new(BdrvRemoveFilterOrCowChild, 1);
+ *s = (BdrvRemoveFilterOrCowChild) {
+ .child = child,
+ .bs = bs,
+ .is_backing = (childp == &bs->backing),
+ };
+ tran_add(tran, &bdrv_remove_filter_or_cow_child_drv, s);
+}
+
+/*
+ * A function to remove backing-chain child of @bs if exists: cow child for
+ * format nodes (always .backing) and filter child for filters (may be .file or
+ * .backing)
+ */
+static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs,
+ Transaction *tran)
+{
+ bdrv_remove_file_or_backing_child(bs, bdrv_filter_or_cow_child(bs), tran);
+}
+
+static int bdrv_replace_node_noperm(BlockDriverState *from,
BlockDriverState *to,
- bool auto_skip, Error **errp)
+ bool auto_skip, Transaction *tran,
+ Error **errp)
{
BdrvChild *c, *next;
- GSList *list = NULL, *p;
- uint64_t perm = 0, shared = BLK_PERM_ALL;
- int ret;
-
- /* Make sure that @from doesn't go away until we have successfully attached
- * all of its parents to @to. */
- bdrv_ref(from);
- assert(qemu_get_current_aio_context() == qemu_get_aio_context());
- assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to));
- bdrv_drained_begin(from);
+ assert(to != NULL);
- /* Put all parents into @list and calculate their cumulative permissions */
QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
assert(c->bs == from);
if (!should_update_child(c, to)) {
if (auto_skip) {
continue;
}
- ret = -EINVAL;
error_setg(errp, "Should not change '%s' link to '%s'",
c->name, from->node_name);
- goto out;
+ return -EINVAL;
}
if (c->frozen) {
- ret = -EPERM;
error_setg(errp, "Cannot change '%s' link to '%s'",
c->name, from->node_name);
- goto out;
+ return -EPERM;
+ }
+
+ /*
+ * Passing a pointer to the local variable @c is fine here, because
+ * @to is not NULL, and so &c will not be attached to the transaction.
+ */
+ bdrv_replace_child_tran(&c, to, tran, true);
+ }
+
+ return 0;
+}
+
+/*
+ * With auto_skip=true bdrv_replace_node_common skips updating from parents
+ * if it creates a parent-child relation loop or if parent is block-job.
+ *
+ * With auto_skip=false the error is returned if from has a parent which should
+ * not be updated.
+ *
+ * With @detach_subchain=true @to must be in a backing chain of @from. In this
+ * case backing link of the cow-parent of @to is removed.
+ *
+ * @to must not be NULL.
+ */
+static int bdrv_replace_node_common(BlockDriverState *from,
+ BlockDriverState *to,
+ bool auto_skip, bool detach_subchain,
+ Error **errp)
+{
+ Transaction *tran = tran_new();
+ g_autoptr(GHashTable) found = NULL;
+ g_autoptr(GSList) refresh_list = NULL;
+ BlockDriverState *to_cow_parent = NULL;
+ int ret;
+
+ assert(to != NULL);
+
+ if (detach_subchain) {
+ assert(bdrv_chain_contains(from, to));
+ assert(from != to);
+ for (to_cow_parent = from;
+ bdrv_filter_or_cow_bs(to_cow_parent) != to;
+ to_cow_parent = bdrv_filter_or_cow_bs(to_cow_parent))
+ {
+ ;
}
- list = g_slist_prepend(list, c);
- perm |= c->perm;
- shared &= c->shared_perm;
}
- /* Check whether the required permissions can be granted on @to, ignoring
- * all BdrvChild in @list so that they can't block themselves. */
- ret = bdrv_check_update_perm(to, NULL, perm, shared, list, errp);
+ /* Make sure that @from doesn't go away until we have successfully attached
+ * all of its parents to @to. */
+ bdrv_ref(from);
+
+ assert(qemu_get_current_aio_context() == qemu_get_aio_context());
+ assert(bdrv_get_aio_context(from) == bdrv_get_aio_context(to));
+ bdrv_drained_begin(from);
+
+ /*
+ * Do the replacement without permission update.
+ * Replacement may influence the permissions, we should calculate new
+ * permissions based on new graph. If we fail, we'll roll-back the
+ * replacement.
+ */
+ ret = bdrv_replace_node_noperm(from, to, auto_skip, tran, errp);
if (ret < 0) {
- bdrv_abort_perm_update(to);
goto out;
}
- /* Now actually perform the change. We performed the permission check for
- * all elements of @list at once, so set the permissions all at once at the
- * very end. */
- for (p = list; p != NULL; p = p->next) {
- c = p->data;
-
- bdrv_ref(to);
- bdrv_replace_child_noperm(c, to);
- bdrv_unref(from);
+ if (detach_subchain) {
+ bdrv_remove_filter_or_cow_child(to_cow_parent, tran);
}
- bdrv_set_perm(to);
+ found = g_hash_table_new(NULL, NULL);
+
+ refresh_list = bdrv_topological_dfs(refresh_list, found, to);
+ refresh_list = bdrv_topological_dfs(refresh_list, found, from);
+
+ ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp);
+ if (ret < 0) {
+ goto out;
+ }
ret = 0;
out:
- g_slist_free(list);
+ tran_finalize(tran, ret);
+
bdrv_drained_end(from);
bdrv_unref(from);
return ret;
}
+/**
+ * Replace node @from by @to (where neither may be NULL).
+ */
int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
Error **errp)
{
- return bdrv_replace_node_common(from, to, true, errp);
+ return bdrv_replace_node_common(from, to, true, false, errp);
+}
+
+int bdrv_drop_filter(BlockDriverState *bs, Error **errp)
+{
+ return bdrv_replace_node_common(bs, bdrv_filter_or_cow_bs(bs), true, true,
+ errp);
}
/*
* This will modify the BlockDriverState fields, and swap contents
* between bs_new and bs_top. Both bs_new and bs_top are modified.
*
- * bs_new must not be attached to a BlockBackend.
+ * bs_new must not be attached to a BlockBackend and must not have backing
+ * child.
*
* This function does not create any image files.
*/
int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
Error **errp)
{
- int ret = bdrv_set_backing_hd(bs_new, bs_top, errp);
+ int ret;
+ Transaction *tran = tran_new();
+
+ assert(!bs_new->backing);
+
+ ret = bdrv_attach_child_noperm(bs_new, bs_top, "backing",
+ &child_of_bds, bdrv_backing_role(bs_new),
+ &bs_new->backing, tran, errp);
if (ret < 0) {
- return ret;
+ goto out;
}
- ret = bdrv_replace_node(bs_top, bs_new, errp);
+ ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp);
if (ret < 0) {
- bdrv_set_backing_hd(bs_new, NULL, &error_abort);
- return ret;
+ goto out;
}
- return 0;
+ ret = bdrv_refresh_perms(bs_new, errp);
+out:
+ tran_finalize(tran, ret);
+
+ bdrv_refresh_limits(bs_top, NULL, NULL);
+
+ return ret;
+}
+
+/* Not for empty child */
+int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
+ Error **errp)
+{
+ int ret;
+ Transaction *tran = tran_new();
+ g_autoptr(GHashTable) found = NULL;
+ g_autoptr(GSList) refresh_list = NULL;
+ BlockDriverState *old_bs = child->bs;
+
+ bdrv_ref(old_bs);
+ bdrv_drained_begin(old_bs);
+ bdrv_drained_begin(new_bs);
+
+ bdrv_replace_child_tran(&child, new_bs, tran, true);
+ /* @new_bs must have been non-NULL, so @child must not have been freed */
+ assert(child != NULL);
+
+ found = g_hash_table_new(NULL, NULL);
+ refresh_list = bdrv_topological_dfs(refresh_list, found, old_bs);
+ refresh_list = bdrv_topological_dfs(refresh_list, found, new_bs);
+
+ ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp);
+
+ tran_finalize(tran, ret);
+
+ bdrv_drained_end(old_bs);
+ bdrv_drained_end(new_bs);
+ bdrv_unref(old_bs);
+
+ return ret;
}
static void bdrv_delete(BlockDriverState *bs)
g_free(bs);
}
-BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options,
+
+/*
+ * Replace @bs by newly created block node.
+ *
+ * @options is a QDict of options to pass to the block drivers, or NULL for an
+ * empty set of options. The reference to the QDict belongs to the block layer
+ * after the call (even on failure), so if the caller intends to reuse the
+ * dictionary, it needs to use qobject_ref() before calling bdrv_open.
+ */
+BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options,
int flags, Error **errp)
{
- BlockDriverState *new_node_bs;
- Error *local_err = NULL;
+ ERRP_GUARD();
+ int ret;
+ BlockDriverState *new_node_bs = NULL;
+ const char *drvname, *node_name;
+ BlockDriver *drv;
+
+ drvname = qdict_get_try_str(options, "driver");
+ if (!drvname) {
+ error_setg(errp, "driver is not specified");
+ goto fail;
+ }
+
+ drv = bdrv_find_format(drvname);
+ if (!drv) {
+ error_setg(errp, "Unknown driver: '%s'", drvname);
+ goto fail;
+ }
+
+ node_name = qdict_get_try_str(options, "node-name");
- new_node_bs = bdrv_open(NULL, NULL, node_options, flags, errp);
- if (new_node_bs == NULL) {
+ new_node_bs = bdrv_new_open_driver_opts(drv, node_name, options, flags,
+ errp);
+ options = NULL; /* bdrv_new_open_driver() eats options */
+ if (!new_node_bs) {
error_prepend(errp, "Could not create node: ");
- return NULL;
+ goto fail;
}
bdrv_drained_begin(bs);
- bdrv_replace_node(bs, new_node_bs, &local_err);
+ ret = bdrv_replace_node(bs, new_node_bs, errp);
bdrv_drained_end(bs);
- if (local_err) {
- bdrv_unref(new_node_bs);
- error_propagate(errp, local_err);
- return NULL;
+ if (ret < 0) {
+ error_prepend(errp, "Could not replace node: ");
+ goto fail;
}
return new_node_bs;
+
+fail:
+ qobject_unref(options);
+ bdrv_unref(new_node_bs);
+ return NULL;
}
/*
* -ENOTSUP - format driver doesn't support changing the backing file
*/
int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file,
- const char *backing_fmt, bool warn)
+ const char *backing_fmt, bool require)
{
BlockDriver *drv = bs->drv;
int ret;
return -EINVAL;
}
- if (warn && backing_file && !backing_fmt) {
- warn_report("Deprecated use of backing file without explicit "
- "backing format, use of this image requires "
- "potentially unsafe format probing");
+ if (require && backing_file && !backing_fmt) {
+ return -EINVAL;
}
if (drv->bdrv_change_backing_file != NULL) {
update_inherits_from = bdrv_inherits_from_recursive(base, explicit_top);
/* success - we can delete the intermediate states, and link top->base */
- /* TODO Check graph modification op blockers (BLK_PERM_GRAPH_MOD) once
- * we've figured out how they should work. */
if (!backing_file_str) {
bdrv_refresh_filename(base);
backing_file_str = base->filename;
updated_children = g_slist_prepend(updated_children, c);
}
- bdrv_replace_node_common(top, base, false, &local_err);
+ /*
+ * It seems correct to pass detach_subchain=true here, but it triggers
+ * one more yet not fixed bug, when due to nested aio_poll loop we switch to
+ * another drained section, which modify the graph (for example, removing
+ * the child, which we keep in updated_children list). So, it's a TODO.
+ *
+ * Note, bug triggered if pass detach_subchain=true here and run
+ * test-bdrv-drain. test_drop_intermediate_poll() test-case will crash.
+ * That's a FIXME.
+ */
+ bdrv_replace_node_common(top, base, false, false, &local_err);
if (local_err) {
error_report_err(local_err);
goto exit;
void bdrv_init(void)
{
+#ifdef CONFIG_BDRV_WHITELIST_TOOLS
+ use_bdrv_whitelist = 1;
+#endif
module_call_init(MODULE_INIT_BLOCK);
}
{
BdrvChild *child, *parent;
int ret;
+ uint64_t cumulative_perms, cumulative_shared_perms;
if (!bs->drv) {
return -ENOMEDIUM;
}
}
+ bdrv_get_cumulative_perm(bs, &cumulative_perms,
+ &cumulative_shared_perms);
+ if (cumulative_perms & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) {
+ /* Our inactive parents still need write access. Inactivation failed. */
+ return -EPERM;
+ }
+
bs->open_flags |= BDRV_O_INACTIVE;
/*
}
assert(full_backing);
- /* backing files always opened read-only */
+ /*
+ * No need to do I/O here, which allows us to open encrypted
+ * backing images without needing the secret
+ */
back_flags = flags;
back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
+ back_flags |= BDRV_O_NO_IO;
backing_options = qdict_new();
if (backing_fmt) {
goto out;
} else {
if (!backing_fmt) {
- warn_report("Deprecated use of backing file without explicit "
- "backing format (detected format of %s)",
- bs->drv->format_name);
- if (bs->drv != &bdrv_raw) {
- /*
- * A probe of raw deserves the most attention:
- * leaving the backing format out of the image
- * will ensure bs->probed is set (ensuring we
- * don't accidentally commit into the backing
- * file), and allow more spots to warn the users
- * to fix their toolchain when opening this image
- * later. For other images, we can safely record
- * the format that we probed.
- */
- backing_fmt = bs->drv->format_name;
- qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, backing_fmt,
- NULL);
- }
+ error_setg(&local_err,
+ "Backing file specified without backing format");
+ error_append_hint(&local_err, "Detected format of %s.",
+ bs->drv->format_name);
+ goto out;
}
if (size == -1) {
/* Opened BS, have no size */
}
/* (backing_file && !(flags & BDRV_O_NO_BACKING)) */
} else if (backing_file && !backing_fmt) {
- warn_report("Deprecated use of unopened backing file without "
- "explicit backing format, use of this image requires "
- "potentially unsafe format probing");
+ error_setg(&local_err,
+ "Backing file specified without backing format");
+ goto out;
}
if (size == -1) {
/* Note: This function may return false positives; it may return true
* even if opening the backing file specified by bs's image header
* would result in exactly bs->backing. */
-bool bdrv_backing_overridden(BlockDriverState *bs)
+static bool bdrv_backing_overridden(BlockDriverState *bs)
{
if (bs->backing) {
return strcmp(bs->auto_backing_file,
{
return bdrv_skip_filters(bdrv_cow_bs(bdrv_skip_filters(bs)));
}
+
+/**
+ * Check whether [offset, offset + bytes) overlaps with the cached
+ * block-status data region.
+ *
+ * If so, and @pnum is not NULL, set *pnum to `bsc.data_end - offset`,
+ * which is what bdrv_bsc_is_data()'s interface needs.
+ * Otherwise, *pnum is not touched.
+ */
+static bool bdrv_bsc_range_overlaps_locked(BlockDriverState *bs,
+ int64_t offset, int64_t bytes,
+ int64_t *pnum)
+{
+ BdrvBlockStatusCache *bsc = qatomic_rcu_read(&bs->block_status_cache);
+ bool overlaps;
+
+ overlaps =
+ qatomic_read(&bsc->valid) &&
+ ranges_overlap(offset, bytes, bsc->data_start,
+ bsc->data_end - bsc->data_start);
+
+ if (overlaps && pnum) {
+ *pnum = bsc->data_end - offset;
+ }
+
+ return overlaps;
+}
+
+/**
+ * See block_int.h for this function's documentation.
+ */
+bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum)
+{
+ RCU_READ_LOCK_GUARD();
+
+ return bdrv_bsc_range_overlaps_locked(bs, offset, 1, pnum);
+}
+
+/**
+ * See block_int.h for this function's documentation.
+ */
+void bdrv_bsc_invalidate_range(BlockDriverState *bs,
+ int64_t offset, int64_t bytes)
+{
+ RCU_READ_LOCK_GUARD();
+
+ if (bdrv_bsc_range_overlaps_locked(bs, offset, bytes, NULL)) {
+ qatomic_set(&bs->block_status_cache->valid, false);
+ }
+}
+
+/**
+ * See block_int.h for this function's documentation.
+ */
+void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes)
+{
+ BdrvBlockStatusCache *new_bsc = g_new(BdrvBlockStatusCache, 1);
+ BdrvBlockStatusCache *old_bsc;
+
+ *new_bsc = (BdrvBlockStatusCache) {
+ .valid = true,
+ .data_start = offset,
+ .data_end = offset + bytes,
+ };
+
+ QEMU_LOCK_GUARD(&bs->bsc_modify_lock);
+
+ old_bsc = qatomic_rcu_read(&bs->block_status_cache);
+ qatomic_rcu_set(&bs->block_status_cache, new_bsc);
+ if (old_bsc) {
+ g_free_rcu(old_bsc, rcu);
+ }
+}