/*
* QEMU Block backends
*
- * Copyright (C) 2014 Red Hat, Inc.
+ * Copyright (C) 2014-2016 Red Hat, Inc.
*
* Authors:
#include "sysemu/blockdev.h"
#include "sysemu/sysemu.h"
#include "qapi-event.h"
+#include "qemu/id.h"
+#include "trace.h"
/* Number of coroutines to reserve per attached device model */
#define COROUTINE_POOL_RESERVATION 64
+#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
+
static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
struct BlockBackend {
char *name;
int refcnt;
- BlockDriverState *bs;
+ BdrvChild *root;
DriveInfo *legacy_dinfo; /* null unless created by drive_new() */
- QTAILQ_ENTRY(BlockBackend) link; /* for blk_backends */
+ QTAILQ_ENTRY(BlockBackend) link; /* for block_backends */
+ QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
+ BlockBackendPublic public;
void *dev; /* attached device model, if any */
/* TODO change to DeviceState when all users are qdevified */
* can be used to restore those options in the new BDS on insert) */
BlockBackendRootState root_state;
+ bool enable_write_cache;
+
/* I/O stats (display with "info blockstats"). */
BlockAcctStats stats;
bool iostatus_enabled;
BlockDeviceIoStatus iostatus;
+ bool allow_write_beyond_eof;
+
NotifierList remove_bs_notifiers, insert_bs_notifiers;
};
};
static void drive_info_del(DriveInfo *dinfo);
+static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
+
+/* All BlockBackends */
+static QTAILQ_HEAD(, BlockBackend) block_backends =
+ QTAILQ_HEAD_INITIALIZER(block_backends);
+
+/* All BlockBackends referenced by the monitor and which are iterated through by
+ * blk_next() */
+static QTAILQ_HEAD(, BlockBackend) monitor_block_backends =
+ QTAILQ_HEAD_INITIALIZER(monitor_block_backends);
+
+static void blk_root_inherit_options(int *child_flags, QDict *child_options,
+ int parent_flags, QDict *parent_options)
+{
+ /* We're not supposed to call this function for root nodes */
+ abort();
+}
+static void blk_root_drained_begin(BdrvChild *child);
+static void blk_root_drained_end(BdrvChild *child);
+
+static void blk_root_change_media(BdrvChild *child, bool load);
+static void blk_root_resize(BdrvChild *child);
-/* All the BlockBackends (except for hidden ones) */
-static QTAILQ_HEAD(, BlockBackend) blk_backends =
- QTAILQ_HEAD_INITIALIZER(blk_backends);
+static const char *blk_root_get_name(BdrvChild *child)
+{
+ return blk_name(child->opaque);
+}
+
+static const BdrvChildRole child_root = {
+ .inherit_options = blk_root_inherit_options,
+
+ .change_media = blk_root_change_media,
+ .resize = blk_root_resize,
+ .get_name = blk_root_get_name,
+
+ .drained_begin = blk_root_drained_begin,
+ .drained_end = blk_root_drained_end,
+};
/*
- * Create a new BlockBackend with @name, with a reference count of one.
- * @name must not be null or empty.
- * Fail if a BlockBackend with this name already exists.
+ * Create a new BlockBackend with a reference count of one.
* Store an error through @errp on failure, unless it's null.
* Return the new BlockBackend on success, null on failure.
*/
-BlockBackend *blk_new(const char *name, Error **errp)
+BlockBackend *blk_new(void)
{
BlockBackend *blk;
- assert(name && name[0]);
- if (!id_wellformed(name)) {
- error_setg(errp, "Invalid device name");
- return NULL;
- }
- if (blk_by_name(name)) {
- error_setg(errp, "Device with id '%s' already exists", name);
- return NULL;
- }
- if (bdrv_find_node(name)) {
- error_setg(errp,
- "Device name '%s' conflicts with an existing node name",
- name);
- return NULL;
- }
-
blk = g_new0(BlockBackend, 1);
- blk->name = g_strdup(name);
blk->refcnt = 1;
- notifier_list_init(&blk->remove_bs_notifiers);
- notifier_list_init(&blk->insert_bs_notifiers);
- QTAILQ_INSERT_TAIL(&blk_backends, blk, link);
- return blk;
-}
+ blk_set_enable_write_cache(blk, true);
-/*
- * Create a new BlockBackend with a new BlockDriverState attached.
- * Otherwise just like blk_new(), which see.
- */
-BlockBackend *blk_new_with_bs(const char *name, Error **errp)
-{
- BlockBackend *blk;
- BlockDriverState *bs;
+ qemu_co_queue_init(&blk->public.throttled_reqs[0]);
+ qemu_co_queue_init(&blk->public.throttled_reqs[1]);
- blk = blk_new(name, errp);
- if (!blk) {
- return NULL;
- }
+ notifier_list_init(&blk->remove_bs_notifiers);
+ notifier_list_init(&blk->insert_bs_notifiers);
- bs = bdrv_new_root();
- blk->bs = bs;
- bs->blk = blk;
+ QTAILQ_INSERT_TAIL(&block_backends, blk, link);
return blk;
}
/*
- * Calls blk_new_with_bs() and then calls bdrv_open() on the BlockDriverState.
+ * Creates a new BlockBackend, opens a new BlockDriverState, and connects both.
*
* Just as with bdrv_open(), after having called this function the reference to
* @options belongs to the block layer (even on failure).
* though, so callers of this function have to be able to specify @filename and
* @flags.
*/
-BlockBackend *blk_new_open(const char *name, const char *filename,
- const char *reference, QDict *options, int flags,
- Error **errp)
+BlockBackend *blk_new_open(const char *filename, const char *reference,
+ QDict *options, int flags, Error **errp)
{
BlockBackend *blk;
- int ret;
-
- blk = blk_new_with_bs(name, errp);
- if (!blk) {
- QDECREF(options);
- return NULL;
- }
+ BlockDriverState *bs;
- ret = bdrv_open(&blk->bs, filename, reference, options, flags, errp);
- if (ret < 0) {
+ blk = blk_new();
+ bs = bdrv_open(filename, reference, options, flags, errp);
+ if (!bs) {
blk_unref(blk);
return NULL;
}
+ blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);
+
return blk;
}
static void blk_delete(BlockBackend *blk)
{
assert(!blk->refcnt);
+ assert(!blk->name);
assert(!blk->dev);
- if (blk->bs) {
+ if (blk->root) {
blk_remove_bs(blk);
}
assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
- if (blk->root_state.throttle_state) {
- g_free(blk->root_state.throttle_group);
- throttle_group_unref(blk->root_state.throttle_state);
- }
- /* Avoid double-remove after blk_hide_on_behalf_of_hmp_drive_del() */
- if (blk->name[0]) {
- QTAILQ_REMOVE(&blk_backends, blk, link);
- }
- g_free(blk->name);
+ QTAILQ_REMOVE(&block_backends, blk, link);
drive_info_del(blk->legacy_dinfo);
block_acct_cleanup(&blk->stats);
g_free(blk);
}
/*
- * Return the BlockBackend after @blk.
+ * Behaves similarly to blk_next() but iterates over all BlockBackends, even the
+ * ones which are hidden (i.e. are not referenced by the monitor).
+ */
+static BlockBackend *blk_all_next(BlockBackend *blk)
+{
+ return blk ? QTAILQ_NEXT(blk, link)
+ : QTAILQ_FIRST(&block_backends);
+}
+
+void blk_remove_all_bs(void)
+{
+ BlockBackend *blk = NULL;
+
+ while ((blk = blk_all_next(blk)) != NULL) {
+ AioContext *ctx = blk_get_aio_context(blk);
+
+ aio_context_acquire(ctx);
+ if (blk->root) {
+ blk_remove_bs(blk);
+ }
+ aio_context_release(ctx);
+ }
+}
+
+/*
+ * Return the monitor-owned BlockBackend after @blk.
* If @blk is null, return the first one.
* Else, return @blk's next sibling, which may be null.
*
*/
BlockBackend *blk_next(BlockBackend *blk)
{
- return blk ? QTAILQ_NEXT(blk, link) : QTAILQ_FIRST(&blk_backends);
+ return blk ? QTAILQ_NEXT(blk, monitor_link)
+ : QTAILQ_FIRST(&monitor_block_backends);
+}
+
+/* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
+ * the monitor or attached to a BlockBackend */
+BlockDriverState *bdrv_next(BdrvNextIterator *it)
+{
+ BlockDriverState *bs;
+
+ /* First, return all root nodes of BlockBackends. In order to avoid
+ * returning a BDS twice when multiple BBs refer to it, we only return it
+ * if the BB is the first one in the parent list of the BDS. */
+ if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
+ do {
+ it->blk = blk_all_next(it->blk);
+ bs = it->blk ? blk_bs(it->blk) : NULL;
+ } while (it->blk && (bs == NULL || bdrv_first_blk(bs) != it->blk));
+
+ if (bs) {
+ return bs;
+ }
+ it->phase = BDRV_NEXT_MONITOR_OWNED;
+ }
+
+ /* Then return the monitor-owned BDSes without a BB attached. Ignore all
+ * BDSes that are attached to a BlockBackend here; they have been handled
+ * by the above block already */
+ do {
+ it->bs = bdrv_next_monitor_owned(it->bs);
+ bs = it->bs;
+ } while (bs && bdrv_has_blk(bs));
+
+ return bs;
+}
+
+BlockDriverState *bdrv_first(BdrvNextIterator *it)
+{
+ *it = (BdrvNextIterator) {
+ .phase = BDRV_NEXT_BACKEND_ROOTS,
+ };
+
+ return bdrv_next(it);
+}
+
+/*
+ * Add a BlockBackend into the list of backends referenced by the monitor, with
+ * the given @name acting as the handle for the monitor.
+ * Strictly for use by blockdev.c.
+ *
+ * @name must not be null or empty.
+ *
+ * Returns true on success and false on failure. In the latter case, an Error
+ * object is returned through @errp.
+ */
+bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp)
+{
+ assert(!blk->name);
+ assert(name && name[0]);
+
+ if (!id_wellformed(name)) {
+ error_setg(errp, "Invalid device name");
+ return false;
+ }
+ if (blk_by_name(name)) {
+ error_setg(errp, "Device with id '%s' already exists", name);
+ return false;
+ }
+ if (bdrv_find_node(name)) {
+ error_setg(errp,
+ "Device name '%s' conflicts with an existing node name",
+ name);
+ return false;
+ }
+
+ blk->name = g_strdup(name);
+ QTAILQ_INSERT_TAIL(&monitor_block_backends, blk, monitor_link);
+ return true;
+}
+
+/*
+ * Remove a BlockBackend from the list of backends referenced by the monitor.
+ * Strictly for use by blockdev.c.
+ */
+void monitor_remove_blk(BlockBackend *blk)
+{
+ if (!blk->name) {
+ return;
+ }
+
+ QTAILQ_REMOVE(&monitor_block_backends, blk, monitor_link);
+ g_free(blk->name);
+ blk->name = NULL;
}
/*
* Return @blk's name, a non-null string.
- * Wart: the name is empty iff @blk has been hidden with
- * blk_hide_on_behalf_of_hmp_drive_del().
+ * Returns an empty string iff @blk is not referenced by the monitor.
*/
const char *blk_name(BlockBackend *blk)
{
- return blk->name;
+ return blk->name ?: "";
}
/*
*/
BlockBackend *blk_by_name(const char *name)
{
- BlockBackend *blk;
+ BlockBackend *blk = NULL;
assert(name);
- QTAILQ_FOREACH(blk, &blk_backends, link) {
+ while ((blk = blk_next(blk)) != NULL) {
if (!strcmp(name, blk->name)) {
return blk;
}
*/
BlockDriverState *blk_bs(BlockBackend *blk)
{
- return blk->bs;
+ return blk->root ? blk->root->bs : NULL;
}
-/*
- * Changes the BlockDriverState attached to @blk
- */
-void blk_set_bs(BlockBackend *blk, BlockDriverState *bs)
+static BlockBackend *bdrv_first_blk(BlockDriverState *bs)
{
- bdrv_ref(bs);
-
- if (blk->bs) {
- blk->bs->blk = NULL;
- bdrv_unref(blk->bs);
+ BdrvChild *child;
+ QLIST_FOREACH(child, &bs->parents, next_parent) {
+ if (child->role == &child_root) {
+ return child->opaque;
+ }
}
- assert(bs->blk == NULL);
- blk->bs = bs;
- bs->blk = blk;
+ return NULL;
+}
+
+/*
+ * Returns true if @bs has an associated BlockBackend.
+ */
+bool bdrv_has_blk(BlockDriverState *bs)
+{
+ return bdrv_first_blk(bs) != NULL;
}
/*
*/
BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
{
- BlockBackend *blk;
+ BlockBackend *blk = NULL;
- QTAILQ_FOREACH(blk, &blk_backends, link) {
+ while ((blk = blk_next(blk)) != NULL) {
if (blk->legacy_dinfo == dinfo) {
return blk;
}
}
/*
- * Hide @blk.
- * @blk must not have been hidden already.
- * Make attached BlockDriverState, if any, anonymous.
- * Once hidden, @blk is invisible to all functions that don't receive
- * it as argument. For example, blk_by_name() won't return it.
- * Strictly for use by do_drive_del().
- * TODO get rid of it!
+ * Returns a pointer to the publicly accessible fields of @blk.
*/
-void blk_hide_on_behalf_of_hmp_drive_del(BlockBackend *blk)
+BlockBackendPublic *blk_get_public(BlockBackend *blk)
{
- QTAILQ_REMOVE(&blk_backends, blk, link);
- blk->name[0] = 0;
- if (blk->bs) {
- bdrv_make_anon(blk->bs);
- }
+ return &blk->public;
+}
+
+/*
+ * Returns a BlockBackend given the associated @public fields.
+ */
+BlockBackend *blk_by_public(BlockBackendPublic *public)
+{
+ return container_of(public, BlockBackend, public);
}
/*
*/
void blk_remove_bs(BlockBackend *blk)
{
- assert(blk->bs->blk == blk);
-
notifier_list_notify(&blk->remove_bs_notifiers, blk);
+ if (blk->public.throttle_state) {
+ throttle_timers_detach_aio_context(&blk->public.throttle_timers);
+ }
blk_update_root_state(blk);
- blk->bs->blk = NULL;
- bdrv_unref(blk->bs);
- blk->bs = NULL;
+ bdrv_root_unref_child(blk->root);
+ blk->root = NULL;
}
/*
*/
void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
{
- assert(!blk->bs && !bs->blk);
bdrv_ref(bs);
- blk->bs = bs;
- bs->blk = blk;
+ blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);
notifier_list_notify(&blk->insert_bs_notifiers, blk);
+ if (blk->public.throttle_state) {
+ throttle_timers_attach_aio_context(
+ &blk->public.throttle_timers, bdrv_get_aio_context(bs));
+ }
}
/*
}
}
+static void blk_root_change_media(BdrvChild *child, bool load)
+{
+ blk_dev_change_media_cb(child->opaque, load);
+}
+
/*
* Does @blk's attached device model have removable media?
* %true if no device model is attached.
/*
* Notify @blk's attached device model of a backend size change.
*/
-void blk_dev_resize_cb(BlockBackend *blk)
+static void blk_root_resize(BdrvChild *child)
{
+ BlockBackend *blk = child->opaque;
+
if (blk->dev_ops && blk->dev_ops->resize_cb) {
blk->dev_ops->resize_cb(blk->dev_opaque);
}
void blk_iostatus_reset(BlockBackend *blk)
{
if (blk_iostatus_is_enabled(blk)) {
+ BlockDriverState *bs = blk_bs(blk);
blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
- if (blk->bs && blk->bs->job) {
- block_job_iostatus_reset(blk->bs->job);
+ if (bs && bs->job) {
+ block_job_iostatus_reset(bs->job);
}
}
}
}
}
+void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow)
+{
+ blk->allow_write_beyond_eof = allow;
+}
+
static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
size_t size)
{
return -ENOMEDIUM;
}
- len = blk_getlength(blk);
- if (len < 0) {
- return len;
- }
-
if (offset < 0) {
return -EIO;
}
- if (offset > len || len - offset < size) {
- return -EIO;
+ if (!blk->allow_write_beyond_eof) {
+ len = blk_getlength(blk);
+ if (len < 0) {
+ return len;
+ }
+
+ if (offset > len || len - offset < size) {
+ return -EIO;
+ }
}
return 0;
nb_sectors * BDRV_SECTOR_SIZE);
}
-int blk_read(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
- int nb_sectors)
+int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
+ unsigned int bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
{
- int ret = blk_check_request(blk, sector_num, nb_sectors);
+ int ret;
+
+ trace_blk_co_preadv(blk, blk_bs(blk), offset, bytes, flags);
+
+ ret = blk_check_byte_request(blk, offset, bytes);
if (ret < 0) {
return ret;
}
- return bdrv_read(blk->bs, sector_num, buf, nb_sectors);
+ /* throttling disk I/O */
+ if (blk->public.throttle_state) {
+ throttle_group_co_io_limits_intercept(blk, bytes, false);
+ }
+
+ return bdrv_co_preadv(blk->root, offset, bytes, qiov, flags);
}
-int blk_read_unthrottled(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
- int nb_sectors)
+int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
+ unsigned int bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
{
- int ret = blk_check_request(blk, sector_num, nb_sectors);
+ int ret;
+
+ trace_blk_co_pwritev(blk, blk_bs(blk), offset, bytes, flags);
+
+ ret = blk_check_byte_request(blk, offset, bytes);
if (ret < 0) {
return ret;
}
- return bdrv_read_unthrottled(blk->bs, sector_num, buf, nb_sectors);
+ /* throttling disk I/O */
+ if (blk->public.throttle_state) {
+ throttle_group_co_io_limits_intercept(blk, bytes, true);
+ }
+
+ if (!blk->enable_write_cache) {
+ flags |= BDRV_REQ_FUA;
+ }
+
+ return bdrv_co_pwritev(blk->root, offset, bytes, qiov, flags);
}
-int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf,
- int nb_sectors)
+typedef struct BlkRwCo {
+ BlockBackend *blk;
+ int64_t offset;
+ QEMUIOVector *qiov;
+ int ret;
+ BdrvRequestFlags flags;
+} BlkRwCo;
+
+static void blk_read_entry(void *opaque)
{
- int ret = blk_check_request(blk, sector_num, nb_sectors);
- if (ret < 0) {
- return ret;
+ BlkRwCo *rwco = opaque;
+
+ rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size,
+ rwco->qiov, rwco->flags);
+}
+
+static void blk_write_entry(void *opaque)
+{
+ BlkRwCo *rwco = opaque;
+
+ rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size,
+ rwco->qiov, rwco->flags);
+}
+
+static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
+ int64_t bytes, CoroutineEntry co_entry,
+ BdrvRequestFlags flags)
+{
+ AioContext *aio_context;
+ QEMUIOVector qiov;
+ struct iovec iov;
+ Coroutine *co;
+ BlkRwCo rwco;
+
+ iov = (struct iovec) {
+ .iov_base = buf,
+ .iov_len = bytes,
+ };
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ rwco = (BlkRwCo) {
+ .blk = blk,
+ .offset = offset,
+ .qiov = &qiov,
+ .flags = flags,
+ .ret = NOT_DONE,
+ };
+
+ co = qemu_coroutine_create(co_entry);
+ qemu_coroutine_enter(co, &rwco);
+
+ aio_context = blk_get_aio_context(blk);
+ while (rwco.ret == NOT_DONE) {
+ aio_poll(aio_context, true);
}
- return bdrv_write(blk->bs, sector_num, buf, nb_sectors);
+ return rwco.ret;
}
-int blk_write_zeroes(BlockBackend *blk, int64_t sector_num,
- int nb_sectors, BdrvRequestFlags flags)
+int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
+ int count)
{
- int ret = blk_check_request(blk, sector_num, nb_sectors);
+ int ret;
+
+ ret = blk_check_byte_request(blk, offset, count);
if (ret < 0) {
return ret;
}
- return bdrv_write_zeroes(blk->bs, sector_num, nb_sectors, flags);
+ blk_root_drained_begin(blk->root);
+ ret = blk_pread(blk, offset, buf, count);
+ blk_root_drained_end(blk->root);
+ return ret;
+}
+
+int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+ int count, BdrvRequestFlags flags)
+{
+ return blk_prw(blk, offset, NULL, count, blk_write_entry,
+ flags | BDRV_REQ_ZERO_WRITE);
+}
+
+int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
+{
+ return bdrv_make_zero(blk->root, flags);
}
static void error_callback_bh(void *opaque)
return &acb->common;
}
-BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t sector_num,
- int nb_sectors, BdrvRequestFlags flags,
- BlockCompletionFunc *cb, void *opaque)
+typedef struct BlkAioEmAIOCB {
+ BlockAIOCB common;
+ BlkRwCo rwco;
+ int bytes;
+ bool has_returned;
+ QEMUBH* bh;
+} BlkAioEmAIOCB;
+
+static const AIOCBInfo blk_aio_em_aiocb_info = {
+ .aiocb_size = sizeof(BlkAioEmAIOCB),
+};
+
+static void blk_aio_complete(BlkAioEmAIOCB *acb)
{
- int ret = blk_check_request(blk, sector_num, nb_sectors);
- if (ret < 0) {
- return blk_abort_aio_request(blk, cb, opaque, ret);
+ if (acb->bh) {
+ assert(acb->has_returned);
+ qemu_bh_delete(acb->bh);
+ }
+ if (acb->has_returned) {
+ acb->common.cb(acb->common.opaque, acb->rwco.ret);
+ qemu_aio_unref(acb);
}
+}
- return bdrv_aio_write_zeroes(blk->bs, sector_num, nb_sectors, flags,
- cb, opaque);
+static void blk_aio_complete_bh(void *opaque)
+{
+ blk_aio_complete(opaque);
+}
+
+static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
+ QEMUIOVector *qiov, CoroutineEntry co_entry,
+ BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque)
+{
+ BlkAioEmAIOCB *acb;
+ Coroutine *co;
+
+ acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
+ acb->rwco = (BlkRwCo) {
+ .blk = blk,
+ .offset = offset,
+ .qiov = qiov,
+ .flags = flags,
+ .ret = NOT_DONE,
+ };
+ acb->bytes = bytes;
+ acb->bh = NULL;
+ acb->has_returned = false;
+
+ co = qemu_coroutine_create(co_entry);
+ qemu_coroutine_enter(co, acb);
+
+ acb->has_returned = true;
+ if (acb->rwco.ret != NOT_DONE) {
+ acb->bh = aio_bh_new(blk_get_aio_context(blk), blk_aio_complete_bh, acb);
+ qemu_bh_schedule(acb->bh);
+ }
+
+ return &acb->common;
+}
+
+static void blk_aio_read_entry(void *opaque)
+{
+ BlkAioEmAIOCB *acb = opaque;
+ BlkRwCo *rwco = &acb->rwco;
+
+ assert(rwco->qiov->size == acb->bytes);
+ rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
+ rwco->qiov, rwco->flags);
+ blk_aio_complete(acb);
+}
+
+static void blk_aio_write_entry(void *opaque)
+{
+ BlkAioEmAIOCB *acb = opaque;
+ BlkRwCo *rwco = &acb->rwco;
+
+ assert(!rwco->qiov || rwco->qiov->size == acb->bytes);
+ rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
+ rwco->qiov, rwco->flags);
+ blk_aio_complete(acb);
+}
+
+BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+ int count, BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque)
+{
+ return blk_aio_prwv(blk, offset, count, NULL, blk_aio_write_entry,
+ flags | BDRV_REQ_ZERO_WRITE, cb, opaque);
}
int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count)
{
- int ret = blk_check_byte_request(blk, offset, count);
+ int ret = blk_prw(blk, offset, buf, count, blk_read_entry, 0);
if (ret < 0) {
return ret;
}
-
- return bdrv_pread(blk->bs, offset, buf, count);
+ return count;
}
-int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count)
+int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count,
+ BdrvRequestFlags flags)
{
- int ret = blk_check_byte_request(blk, offset, count);
+ int ret = blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
+ flags);
if (ret < 0) {
return ret;
}
-
- return bdrv_pwrite(blk->bs, offset, buf, count);
+ return count;
}
int64_t blk_getlength(BlockBackend *blk)
return -ENOMEDIUM;
}
- return bdrv_getlength(blk->bs);
+ return bdrv_getlength(blk_bs(blk));
}
void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
{
- if (!blk->bs) {
+ if (!blk_bs(blk)) {
*nb_sectors_ptr = 0;
} else {
- bdrv_get_geometry(blk->bs, nb_sectors_ptr);
+ bdrv_get_geometry(blk_bs(blk), nb_sectors_ptr);
}
}
return -ENOMEDIUM;
}
- return bdrv_nb_sectors(blk->bs);
+ return bdrv_nb_sectors(blk_bs(blk));
}
-BlockAIOCB *blk_aio_readv(BlockBackend *blk, int64_t sector_num,
- QEMUIOVector *iov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
+BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
+ QEMUIOVector *qiov, BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque)
{
- int ret = blk_check_request(blk, sector_num, nb_sectors);
- if (ret < 0) {
- return blk_abort_aio_request(blk, cb, opaque, ret);
- }
-
- return bdrv_aio_readv(blk->bs, sector_num, iov, nb_sectors, cb, opaque);
+ return blk_aio_prwv(blk, offset, qiov->size, qiov,
+ blk_aio_read_entry, flags, cb, opaque);
}
-BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num,
- QEMUIOVector *iov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
+BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
+ QEMUIOVector *qiov, BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque)
{
- int ret = blk_check_request(blk, sector_num, nb_sectors);
- if (ret < 0) {
- return blk_abort_aio_request(blk, cb, opaque, ret);
- }
-
- return bdrv_aio_writev(blk->bs, sector_num, iov, nb_sectors, cb, opaque);
+ return blk_aio_prwv(blk, offset, qiov->size, qiov,
+ blk_aio_write_entry, flags, cb, opaque);
}
BlockAIOCB *blk_aio_flush(BlockBackend *blk,
return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
}
- return bdrv_aio_flush(blk->bs, cb, opaque);
+ return bdrv_aio_flush(blk_bs(blk), cb, opaque);
}
BlockAIOCB *blk_aio_discard(BlockBackend *blk,
return blk_abort_aio_request(blk, cb, opaque, ret);
}
- return bdrv_aio_discard(blk->bs, sector_num, nb_sectors, cb, opaque);
+ return bdrv_aio_discard(blk_bs(blk), sector_num, nb_sectors, cb, opaque);
}
void blk_aio_cancel(BlockAIOCB *acb)
bdrv_aio_cancel_async(acb);
}
-int blk_aio_multiwrite(BlockBackend *blk, BlockRequest *reqs, int num_reqs)
-{
- int i, ret;
-
- for (i = 0; i < num_reqs; i++) {
- ret = blk_check_request(blk, reqs[i].sector, reqs[i].nb_sectors);
- if (ret < 0) {
- return ret;
- }
- }
-
- return bdrv_aio_multiwrite(blk->bs, reqs, num_reqs);
-}
-
int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
{
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
- return bdrv_ioctl(blk->bs, req, buf);
+ return bdrv_ioctl(blk_bs(blk), req, buf);
}
BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
}
- return bdrv_aio_ioctl(blk->bs, req, buf, cb, opaque);
+ return bdrv_aio_ioctl(blk_bs(blk), req, buf, cb, opaque);
}
int blk_co_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
return ret;
}
- return bdrv_co_discard(blk->bs, sector_num, nb_sectors);
+ return bdrv_co_discard(blk_bs(blk), sector_num, nb_sectors);
}
int blk_co_flush(BlockBackend *blk)
return -ENOMEDIUM;
}
- return bdrv_co_flush(blk->bs);
+ return bdrv_co_flush(blk_bs(blk));
}
int blk_flush(BlockBackend *blk)
return -ENOMEDIUM;
}
- return bdrv_flush(blk->bs);
-}
-
-int blk_flush_all(void)
-{
- return bdrv_flush_all();
+ return bdrv_flush(blk_bs(blk));
}
void blk_drain(BlockBackend *blk)
{
- if (blk->bs) {
- bdrv_drain(blk->bs);
+ if (blk_bs(blk)) {
+ bdrv_drain(blk_bs(blk));
}
}
int blk_is_read_only(BlockBackend *blk)
{
- if (blk->bs) {
- return bdrv_is_read_only(blk->bs);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ return bdrv_is_read_only(bs);
} else {
return blk->root_state.read_only;
}
int blk_is_sg(BlockBackend *blk)
{
- if (!blk->bs) {
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (!bs) {
return 0;
}
- return bdrv_is_sg(blk->bs);
+ return bdrv_is_sg(bs);
}
int blk_enable_write_cache(BlockBackend *blk)
{
- if (blk->bs) {
- return bdrv_enable_write_cache(blk->bs);
- } else {
- return !!(blk->root_state.open_flags & BDRV_O_CACHE_WB);
- }
+ return blk->enable_write_cache;
}
void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
{
- if (blk->bs) {
- bdrv_set_enable_write_cache(blk->bs, wce);
- } else {
- if (wce) {
- blk->root_state.open_flags |= BDRV_O_CACHE_WB;
- } else {
- blk->root_state.open_flags &= ~BDRV_O_CACHE_WB;
- }
- }
+ blk->enable_write_cache = wce;
}
void blk_invalidate_cache(BlockBackend *blk, Error **errp)
{
- if (!blk->bs) {
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (!bs) {
error_setg(errp, "Device '%s' has no medium", blk->name);
return;
}
- bdrv_invalidate_cache(blk->bs, errp);
+ bdrv_invalidate_cache(bs, errp);
}
bool blk_is_inserted(BlockBackend *blk)
{
- return blk->bs && bdrv_is_inserted(blk->bs);
+ BlockDriverState *bs = blk_bs(blk);
+
+ return bs && bdrv_is_inserted(bs);
}
bool blk_is_available(BlockBackend *blk)
void blk_lock_medium(BlockBackend *blk, bool locked)
{
- if (blk->bs) {
- bdrv_lock_medium(blk->bs, locked);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_lock_medium(bs, locked);
}
}
void blk_eject(BlockBackend *blk, bool eject_flag)
{
- if (blk->bs) {
- bdrv_eject(blk->bs, eject_flag);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_eject(bs, eject_flag);
}
}
int blk_get_flags(BlockBackend *blk)
{
- if (blk->bs) {
- return bdrv_get_flags(blk->bs);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ return bdrv_get_flags(bs);
} else {
return blk->root_state.open_flags;
}
}
-int blk_get_max_transfer_length(BlockBackend *blk)
+/* Returns the maximum transfer length, in bytes; guaranteed nonzero */
+uint32_t blk_get_max_transfer(BlockBackend *blk)
{
- if (blk->bs) {
- return blk->bs->bl.max_transfer_length;
- } else {
- return 0;
+ BlockDriverState *bs = blk_bs(blk);
+ uint32_t max = 0;
+
+ if (bs) {
+ max = bs->bl.max_transfer;
}
+ return MIN_NON_ZERO(max, INT_MAX);
}
int blk_get_max_iov(BlockBackend *blk)
{
- return blk->bs->bl.max_iov;
+ return blk->root->bs->bl.max_iov;
}
void blk_set_guest_block_size(BlockBackend *blk, int align)
void *blk_try_blockalign(BlockBackend *blk, size_t size)
{
- return qemu_try_blockalign(blk ? blk->bs : NULL, size);
+ return qemu_try_blockalign(blk ? blk_bs(blk) : NULL, size);
}
void *blk_blockalign(BlockBackend *blk, size_t size)
{
- return qemu_blockalign(blk ? blk->bs : NULL, size);
+ return qemu_blockalign(blk ? blk_bs(blk) : NULL, size);
}
bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
{
- if (!blk->bs) {
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (!bs) {
return false;
}
- return bdrv_op_is_blocked(blk->bs, op, errp);
+ return bdrv_op_is_blocked(bs, op, errp);
}
void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
{
- if (blk->bs) {
- bdrv_op_unblock(blk->bs, op, reason);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_op_unblock(bs, op, reason);
}
}
void blk_op_block_all(BlockBackend *blk, Error *reason)
{
- if (blk->bs) {
- bdrv_op_block_all(blk->bs, reason);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_op_block_all(bs, reason);
}
}
void blk_op_unblock_all(BlockBackend *blk, Error *reason)
{
- if (blk->bs) {
- bdrv_op_unblock_all(blk->bs, reason);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_op_unblock_all(bs, reason);
}
}
AioContext *blk_get_aio_context(BlockBackend *blk)
{
- if (blk->bs) {
- return bdrv_get_aio_context(blk->bs);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ return bdrv_get_aio_context(bs);
} else {
return qemu_get_aio_context();
}
void blk_set_aio_context(BlockBackend *blk, AioContext *new_context)
{
- if (blk->bs) {
- bdrv_set_aio_context(blk->bs, new_context);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ if (blk->public.throttle_state) {
+ throttle_timers_detach_aio_context(&blk->public.throttle_timers);
+ }
+ bdrv_set_aio_context(bs, new_context);
+ if (blk->public.throttle_state) {
+ throttle_timers_attach_aio_context(&blk->public.throttle_timers,
+ new_context);
+ }
}
}
void (*attached_aio_context)(AioContext *new_context, void *opaque),
void (*detach_aio_context)(void *opaque), void *opaque)
{
- if (blk->bs) {
- bdrv_add_aio_context_notifier(blk->bs, attached_aio_context,
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_add_aio_context_notifier(bs, attached_aio_context,
detach_aio_context, opaque);
}
}
void (*detach_aio_context)(void *),
void *opaque)
{
- if (blk->bs) {
- bdrv_remove_aio_context_notifier(blk->bs, attached_aio_context,
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_remove_aio_context_notifier(bs, attached_aio_context,
detach_aio_context, opaque);
}
}
void blk_io_plug(BlockBackend *blk)
{
- if (blk->bs) {
- bdrv_io_plug(blk->bs);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_io_plug(bs);
}
}
void blk_io_unplug(BlockBackend *blk)
{
- if (blk->bs) {
- bdrv_io_unplug(blk->bs);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_io_unplug(bs);
}
}
return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
}
-int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t sector_num,
- int nb_sectors, BdrvRequestFlags flags)
+int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+ int count, BdrvRequestFlags flags)
{
- int ret = blk_check_request(blk, sector_num, nb_sectors);
- if (ret < 0) {
- return ret;
- }
-
- return bdrv_co_write_zeroes(blk->bs, sector_num, nb_sectors, flags);
+ return blk_co_pwritev(blk, offset, count, NULL,
+ flags | BDRV_REQ_ZERO_WRITE);
}
int blk_write_compressed(BlockBackend *blk, int64_t sector_num,
return ret;
}
- return bdrv_write_compressed(blk->bs, sector_num, buf, nb_sectors);
+ return bdrv_write_compressed(blk_bs(blk), sector_num, buf, nb_sectors);
}
int blk_truncate(BlockBackend *blk, int64_t offset)
return -ENOMEDIUM;
}
- return bdrv_truncate(blk->bs, offset);
+ return bdrv_truncate(blk_bs(blk), offset);
}
int blk_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
return ret;
}
- return bdrv_discard(blk->bs, sector_num, nb_sectors);
+ return bdrv_discard(blk_bs(blk), sector_num, nb_sectors);
}
int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
int64_t pos, int size)
{
+ int ret;
+
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
- return bdrv_save_vmstate(blk->bs, buf, pos, size);
+ ret = bdrv_save_vmstate(blk_bs(blk), buf, pos, size);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (ret == size && !blk->enable_write_cache) {
+ ret = bdrv_flush(blk_bs(blk));
+ }
+
+ return ret < 0 ? ret : size;
}
int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
return -ENOMEDIUM;
}
- return bdrv_load_vmstate(blk->bs, buf, pos, size);
+ return bdrv_load_vmstate(blk_bs(blk), buf, pos, size);
}
int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
return -ENOMEDIUM;
}
- return bdrv_probe_blocksizes(blk->bs, bsz);
+ return bdrv_probe_blocksizes(blk_bs(blk), bsz);
}
int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
return -ENOMEDIUM;
}
- return bdrv_probe_geometry(blk->bs, geo);
+ return bdrv_probe_geometry(blk_bs(blk), geo);
}
/*
*/
void blk_update_root_state(BlockBackend *blk)
{
- assert(blk->bs);
-
- blk->root_state.open_flags = blk->bs->open_flags;
- blk->root_state.read_only = blk->bs->read_only;
- blk->root_state.detect_zeroes = blk->bs->detect_zeroes;
+ assert(blk->root);
- if (blk->root_state.throttle_group) {
- g_free(blk->root_state.throttle_group);
- throttle_group_unref(blk->root_state.throttle_state);
- }
- if (blk->bs->throttle_state) {
- const char *name = throttle_group_get_name(blk->bs);
- blk->root_state.throttle_group = g_strdup(name);
- blk->root_state.throttle_state = throttle_group_incref(name);
- } else {
- blk->root_state.throttle_group = NULL;
- blk->root_state.throttle_state = NULL;
- }
+ blk->root_state.open_flags = blk->root->bs->open_flags;
+ blk->root_state.read_only = blk->root->bs->read_only;
+ blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes;
}
/*
void blk_apply_root_state(BlockBackend *blk, BlockDriverState *bs)
{
bs->detect_zeroes = blk->root_state.detect_zeroes;
- if (blk->root_state.throttle_group) {
- bdrv_io_limits_enable(bs, blk->root_state.throttle_group);
- }
}
/*
{
return &blk->root_state;
}
+
+int blk_commit_all(void)
+{
+ BlockBackend *blk = NULL;
+
+ while ((blk = blk_all_next(blk)) != NULL) {
+ AioContext *aio_context = blk_get_aio_context(blk);
+
+ aio_context_acquire(aio_context);
+ if (blk_is_inserted(blk) && blk->root->bs->backing) {
+ int ret = bdrv_commit(blk->root->bs);
+ if (ret < 0) {
+ aio_context_release(aio_context);
+ return ret;
+ }
+ }
+ aio_context_release(aio_context);
+ }
+ return 0;
+}
+
+int blk_flush_all(void)
+{
+ BlockBackend *blk = NULL;
+ int result = 0;
+
+ while ((blk = blk_all_next(blk)) != NULL) {
+ AioContext *aio_context = blk_get_aio_context(blk);
+ int ret;
+
+ aio_context_acquire(aio_context);
+ if (blk_is_inserted(blk)) {
+ ret = blk_flush(blk);
+ if (ret < 0 && !result) {
+ result = ret;
+ }
+ }
+ aio_context_release(aio_context);
+ }
+
+ return result;
+}
+
+
+/* throttling disk I/O limits */
+void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg)
+{
+ throttle_group_config(blk, cfg);
+}
+
+void blk_io_limits_disable(BlockBackend *blk)
+{
+ assert(blk->public.throttle_state);
+ bdrv_drained_begin(blk_bs(blk));
+ throttle_group_unregister_blk(blk);
+ bdrv_drained_end(blk_bs(blk));
+}
+
+/* should be called before blk_set_io_limits if a limit is set */
+void blk_io_limits_enable(BlockBackend *blk, const char *group)
+{
+ assert(!blk->public.throttle_state);
+ throttle_group_register_blk(blk, group);
+}
+
+void blk_io_limits_update_group(BlockBackend *blk, const char *group)
+{
+ /* this BB is not part of any group */
+ if (!blk->public.throttle_state) {
+ return;
+ }
+
+ /* this BB is a part of the same group than the one we want */
+ if (!g_strcmp0(throttle_group_get_name(blk), group)) {
+ return;
+ }
+
+ /* need to change the group this bs belong to */
+ blk_io_limits_disable(blk);
+ blk_io_limits_enable(blk, group);
+}
+
+static void blk_root_drained_begin(BdrvChild *child)
+{
+ BlockBackend *blk = child->opaque;
+
+ /* Note that blk->root may not be accessible here yet if we are just
+ * attaching to a BlockDriverState that is drained. Use child instead. */
+
+ if (blk->public.io_limits_disabled++ == 0) {
+ throttle_group_restart_blk(blk);
+ }
+}
+
+static void blk_root_drained_end(BdrvChild *child)
+{
+ BlockBackend *blk = child->opaque;
+
+ assert(blk->public.io_limits_disabled);
+ --blk->public.io_limits_disabled;
+}