/*
* QEMU Block backends
*
- * Copyright (C) 2014 Red Hat, Inc.
+ * Copyright (C) 2014-2016 Red Hat, Inc.
*
* Authors:
#include "sysemu/sysemu.h"
#include "qapi-event.h"
#include "qemu/id.h"
+#include "trace.h"
/* Number of coroutines to reserve per attached device model */
#define COROUTINE_POOL_RESERVATION 64
DriveInfo *legacy_dinfo; /* null unless created by drive_new() */
QTAILQ_ENTRY(BlockBackend) link; /* for block_backends */
QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
+ BlockBackendPublic public;
void *dev; /* attached device model, if any */
/* TODO change to DeviceState when all users are qdevified */
};
static void drive_info_del(DriveInfo *dinfo);
+static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
/* All BlockBackends */
static QTAILQ_HEAD(, BlockBackend) block_backends =
/* We're not supposed to call this function for root nodes */
abort();
}
+static void blk_root_drained_begin(BdrvChild *child);
+static void blk_root_drained_end(BdrvChild *child);
+
+static void blk_root_change_media(BdrvChild *child, bool load);
+static void blk_root_resize(BdrvChild *child);
+
+static const char *blk_root_get_name(BdrvChild *child)
+{
+ return blk_name(child->opaque);
+}
static const BdrvChildRole child_root = {
- .inherit_options = blk_root_inherit_options,
+ .inherit_options = blk_root_inherit_options,
+
+ .change_media = blk_root_change_media,
+ .resize = blk_root_resize,
+ .get_name = blk_root_get_name,
+
+ .drained_begin = blk_root_drained_begin,
+ .drained_end = blk_root_drained_end,
};
/*
* Store an error through @errp on failure, unless it's null.
* Return the new BlockBackend on success, null on failure.
*/
-BlockBackend *blk_new(Error **errp)
+BlockBackend *blk_new(void)
{
BlockBackend *blk;
blk = g_new0(BlockBackend, 1);
blk->refcnt = 1;
- notifier_list_init(&blk->remove_bs_notifiers);
- notifier_list_init(&blk->insert_bs_notifiers);
- QTAILQ_INSERT_TAIL(&block_backends, blk, link);
- return blk;
-}
+ blk_set_enable_write_cache(blk, true);
-/*
- * Create a new BlockBackend with a new BlockDriverState attached.
- * Otherwise just like blk_new(), which see.
- */
-BlockBackend *blk_new_with_bs(Error **errp)
-{
- BlockBackend *blk;
- BlockDriverState *bs;
+ qemu_co_queue_init(&blk->public.throttled_reqs[0]);
+ qemu_co_queue_init(&blk->public.throttled_reqs[1]);
- blk = blk_new(errp);
- if (!blk) {
- return NULL;
- }
+ notifier_list_init(&blk->remove_bs_notifiers);
+ notifier_list_init(&blk->insert_bs_notifiers);
- bs = bdrv_new_root();
- blk->root = bdrv_root_attach_child(bs, "root", &child_root);
- bs->blk = blk;
+ QTAILQ_INSERT_TAIL(&block_backends, blk, link);
return blk;
}
/*
- * Calls blk_new_with_bs() and then calls bdrv_open() on the BlockDriverState.
+ * Creates a new BlockBackend, opens a new BlockDriverState, and connects both.
*
* Just as with bdrv_open(), after having called this function the reference to
* @options belongs to the block layer (even on failure).
QDict *options, int flags, Error **errp)
{
BlockBackend *blk;
- int ret;
-
- blk = blk_new_with_bs(errp);
- if (!blk) {
- QDECREF(options);
- return NULL;
- }
+ BlockDriverState *bs;
- ret = bdrv_open(&blk->root->bs, filename, reference, options, flags, errp);
- if (ret < 0) {
+ blk = blk_new();
+ bs = bdrv_open(filename, reference, options, flags, errp);
+ if (!bs) {
blk_unref(blk);
return NULL;
}
- blk_set_enable_write_cache(blk, true);
+ blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);
return blk;
}
}
assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
- if (blk->root_state.throttle_state) {
- g_free(blk->root_state.throttle_group);
- throttle_group_unref(blk->root_state.throttle_state);
- }
QTAILQ_REMOVE(&block_backends, blk, link);
drive_info_del(blk->legacy_dinfo);
block_acct_cleanup(&blk->stats);
: QTAILQ_FIRST(&monitor_block_backends);
}
-/*
- * Iterates over all BlockDriverStates which are attached to a BlockBackend.
- * This function is for use by bdrv_next().
- *
- * @bs must be NULL or a BDS that is attached to a BB.
- */
-BlockDriverState *blk_next_root_bs(BlockDriverState *bs)
+/* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
+ * the monitor or attached to a BlockBackend */
+BlockDriverState *bdrv_next(BdrvNextIterator *it)
{
- BlockBackend *blk;
+ BlockDriverState *bs;
- if (bs) {
- assert(bs->blk);
- blk = bs->blk;
- } else {
- blk = NULL;
+ /* First, return all root nodes of BlockBackends. In order to avoid
+ * returning a BDS twice when multiple BBs refer to it, we only return it
+ * if the BB is the first one in the parent list of the BDS. */
+ if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
+ do {
+ it->blk = blk_all_next(it->blk);
+ bs = it->blk ? blk_bs(it->blk) : NULL;
+ } while (it->blk && (bs == NULL || bdrv_first_blk(bs) != it->blk));
+
+ if (bs) {
+ return bs;
+ }
+ it->phase = BDRV_NEXT_MONITOR_OWNED;
}
+ /* Then return the monitor-owned BDSes without a BB attached. Ignore all
+ * BDSes that are attached to a BlockBackend here; they have been handled
+ * by the above block already */
do {
- blk = blk_all_next(blk);
- } while (blk && !blk->root);
+ it->bs = bdrv_next_monitor_owned(it->bs);
+ bs = it->bs;
+ } while (bs && bdrv_has_blk(bs));
+
+ return bs;
+}
+
+BlockDriverState *bdrv_first(BdrvNextIterator *it)
+{
+ *it = (BdrvNextIterator) {
+ .phase = BDRV_NEXT_BACKEND_ROOTS,
+ };
- return blk ? blk->root->bs : NULL;
+ return bdrv_next(it);
}
/*
return blk->root ? blk->root->bs : NULL;
}
+static BlockBackend *bdrv_first_blk(BlockDriverState *bs)
+{
+ BdrvChild *child;
+ QLIST_FOREACH(child, &bs->parents, next_parent) {
+ if (child->role == &child_root) {
+ return child->opaque;
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * Returns true if @bs has an associated BlockBackend.
+ */
+bool bdrv_has_blk(BlockDriverState *bs)
+{
+ return bdrv_first_blk(bs) != NULL;
+}
+
/*
* Return @blk's DriveInfo if any, else null.
*/
abort();
}
+/*
+ * Returns a pointer to the publicly accessible fields of @blk.
+ */
+BlockBackendPublic *blk_get_public(BlockBackend *blk)
+{
+ return &blk->public;
+}
+
+/*
+ * Returns a BlockBackend given the associated @public fields.
+ */
+BlockBackend *blk_by_public(BlockBackendPublic *public)
+{
+ return container_of(public, BlockBackend, public);
+}
+
/*
* Disassociates the currently associated BlockDriverState from @blk.
*/
void blk_remove_bs(BlockBackend *blk)
{
- assert(blk->root->bs->blk == blk);
-
notifier_list_notify(&blk->remove_bs_notifiers, blk);
+ if (blk->public.throttle_state) {
+ throttle_timers_detach_aio_context(&blk->public.throttle_timers);
+ }
blk_update_root_state(blk);
- blk->root->bs->blk = NULL;
bdrv_root_unref_child(blk->root);
blk->root = NULL;
}
*/
void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
{
- assert(!blk->root && !bs->blk);
bdrv_ref(bs);
- blk->root = bdrv_root_attach_child(bs, "root", &child_root);
- bs->blk = blk;
+ blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);
notifier_list_notify(&blk->insert_bs_notifiers, blk);
+ if (blk->public.throttle_state) {
+ throttle_timers_attach_aio_context(
+ &blk->public.throttle_timers, bdrv_get_aio_context(bs));
+ }
}
/*
}
}
+static void blk_root_change_media(BdrvChild *child, bool load)
+{
+ blk_dev_change_media_cb(child->opaque, load);
+}
+
/*
* Does @blk's attached device model have removable media?
* %true if no device model is attached.
/*
* Notify @blk's attached device model of a backend size change.
*/
-void blk_dev_resize_cb(BlockBackend *blk)
+static void blk_root_resize(BdrvChild *child)
{
+ BlockBackend *blk = child->opaque;
+
if (blk->dev_ops && blk->dev_ops->resize_cb) {
blk->dev_ops->resize_cb(blk->dev_opaque);
}
nb_sectors * BDRV_SECTOR_SIZE);
}
-static int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
- unsigned int bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags)
+int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
+ unsigned int bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
{
- int ret = blk_check_byte_request(blk, offset, bytes);
+ int ret;
+
+ trace_blk_co_preadv(blk, blk_bs(blk), offset, bytes, flags);
+
+ ret = blk_check_byte_request(blk, offset, bytes);
if (ret < 0) {
return ret;
}
- return bdrv_co_preadv(blk_bs(blk), offset, bytes, qiov, flags);
+ /* throttling disk I/O */
+ if (blk->public.throttle_state) {
+ throttle_group_co_io_limits_intercept(blk, bytes, false);
+ }
+
+ return bdrv_co_preadv(blk->root, offset, bytes, qiov, flags);
}
-static int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
- unsigned int bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags)
+int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
+ unsigned int bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
{
int ret;
+ trace_blk_co_pwritev(blk, blk_bs(blk), offset, bytes, flags);
+
ret = blk_check_byte_request(blk, offset, bytes);
if (ret < 0) {
return ret;
}
+ /* throttling disk I/O */
+ if (blk->public.throttle_state) {
+ throttle_group_co_io_limits_intercept(blk, bytes, true);
+ }
+
if (!blk->enable_write_cache) {
flags |= BDRV_REQ_FUA;
}
- return bdrv_co_pwritev(blk_bs(blk), offset, bytes, qiov, flags);
+ return bdrv_co_pwritev(blk->root, offset, bytes, qiov, flags);
}
typedef struct BlkRwCo {
return rwco.ret;
}
-static int blk_rw(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
- int nb_sectors, CoroutineEntry co_entry,
- BdrvRequestFlags flags)
-{
- if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
- return -EINVAL;
- }
-
- return blk_prw(blk, sector_num << BDRV_SECTOR_BITS, buf,
- nb_sectors << BDRV_SECTOR_BITS, co_entry, flags);
-}
-
-int blk_read(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
- int nb_sectors)
-{
- return blk_rw(blk, sector_num, buf, nb_sectors, blk_read_entry, 0);
-}
-
int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
int count)
{
- BlockDriverState *bs = blk_bs(blk);
int ret;
ret = blk_check_byte_request(blk, offset, count);
return ret;
}
- bdrv_no_throttling_begin(bs);
+ blk_root_drained_begin(blk->root);
ret = blk_pread(blk, offset, buf, count);
- bdrv_no_throttling_end(bs);
+ blk_root_drained_end(blk->root);
return ret;
}
-int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf,
- int nb_sectors)
+int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+ int count, BdrvRequestFlags flags)
{
- return blk_rw(blk, sector_num, (uint8_t*) buf, nb_sectors,
- blk_write_entry, 0);
+ return blk_prw(blk, offset, NULL, count, blk_write_entry,
+ flags | BDRV_REQ_ZERO_WRITE);
}
-int blk_write_zeroes(BlockBackend *blk, int64_t offset,
- int count, BdrvRequestFlags flags)
+int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
{
- return blk_prw(blk, offset, NULL, count, blk_write_entry,
- flags | BDRV_REQ_ZERO_WRITE);
+ return bdrv_make_zero(blk->root, flags);
}
static void error_callback_bh(void *opaque)
blk_aio_complete(acb);
}
-BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t offset,
- int count, BdrvRequestFlags flags,
- BlockCompletionFunc *cb, void *opaque)
+BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+ int count, BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque)
{
return blk_aio_prwv(blk, offset, count, NULL, blk_aio_write_entry,
flags | BDRV_REQ_ZERO_WRITE, cb, opaque);
return bdrv_nb_sectors(blk_bs(blk));
}
-BlockAIOCB *blk_aio_readv(BlockBackend *blk, int64_t sector_num,
- QEMUIOVector *iov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
+BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
+ QEMUIOVector *qiov, BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque)
{
- if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
- return blk_abort_aio_request(blk, cb, opaque, -EINVAL);
- }
-
- assert(nb_sectors << BDRV_SECTOR_BITS == iov->size);
- return blk_aio_prwv(blk, sector_num << BDRV_SECTOR_BITS, iov->size, iov,
- blk_aio_read_entry, 0, cb, opaque);
+ return blk_aio_prwv(blk, offset, qiov->size, qiov,
+ blk_aio_read_entry, flags, cb, opaque);
}
-BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num,
- QEMUIOVector *iov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
+BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
+ QEMUIOVector *qiov, BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque)
{
- if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
- return blk_abort_aio_request(blk, cb, opaque, -EINVAL);
- }
-
- assert(nb_sectors << BDRV_SECTOR_BITS == iov->size);
- return blk_aio_prwv(blk, sector_num << BDRV_SECTOR_BITS, iov->size, iov,
- blk_aio_write_entry, 0, cb, opaque);
+ return blk_aio_prwv(blk, offset, qiov->size, qiov,
+ blk_aio_write_entry, flags, cb, opaque);
}
BlockAIOCB *blk_aio_flush(BlockBackend *blk,
bdrv_aio_cancel_async(acb);
}
-int blk_aio_multiwrite(BlockBackend *blk, BlockRequest *reqs, int num_reqs)
-{
- int i, ret;
-
- for (i = 0; i < num_reqs; i++) {
- ret = blk_check_request(blk, reqs[i].sector, reqs[i].nb_sectors);
- if (ret < 0) {
- return ret;
- }
- }
-
- return bdrv_aio_multiwrite(blk_bs(blk), reqs, num_reqs);
-}
-
int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
{
if (!blk_is_available(blk)) {
}
}
-int blk_get_max_transfer_length(BlockBackend *blk)
+/* Returns the maximum transfer length, in bytes; guaranteed nonzero */
+uint32_t blk_get_max_transfer(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
+ uint32_t max = 0;
if (bs) {
- return bs->bl.max_transfer_length;
- } else {
- return 0;
+ max = bs->bl.max_transfer;
}
+ return MIN_NON_ZERO(max, INT_MAX);
}
int blk_get_max_iov(BlockBackend *blk)
BlockDriverState *bs = blk_bs(blk);
if (bs) {
+ if (blk->public.throttle_state) {
+ throttle_timers_detach_aio_context(&blk->public.throttle_timers);
+ }
bdrv_set_aio_context(bs, new_context);
+ if (blk->public.throttle_state) {
+ throttle_timers_attach_aio_context(&blk->public.throttle_timers,
+ new_context);
+ }
}
}
return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
}
-int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t offset,
- int count, BdrvRequestFlags flags)
+int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+ int count, BdrvRequestFlags flags)
{
return blk_co_pwritev(blk, offset, count, NULL,
flags | BDRV_REQ_ZERO_WRITE);
blk->root_state.open_flags = blk->root->bs->open_flags;
blk->root_state.read_only = blk->root->bs->read_only;
blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes;
-
- if (blk->root_state.throttle_group) {
- g_free(blk->root_state.throttle_group);
- throttle_group_unref(blk->root_state.throttle_state);
- }
- if (blk->root->bs->throttle_state) {
- const char *name = throttle_group_get_name(blk->root->bs);
- blk->root_state.throttle_group = g_strdup(name);
- blk->root_state.throttle_state = throttle_group_incref(name);
- } else {
- blk->root_state.throttle_group = NULL;
- blk->root_state.throttle_state = NULL;
- }
}
/*
void blk_apply_root_state(BlockBackend *blk, BlockDriverState *bs)
{
bs->detect_zeroes = blk->root_state.detect_zeroes;
- if (blk->root_state.throttle_group) {
- bdrv_io_limits_enable(bs, blk->root_state.throttle_group);
- }
}
/*
return result;
}
+
+
+/* throttling disk I/O limits */
+void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg)
+{
+ throttle_group_config(blk, cfg);
+}
+
+void blk_io_limits_disable(BlockBackend *blk)
+{
+ assert(blk->public.throttle_state);
+ bdrv_drained_begin(blk_bs(blk));
+ throttle_group_unregister_blk(blk);
+ bdrv_drained_end(blk_bs(blk));
+}
+
+/* should be called before blk_set_io_limits if a limit is set */
+void blk_io_limits_enable(BlockBackend *blk, const char *group)
+{
+ assert(!blk->public.throttle_state);
+ throttle_group_register_blk(blk, group);
+}
+
+void blk_io_limits_update_group(BlockBackend *blk, const char *group)
+{
+ /* this BB is not part of any group */
+ if (!blk->public.throttle_state) {
+ return;
+ }
+
+ /* this BB is a part of the same group than the one we want */
+ if (!g_strcmp0(throttle_group_get_name(blk), group)) {
+ return;
+ }
+
+ /* need to change the group this bs belong to */
+ blk_io_limits_disable(blk);
+ blk_io_limits_enable(blk, group);
+}
+
+static void blk_root_drained_begin(BdrvChild *child)
+{
+ BlockBackend *blk = child->opaque;
+
+ /* Note that blk->root may not be accessible here yet if we are just
+ * attaching to a BlockDriverState that is drained. Use child instead. */
+
+ if (blk->public.io_limits_disabled++ == 0) {
+ throttle_group_restart_blk(blk);
+ }
+}
+
+static void blk_root_drained_end(BdrvChild *child)
+{
+ BlockBackend *blk = child->opaque;
+
+ assert(blk->public.io_limits_disabled);
+ --blk->public.io_limits_disabled;
+}