/*
* QEMU Block backends
*
- * Copyright (C) 2014 Red Hat, Inc.
+ * Copyright (C) 2014-2016 Red Hat, Inc.
*
* Authors:
#include "sysemu/blockdev.h"
#include "sysemu/sysemu.h"
#include "qapi-event.h"
+#include "qemu/id.h"
/* Number of coroutines to reserve per attached device model */
#define COROUTINE_POOL_RESERVATION 64
DriveInfo *legacy_dinfo; /* null unless created by drive_new() */
QTAILQ_ENTRY(BlockBackend) link; /* for block_backends */
QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
+ BlockBackendPublic public;
void *dev; /* attached device model, if any */
/* TODO change to DeviceState when all users are qdevified */
* can be used to restore those options in the new BDS on insert) */
BlockBackendRootState root_state;
+ bool enable_write_cache;
+
/* I/O stats (display with "info blockstats"). */
BlockAcctStats stats;
blk = g_new0(BlockBackend, 1);
blk->refcnt = 1;
+ qemu_co_queue_init(&blk->public.throttled_reqs[0]);
+ qemu_co_queue_init(&blk->public.throttled_reqs[1]);
+
notifier_list_init(&blk->remove_bs_notifiers);
notifier_list_init(&blk->insert_bs_notifiers);
+
QTAILQ_INSERT_TAIL(&block_backends, blk, link);
return blk;
}
bs = bdrv_new_root();
blk->root = bdrv_root_attach_child(bs, "root", &child_root);
+ blk->root->opaque = blk;
bs->blk = blk;
return blk;
}
return NULL;
}
+ blk_set_enable_write_cache(blk, true);
+
return blk;
}
return blk->root ? blk->root->bs : NULL;
}
-/*
- * Changes the BlockDriverState attached to @blk
- */
-void blk_set_bs(BlockBackend *blk, BlockDriverState *bs)
-{
- bdrv_ref(bs);
-
- if (blk->root) {
- blk->root->bs->blk = NULL;
- bdrv_root_unref_child(blk->root);
- }
- assert(bs->blk == NULL);
-
- blk->root = bdrv_root_attach_child(bs, "root", &child_root);
- bs->blk = blk;
-}
-
/*
* Return @blk's DriveInfo if any, else null.
*/
abort();
}
+/*
+ * Returns a pointer to the publicly accessible fields of @blk.
+ */
+BlockBackendPublic *blk_get_public(BlockBackend *blk)
+{
+ return &blk->public;
+}
+
+/*
+ * Returns a BlockBackend given the associated @public fields.
+ */
+BlockBackend *blk_by_public(BlockBackendPublic *public)
+{
+ return container_of(public, BlockBackend, public);
+}
+
/*
* Disassociates the currently associated BlockDriverState from @blk.
*/
notifier_list_notify(&blk->remove_bs_notifiers, blk);
blk_update_root_state(blk);
+ if (blk->public.throttle_state) {
+ blk_io_limits_disable(blk);
+ }
blk->root->bs->blk = NULL;
bdrv_root_unref_child(blk->root);
assert(!blk->root && !bs->blk);
bdrv_ref(bs);
blk->root = bdrv_root_attach_child(bs, "root", &child_root);
+ blk->root->opaque = blk;
bs->blk = blk;
notifier_list_notify(&blk->insert_bs_notifiers, blk);
return ret;
}
- return bdrv_co_do_preadv(blk_bs(blk), offset, bytes, qiov, flags);
+ /* throttling disk I/O */
+ if (blk->public.throttle_state) {
+ throttle_group_co_io_limits_intercept(blk, bytes, false);
+ }
+
+ return bdrv_co_preadv(blk_bs(blk), offset, bytes, qiov, flags);
}
static int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
- int ret = blk_check_byte_request(blk, offset, bytes);
+ int ret;
+
+ ret = blk_check_byte_request(blk, offset, bytes);
if (ret < 0) {
return ret;
}
- return bdrv_co_do_pwritev(blk_bs(blk), offset, bytes, qiov, flags);
+ /* throttling disk I/O */
+ if (blk->public.throttle_state) {
+ throttle_group_co_io_limits_intercept(blk, bytes, true);
+ }
+
+ if (!blk->enable_write_cache) {
+ flags |= BDRV_REQ_FUA;
+ }
+
+ return bdrv_co_pwritev(blk_bs(blk), offset, bytes, qiov, flags);
}
typedef struct BlkRwCo {
return rwco.ret;
}
-static int blk_rw(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
- int nb_sectors, CoroutineEntry co_entry,
- BdrvRequestFlags flags)
+int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
+ int count)
{
- if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
- return -EINVAL;
- }
-
- return blk_prw(blk, sector_num << BDRV_SECTOR_BITS, buf,
- nb_sectors << BDRV_SECTOR_BITS, co_entry, flags);
-}
-
-int blk_read(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
- int nb_sectors)
-{
- return blk_rw(blk, sector_num, buf, nb_sectors, blk_read_entry, 0);
-}
-
-int blk_read_unthrottled(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
- int nb_sectors)
-{
- BlockDriverState *bs = blk_bs(blk);
- bool enabled;
int ret;
- ret = blk_check_request(blk, sector_num, nb_sectors);
+ ret = blk_check_byte_request(blk, offset, count);
if (ret < 0) {
return ret;
}
- enabled = bs->io_limits_enabled;
- bs->io_limits_enabled = false;
- ret = blk_read(blk, sector_num, buf, nb_sectors);
- bs->io_limits_enabled = enabled;
+ bdrv_no_throttling_begin(blk_bs(blk));
+ ret = blk_pread(blk, offset, buf, count);
+ bdrv_no_throttling_end(blk_bs(blk));
return ret;
}
-int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf,
- int nb_sectors)
+int blk_write_zeroes(BlockBackend *blk, int64_t offset,
+ int count, BdrvRequestFlags flags)
{
- return blk_rw(blk, sector_num, (uint8_t*) buf, nb_sectors,
- blk_write_entry, 0);
-}
-
-int blk_write_zeroes(BlockBackend *blk, int64_t sector_num,
- int nb_sectors, BdrvRequestFlags flags)
-{
- return blk_rw(blk, sector_num, NULL, nb_sectors, blk_write_entry,
- BDRV_REQ_ZERO_WRITE);
+ return blk_prw(blk, offset, NULL, count, blk_write_entry,
+ flags | BDRV_REQ_ZERO_WRITE);
}
static void error_callback_bh(void *opaque)
typedef struct BlkAioEmAIOCB {
BlockAIOCB common;
BlkRwCo rwco;
+ int bytes;
bool has_returned;
QEMUBH* bh;
} BlkAioEmAIOCB;
blk_aio_complete(opaque);
}
-static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset,
+static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
QEMUIOVector *qiov, CoroutineEntry co_entry,
BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque)
.flags = flags,
.ret = NOT_DONE,
};
+ acb->bytes = bytes;
acb->bh = NULL;
acb->has_returned = false;
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;
- rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size,
+ assert(rwco->qiov->size == acb->bytes);
+ rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
rwco->qiov, rwco->flags);
blk_aio_complete(acb);
}
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;
- rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset,
- rwco->qiov ? rwco->qiov->size : 0,
+ assert(!rwco->qiov || rwco->qiov->size == acb->bytes);
+ rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
rwco->qiov, rwco->flags);
blk_aio_complete(acb);
}
-BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t sector_num,
- int nb_sectors, BdrvRequestFlags flags,
+BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t offset,
+ int count, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque)
{
- if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
- return blk_abort_aio_request(blk, cb, opaque, -EINVAL);
- }
-
- return blk_aio_prwv(blk, sector_num << BDRV_SECTOR_BITS, NULL,
- blk_aio_write_entry, BDRV_REQ_ZERO_WRITE, cb, opaque);
+ return blk_aio_prwv(blk, offset, count, NULL, blk_aio_write_entry,
+ flags | BDRV_REQ_ZERO_WRITE, cb, opaque);
}
int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count)
return count;
}
-int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count)
+int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count,
+ BdrvRequestFlags flags)
{
- int ret = blk_prw(blk, offset, (void*) buf, count, blk_write_entry, 0);
+ int ret = blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
+ flags);
if (ret < 0) {
return ret;
}
return bdrv_nb_sectors(blk_bs(blk));
}
-BlockAIOCB *blk_aio_readv(BlockBackend *blk, int64_t sector_num,
- QEMUIOVector *iov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
+BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
+ QEMUIOVector *qiov, BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque)
{
- if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
- return blk_abort_aio_request(blk, cb, opaque, -EINVAL);
- }
-
- return blk_aio_prwv(blk, sector_num << BDRV_SECTOR_BITS, iov,
- blk_aio_read_entry, 0, cb, opaque);
+ return blk_aio_prwv(blk, offset, qiov->size, qiov,
+ blk_aio_read_entry, flags, cb, opaque);
}
-BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num,
- QEMUIOVector *iov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
+BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
+ QEMUIOVector *qiov, BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque)
{
- if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
- return blk_abort_aio_request(blk, cb, opaque, -EINVAL);
- }
-
- return blk_aio_prwv(blk, sector_num << BDRV_SECTOR_BITS, iov,
- blk_aio_write_entry, 0, cb, opaque);
+ return blk_aio_prwv(blk, offset, qiov->size, qiov,
+ blk_aio_write_entry, flags, cb, opaque);
}
BlockAIOCB *blk_aio_flush(BlockBackend *blk,
int blk_enable_write_cache(BlockBackend *blk)
{
- BlockDriverState *bs = blk_bs(blk);
-
- if (bs) {
- return bdrv_enable_write_cache(bs);
- } else {
- return !!(blk->root_state.open_flags & BDRV_O_CACHE_WB);
- }
+ return blk->enable_write_cache;
}
void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
{
- BlockDriverState *bs = blk_bs(blk);
-
- if (bs) {
- bdrv_set_enable_write_cache(bs, wce);
- } else {
- if (wce) {
- blk->root_state.open_flags |= BDRV_O_CACHE_WB;
- } else {
- blk->root_state.open_flags &= ~BDRV_O_CACHE_WB;
- }
- }
+ blk->enable_write_cache = wce;
}
void blk_invalidate_cache(BlockBackend *blk, Error **errp)
return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
}
-int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t sector_num,
- int nb_sectors, BdrvRequestFlags flags)
+int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t offset,
+ int count, BdrvRequestFlags flags)
{
- int ret = blk_check_request(blk, sector_num, nb_sectors);
- if (ret < 0) {
- return ret;
- }
-
- return bdrv_co_write_zeroes(blk_bs(blk), sector_num, nb_sectors, flags);
+ return blk_co_pwritev(blk, offset, count, NULL,
+ flags | BDRV_REQ_ZERO_WRITE);
}
int blk_write_compressed(BlockBackend *blk, int64_t sector_num,
int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
int64_t pos, int size)
{
+ int ret;
+
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
- return bdrv_save_vmstate(blk_bs(blk), buf, pos, size);
+ ret = bdrv_save_vmstate(blk_bs(blk), buf, pos, size);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (ret == size && !blk->enable_write_cache) {
+ ret = bdrv_flush(blk_bs(blk));
+ }
+
+ return ret < 0 ? ret : size;
}
int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
g_free(blk->root_state.throttle_group);
throttle_group_unref(blk->root_state.throttle_state);
}
- if (blk->root->bs->throttle_state) {
- const char *name = throttle_group_get_name(blk->root->bs);
+ if (blk->public.throttle_state) {
+ const char *name = throttle_group_get_name(blk);
blk->root_state.throttle_group = g_strdup(name);
blk->root_state.throttle_state = throttle_group_incref(name);
} else {
{
bs->detect_zeroes = blk->root_state.detect_zeroes;
if (blk->root_state.throttle_group) {
- bdrv_io_limits_enable(bs, blk->root_state.throttle_group);
+ blk_io_limits_enable(blk, blk->root_state.throttle_group);
}
}
return result;
}
+
+
+/* throttling disk I/O limits */
+void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg)
+{
+ throttle_group_config(blk, cfg);
+}
+
+void blk_io_limits_disable(BlockBackend *blk)
+{
+ assert(blk->public.throttle_state);
+ bdrv_no_throttling_begin(blk_bs(blk));
+ throttle_group_unregister_blk(blk);
+ bdrv_no_throttling_end(blk_bs(blk));
+}
+
+/* should be called before blk_set_io_limits if a limit is set */
+void blk_io_limits_enable(BlockBackend *blk, const char *group)
+{
+ assert(!blk->public.throttle_state);
+ throttle_group_register_blk(blk, group);
+}
+
+void blk_io_limits_update_group(BlockBackend *blk, const char *group)
+{
+ /* this BB is not part of any group */
+ if (!blk->public.throttle_state) {
+ return;
+ }
+
+ /* this BB is a part of the same group than the one we want */
+ if (!g_strcmp0(throttle_group_get_name(blk), group)) {
+ return;
+ }
+
+ /* need to change the group this bs belong to */
+ blk_io_limits_disable(blk);
+ blk_io_limits_enable(blk, group);
+}