#include "hw/qdev-core.h"
#include "sysemu/blockdev.h"
#include "sysemu/runstate.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/replay.h"
#include "qapi/error.h"
#include "qapi/qapi-events-block.h"
#include "qemu/id.h"
QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;
int quiesce_counter;
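+ /* Requests queued while drained; see blk_wait_while_drained() */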
+ CoQueue queued_requests;
+ bool disable_request_queuing;
+
VMChangeStateEntry *vmsh;
bool force_allow_inactivate;
static QTAILQ_HEAD(, BlockBackend) monitor_block_backends =
QTAILQ_HEAD_INITIALIZER(monitor_block_backends);
-static void blk_root_inherit_options(int *child_flags, QDict *child_options,
+static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format,
+ int *child_flags, QDict *child_options,
int parent_flags, QDict *parent_options)
{
/* We're not supposed to call this function for root nodes */
abort();
}
-static const BdrvChildRole child_root = {
+static const BdrvChildClass child_root = {
.inherit_options = blk_root_inherit_options,
.change_media = blk_root_change_media,
block_acct_init(&blk->stats);
+ qemu_co_queue_init(&blk->queued_requests);
notifier_list_init(&blk->remove_bs_notifiers);
notifier_list_init(&blk->insert_bs_notifiers);
QLIST_INIT(&blk->aio_notifiers);
return blk;
}
+/*
+ * Create a new BlockBackend connected to an existing BlockDriverState.
+ *
+ * @perm is a bitmask of BLK_PERM_* constants which describes the
+ * permissions to request for @bs, which is attached to this
+ * BlockBackend. @shared_perm is a bitmask which describes which
+ * permissions may be granted to other users of the attached node.
+ * Both sets of permissions can be changed later using blk_set_perm().
+ *
+ * Return the new BlockBackend on success, NULL on failure.
+ */
+BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
+ uint64_t shared_perm, Error **errp)
+{
+ BlockBackend *blk = blk_new(bdrv_get_aio_context(bs), perm, shared_perm);
+
+ if (blk_insert_bs(blk, bs, errp) < 0) {
+ blk_unref(blk);
+ return NULL;
+ }
+ return blk;
+}
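+
+/*
+ * Usage sketch (illustrative; the permission bits are an assumption of
+ * the example, not mandated by this function): callers that used to pair
+ * blk_new() with blk_insert_bs() by hand can now write
+ *
+ *     blk = blk_new_with_bs(bs, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE,
+ *                           BLK_PERM_ALL, errp);
+ *     if (!blk) {
+ *         return -EPERM;
+ *     }
+ *
+ * On failure the half-constructed BlockBackend has already been
+ * unreferenced, so no extra cleanup is needed.
+ */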
+
/*
* Creates a new BlockBackend, opens a new BlockDriverState, and connects both.
* The new BlockBackend is in the main AioContext.
return NULL;
}
- blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk->ctx,
- perm, BLK_PERM_ALL, blk, errp);
+ blk->root = bdrv_root_attach_child(bs, "root", &child_root,
+ BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
+ blk->ctx, perm, BLK_PERM_ALL, blk, errp);
if (!blk->root) {
blk_unref(blk);
return NULL;
{
BdrvChild *child;
QLIST_FOREACH(child, &bs->parents, next_parent) {
- if (child->role == &child_root) {
+ if (child->klass == &child_root) {
return child->opaque;
}
}
BdrvChild *c;
QLIST_FOREACH(c, &bs->parents, next_parent) {
- if (c->role != &child_root) {
+ if (c->klass != &child_root) {
return false;
}
}
{
ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
bdrv_ref(bs);
- blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk->ctx,
- blk->perm, blk->shared_perm, blk, errp);
+ blk->root = bdrv_root_attach_child(bs, "root", &child_root,
+ BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
+ blk->ctx, blk->perm, blk->shared_perm,
+ blk, errp);
if (blk->root == NULL) {
return -EPERM;
}
blk->allow_aio_context_change = allow;
}
+void blk_set_disable_request_queuing(BlockBackend *blk, bool disable)
+{
+ blk->disable_request_queuing = disable;
+}
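+
+/*
+ * Illustrative only: a device that already guarantees it will not submit
+ * new requests while drained can opt out of the queuing done by
+ * blk_wait_while_drained():
+ *
+ *     blk_set_disable_request_queuing(blk, true);
+ */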
+
static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
size_t size)
{
return 0;
}
-int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
- unsigned int bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags)
+/* To be called between exactly one pair of blk_inc/dec_in_flight() */
+static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
+{
+ assert(blk->in_flight > 0);
+
+ if (blk->quiesce_counter && !blk->disable_request_queuing) {
+ blk_dec_in_flight(blk);
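+ /* Parked until the qemu_co_enter_next() loop in blk_root_drained_end() */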
+ qemu_co_queue_wait(&blk->queued_requests, NULL);
+ blk_inc_in_flight(blk);
+ }
+}
+
+/* To be called between exactly one pair of blk_inc/dec_in_flight() */
+static int coroutine_fn
+blk_do_preadv(BlockBackend *blk, int64_t offset, unsigned int bytes,
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
{
int ret;
- BlockDriverState *bs = blk_bs(blk);
+ BlockDriverState *bs;
+
+ blk_wait_while_drained(blk);
+
+ /* Call blk_bs() only after waiting, the graph may have changed */
+ bs = blk_bs(blk);
trace_blk_co_preadv(blk, bs, offset, bytes, flags);
ret = blk_check_byte_request(blk, offset, bytes);
return ret;
}
-int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
- unsigned int bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags)
+int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
+ unsigned int bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
{
int ret;
- BlockDriverState *bs = blk_bs(blk);
+
+ blk_inc_in_flight(blk);
+ ret = blk_do_preadv(blk, offset, bytes, qiov, flags);
+ blk_dec_in_flight(blk);
+
+ return ret;
+}
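+
+/*
+ * The coroutine wrappers below all follow the same pattern: bracket the
+ * blk_do_*() helper with a blk_inc/dec_in_flight() pair so that a
+ * concurrent drain both counts the request and, via
+ * blk_wait_while_drained(), can park it until the drained section ends.
+ */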
+
+/* To be called between exactly one pair of blk_inc/dec_in_flight() */
+static int coroutine_fn
+blk_do_pwritev_part(BlockBackend *blk, int64_t offset, unsigned int bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags)
+{
+ int ret;
+ BlockDriverState *bs;
+
+ blk_wait_while_drained(blk);
+
+ /* Call blk_bs() only after waiting, the graph may have changed */
+ bs = blk_bs(blk);
trace_blk_co_pwritev(blk, bs, offset, bytes, flags);
ret = blk_check_byte_request(blk, offset, bytes);
flags |= BDRV_REQ_FUA;
}
- ret = bdrv_co_pwritev(blk->root, offset, bytes, qiov, flags);
+ ret = bdrv_co_pwritev_part(blk->root, offset, bytes, qiov, qiov_offset,
+ flags);
bdrv_dec_in_flight(bs);
return ret;
}
+int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
+ unsigned int bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags)
+{
+ int ret;
+
+ blk_inc_in_flight(blk);
+ ret = blk_do_pwritev_part(blk, offset, bytes, qiov, qiov_offset, flags);
+ blk_dec_in_flight(blk);
+
+ return ret;
+}
+
+int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
+ unsigned int bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
+{
+ return blk_co_pwritev_part(blk, offset, bytes, qiov, 0, flags);
+}
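+
+/*
+ * Illustrative only: the _part variant lets a caller write a slice out of
+ * the middle of an existing I/O vector without building a new
+ * QEMUIOVector, e.g. the second half of a 4 KiB vector:
+ *
+ *     blk_co_pwritev_part(blk, offset, 2048, &qiov, 2048, 0);
+ */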
+
typedef struct BlkRwCo {
BlockBackend *blk;
int64_t offset;
BlkRwCo *rwco = opaque;
QEMUIOVector *qiov = rwco->iobuf;
- rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size,
+ rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, qiov->size,
qiov, rwco->flags);
aio_wait_kick();
}
BlkRwCo *rwco = opaque;
QEMUIOVector *qiov = rwco->iobuf;
- rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size,
- qiov, rwco->flags);
+ rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, qiov->size,
+ qiov, 0, rwco->flags);
aio_wait_kick();
}
.ret = NOT_DONE,
};
+ blk_inc_in_flight(blk);
if (qemu_in_coroutine()) {
/* Fast-path if already in coroutine context */
co_entry(&rwco);
bdrv_coroutine_enter(blk_bs(blk), co);
BDRV_POLL_WHILE(blk_bs(blk), rwco.ret == NOT_DONE);
}
+ blk_dec_in_flight(blk);
return rwco.ret;
}
-int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
- int count)
-{
- int ret;
-
- ret = blk_check_byte_request(blk, offset, count);
- if (ret < 0) {
- return ret;
- }
-
- blk_root_drained_begin(blk->root);
- ret = blk_pread(blk, offset, buf, count);
- blk_root_drained_end(blk->root, NULL);
- return ret;
-}
-
int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
int bytes, BdrvRequestFlags flags)
{
acb->blk = blk;
acb->ret = ret;
- aio_bh_schedule_oneshot(blk_get_aio_context(blk), error_callback_bh, acb);
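+ /*
+  * Schedule through the replay layer so that in record/replay mode the
+  * completion runs at the same point during replay; with replay disabled
+  * this falls through to an ordinary one-shot BH.
+  */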
+ replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
+ error_callback_bh, acb);
return &acb->common;
}
acb->has_returned = true;
if (acb->rwco.ret != NOT_DONE) {
- aio_bh_schedule_oneshot(blk_get_aio_context(blk),
- blk_aio_complete_bh, acb);
+ replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
+ blk_aio_complete_bh, acb);
}
return &acb->common;
QEMUIOVector *qiov = rwco->iobuf;
assert(qiov->size == acb->bytes);
- rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
+ rwco->ret = blk_do_preadv(rwco->blk, rwco->offset, acb->bytes,
qiov, rwco->flags);
blk_aio_complete(acb);
}
QEMUIOVector *qiov = rwco->iobuf;
assert(!qiov || qiov->size == acb->bytes);
- rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
- qiov, rwco->flags);
+ rwco->ret = blk_do_pwritev_part(rwco->blk, rwco->offset, acb->bytes,
+ qiov, 0, rwco->flags);
blk_aio_complete(acb);
}
blk_aio_write_entry, flags, cb, opaque);
}
-static void blk_aio_flush_entry(void *opaque)
-{
- BlkAioEmAIOCB *acb = opaque;
- BlkRwCo *rwco = &acb->rwco;
-
- rwco->ret = blk_co_flush(rwco->blk);
- blk_aio_complete(acb);
-}
-
-BlockAIOCB *blk_aio_flush(BlockBackend *blk,
- BlockCompletionFunc *cb, void *opaque)
-{
- return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque);
-}
-
-static void blk_aio_pdiscard_entry(void *opaque)
-{
- BlkAioEmAIOCB *acb = opaque;
- BlkRwCo *rwco = &acb->rwco;
-
- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, acb->bytes);
- blk_aio_complete(acb);
-}
-
-BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
- int64_t offset, int bytes,
- BlockCompletionFunc *cb, void *opaque)
-{
- return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0,
- cb, opaque);
-}
-
void blk_aio_cancel(BlockAIOCB *acb)
{
bdrv_aio_cancel(acb);
bdrv_aio_cancel_async(acb);
}
-int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
+/* To be called between exactly one pair of blk_inc/dec_in_flight() */
+static int coroutine_fn
+blk_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
{
+ blk_wait_while_drained(blk);
+
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
BlkRwCo *rwco = opaque;
QEMUIOVector *qiov = rwco->iobuf;
- rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
- qiov->iov[0].iov_base);
+ rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, qiov->iov[0].iov_base);
aio_wait_kick();
}
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;
- rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
+ rwco->ret = blk_do_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
blk_aio_complete(acb);
}
return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque);
}
-int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
+/* To be called between exactly one pair of blk_inc/dec_in_flight() */
+static int coroutine_fn
+blk_do_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
{
- int ret = blk_check_byte_request(blk, offset, bytes);
+ int ret;
+
+ blk_wait_while_drained(blk);
+
+ ret = blk_check_byte_request(blk, offset, bytes);
if (ret < 0) {
return ret;
}
return bdrv_co_pdiscard(blk->root, offset, bytes);
}
-int blk_co_flush(BlockBackend *blk)
+static void blk_aio_pdiscard_entry(void *opaque)
+{
+ BlkAioEmAIOCB *acb = opaque;
+ BlkRwCo *rwco = &acb->rwco;
+
+ rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, acb->bytes);
+ blk_aio_complete(acb);
+}
+
+BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
+ int64_t offset, int bytes,
+ BlockCompletionFunc *cb, void *opaque)
+{
+ return blk_aio_prwv(blk, offset, bytes, NULL, blk_aio_pdiscard_entry, 0,
+ cb, opaque);
+}
+
+int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
+{
+ int ret;
+
+ blk_inc_in_flight(blk);
+ ret = blk_do_pdiscard(blk, offset, bytes);
+ blk_dec_in_flight(blk);
+
+ return ret;
+}
+
+static void blk_pdiscard_entry(void *opaque)
+{
+ BlkRwCo *rwco = opaque;
+ QEMUIOVector *qiov = rwco->iobuf;
+
+ rwco->ret = blk_do_pdiscard(rwco->blk, rwco->offset, qiov->size);
+ aio_wait_kick();
+}
+
+int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
{
+ return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0);
+}
+
+/* To be called between exactly one pair of blk_inc/dec_in_flight() */
+static int coroutine_fn blk_do_flush(BlockBackend *blk)
+{
+ blk_wait_while_drained(blk);
+
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
return bdrv_co_flush(blk_bs(blk));
}
+static void blk_aio_flush_entry(void *opaque)
+{
+ BlkAioEmAIOCB *acb = opaque;
+ BlkRwCo *rwco = &acb->rwco;
+
+ rwco->ret = blk_do_flush(rwco->blk);
+ blk_aio_complete(acb);
+}
+
+BlockAIOCB *blk_aio_flush(BlockBackend *blk,
+ BlockCompletionFunc *cb, void *opaque)
+{
+ return blk_aio_prwv(blk, 0, 0, NULL, blk_aio_flush_entry, 0, cb, opaque);
+}
+
+int coroutine_fn blk_co_flush(BlockBackend *blk)
+{
+ int ret;
+
+ blk_inc_in_flight(blk);
+ ret = blk_do_flush(blk);
+ blk_dec_in_flight(blk);
+
+ return ret;
+}
+
static void blk_flush_entry(void *opaque)
{
BlkRwCo *rwco = opaque;
- rwco->ret = blk_co_flush(rwco->blk);
+ rwco->ret = blk_do_flush(rwco->blk);
aio_wait_kick();
}
BDRV_REQ_WRITE_COMPRESSED);
}
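+/*
+ * A note on the new parameters (as used by bdrv_truncate()): @exact
+ * requests that the node be resized to exactly @offset rather than
+ * allowing a larger size when the driver cannot resize precisely, and
+ * @flags is passed through as BdrvRequestFlags, e.g. BDRV_REQ_ZERO_WRITE
+ * to require that the new area reads as zeroes.
+ */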
-int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc,
- Error **errp)
+int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
+ PreallocMode prealloc, BdrvRequestFlags flags, Error **errp)
{
if (!blk_is_available(blk)) {
error_setg(errp, "No medium inserted");
return -ENOMEDIUM;
}
- return bdrv_truncate(blk->root, offset, prealloc, errp);
-}
-
-static void blk_pdiscard_entry(void *opaque)
-{
- BlkRwCo *rwco = opaque;
- QEMUIOVector *qiov = rwco->iobuf;
-
- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size);
- aio_wait_kick();
-}
-
-int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
-{
- return blk_prw(blk, offset, NULL, bytes, blk_pdiscard_entry, 0);
+ return bdrv_truncate(blk->root, offset, exact, prealloc, flags, errp);
}
int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
if (blk->dev_ops && blk->dev_ops->drained_end) {
blk->dev_ops->drained_end(blk->dev_opaque);
}
+ while (qemu_co_enter_next(&blk->queued_requests, NULL)) {
+ /* Resume all queued requests */
+ }
}
}
{
return blk->root;
}
+
+int blk_make_empty(BlockBackend *blk, Error **errp)
+{
+ if (!blk_is_available(blk)) {
+ error_setg(errp, "No medium inserted");
+ return -ENOMEDIUM;
+ }
+
+ return bdrv_make_empty(blk->root, errp);
+}
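+
+/*
+ * Illustrative only: blk_make_empty() is a thin permission-checked
+ * wrapper around bdrv_make_empty(), so a caller such as a commit job
+ * could finish with
+ *
+ *     ret = blk_make_empty(blk, errp);
+ *     if (ret < 0) {
+ *         return ret;
+ *     }
+ */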