#include "block/throttle-groups.h"
#include "sysemu/blockdev.h"
#include "sysemu/sysemu.h"
-#include "qapi-event.h"
+#include "qapi/error.h"
+#include "qapi/qapi-events-block.h"
#include "qemu/id.h"
+#include "qemu/option.h"
#include "trace.h"
#include "migration/misc.h"
static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
+/*
+ * One pair of AioContext callbacks registered on a BlockBackend, together
+ * with the opaque pointer they were registered with.  Entries are linked
+ * into BlockBackend.aio_notifiers.
+ */
+typedef struct BlockBackendAioNotifier BlockBackendAioNotifier;
+struct BlockBackendAioNotifier {
+    /* Callback that receives the new AioContext and the opaque pointer */
+    void (*attached_aio_context)(AioContext *new_context, void *opaque);
+    /* Callback that receives only the opaque pointer */
+    void (*detach_aio_context)(void *opaque);
+    /* Caller-supplied pointer registered together with the two callbacks */
+    void *opaque;
+    /* Linkage in the owning BlockBackend's aio_notifiers list */
+    QLIST_ENTRY(BlockBackendAioNotifier) list;
+};
+
struct BlockBackend {
char *name;
int refcnt;
bool allow_write_beyond_eof;
NotifierList remove_bs_notifiers, insert_bs_notifiers;
+ QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers; /* added to the root
+ * node by blk_root_attach() and removed again by blk_root_detach() */
int quiesce_counter;
VMChangeStateEntry *vmsh;
bool force_allow_inactivate;
+
+ /* Number of in-flight aio requests. BlockDriverState also counts
+ * in-flight requests but aio requests can exist even when blk->root is
+ * NULL, so we cannot rely on its counter for that case.
+ * Accessed with atomic ops.
+ */
+ unsigned int in_flight;
+ AioWait wait; /* kicked by blk_dec_in_flight(); waited on by blk_drain() */
};
typedef struct BlockBackendAIOCB {
return 0;
}
+/*
+ * .attach callback of child_root.
+ *
+ * Emits the blk_root_attach trace event, then registers every notifier
+ * recorded in blk->aio_notifiers on the child's node (child->bs).
+ */
+static void blk_root_attach(BdrvChild *child)
+{
+    BlockBackend *blk = child->opaque;
+    BlockDriverState *bs = child->bs;
+    BlockBackendAioNotifier *n;
+
+    trace_blk_root_attach(child, blk, bs);
+
+    QLIST_FOREACH(n, &blk->aio_notifiers, list) {
+        bdrv_add_aio_context_notifier(bs, n->attached_aio_context,
+                                      n->detach_aio_context, n->opaque);
+    }
+}
+
+/*
+ * .detach callback of child_root.
+ *
+ * Emits the blk_root_detach trace event, then removes every notifier
+ * recorded in blk->aio_notifiers from the child's node (child->bs).  The
+ * entries themselves stay on the BlockBackend's list.
+ */
+static void blk_root_detach(BdrvChild *child)
+{
+    BlockBackend *blk = child->opaque;
+    BlockDriverState *bs = child->bs;
+    BlockBackendAioNotifier *n;
+
+    trace_blk_root_detach(child, blk, bs);
+
+    QLIST_FOREACH(n, &blk->aio_notifiers, list) {
+        bdrv_remove_aio_context_notifier(bs, n->attached_aio_context,
+                                         n->detach_aio_context, n->opaque);
+    }
+}
+
static const BdrvChildRole child_root = {
.inherit_options = blk_root_inherit_options,
.activate = blk_root_activate,
.inactivate = blk_root_inactivate,
+
+ .attach = blk_root_attach, /* adds blk->aio_notifiers to the node */
+ .detach = blk_root_detach, /* removes blk->aio_notifiers from the node */
};
/*
notifier_list_init(&blk->remove_bs_notifiers);
notifier_list_init(&blk->insert_bs_notifiers);
+ QLIST_INIT(&blk->aio_notifiers);
QTAILQ_INSERT_TAIL(&block_backends, blk, link);
return blk;
}
assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
+ assert(QLIST_EMPTY(&blk->aio_notifiers));
QTAILQ_REMOVE(&block_backends, blk, link);
drive_info_del(blk->legacy_dinfo);
block_acct_cleanup(&blk->stats);
blk_update_root_state(blk);
+ /* bdrv_root_unref_child() will cause blk->root to become stale and may
+ * switch to a completion coroutine later on. Let's drain all I/O here
+ * to avoid that and a potential QEMU crash.
+ */
+ blk_drain(blk);
bdrv_root_unref_child(blk->root);
blk->root = NULL;
}
typedef struct BlkRwCo {
BlockBackend *blk;
int64_t offset;
- QEMUIOVector *qiov;
+ void *iobuf; /* a QEMUIOVector * for the read/write/discard/ioctl entry
+ * points; blk_aio_ioctl() stores the raw ioctl buffer here instead */
int ret;
BdrvRequestFlags flags;
} BlkRwCo;
static void blk_read_entry(void *opaque)
{
BlkRwCo *rwco = opaque;
+ QEMUIOVector *qiov = rwco->iobuf; /* iobuf is the read's I/O vector */
- rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size,
- rwco->qiov, rwco->flags);
+ rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size,
+ qiov, rwco->flags);
}
static void blk_write_entry(void *opaque)
{
BlkRwCo *rwco = opaque;
+ QEMUIOVector *qiov = rwco->iobuf; /* iobuf is the write's I/O vector */
- rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size,
- rwco->qiov, rwco->flags);
+ rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size,
+ qiov, rwco->flags);
}
static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
rwco = (BlkRwCo) {
.blk = blk,
.offset = offset,
- .qiov = &qiov,
+ .iobuf = &qiov,
.flags = flags,
.ret = NOT_DONE,
};
return bdrv_make_zero(blk->root, flags);
}
+static void blk_inc_in_flight(BlockBackend *blk)
+{
+ atomic_inc(&blk->in_flight); /* per-backend count; see BlockBackend.in_flight */
+}
+
+static void blk_dec_in_flight(BlockBackend *blk)
+{
+ atomic_dec(&blk->in_flight);
+ aio_wait_kick(&blk->wait); /* presumably lets the AIO_WAIT_WHILE() callers in
+ * blk_drain()/blk_drain_all() re-check in_flight - confirm */
+}
+
static void error_callback_bh(void *opaque)
{
struct BlockBackendAIOCB *acb = opaque;
- bdrv_dec_in_flight(acb->common.bs);
+ blk_dec_in_flight(acb->blk); /* presumably balances the blk_inc_in_flight()
+ * taken when this acb was created - confirm against the submitter */
acb->common.cb(acb->common.opaque, acb->ret);
qemu_aio_unref(acb);
}
{
struct BlockBackendAIOCB *acb;
- bdrv_inc_in_flight(blk_bs(blk));
+ blk_inc_in_flight(blk);
acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
acb->blk = blk;
acb->ret = ret;
static void blk_aio_complete(BlkAioEmAIOCB *acb)
{
if (acb->has_returned) {
- bdrv_dec_in_flight(acb->common.bs);
+ blk_dec_in_flight(acb->rwco.blk); /* balances blk_aio_prwv()'s increment */
acb->common.cb(acb->common.opaque, acb->rwco.ret);
qemu_aio_unref(acb);
}
}
static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
- QEMUIOVector *qiov, CoroutineEntry co_entry,
+ void *iobuf, CoroutineEntry co_entry,
BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque)
{
BlkAioEmAIOCB *acb;
Coroutine *co;
- bdrv_inc_in_flight(blk_bs(blk));
+ blk_inc_in_flight(blk);
acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
acb->rwco = (BlkRwCo) {
.blk = blk,
.offset = offset,
- .qiov = qiov,
+ .iobuf = iobuf,
.flags = flags,
.ret = NOT_DONE,
};
{
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;
+ QEMUIOVector *qiov = rwco->iobuf;
- assert(rwco->qiov->size == acb->bytes);
+ assert(qiov->size == acb->bytes);
rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
- rwco->qiov, rwco->flags);
+ qiov, rwco->flags);
blk_aio_complete(acb);
}
{
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;
+ QEMUIOVector *qiov = rwco->iobuf;
- assert(!rwco->qiov || rwco->qiov->size == acb->bytes);
+ assert(!qiov || qiov->size == acb->bytes);
rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
- rwco->qiov, rwco->flags);
+ qiov, rwco->flags);
blk_aio_complete(acb);
}
static void blk_ioctl_entry(void *opaque)
{
BlkRwCo *rwco = opaque;
+ QEMUIOVector *qiov = rwco->iobuf; /* ioctl buffer is iov[0].iov_base */
+
rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
- rwco->qiov->iov[0].iov_base);
+ qiov->iov[0].iov_base);
}
int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;
- rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
- rwco->qiov->iov[0].iov_base);
+ rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
+
blk_aio_complete(acb);
}
BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
BlockCompletionFunc *cb, void *opaque)
{
- QEMUIOVector qiov;
- struct iovec iov;
-
- iov = (struct iovec) {
- .iov_base = buf,
- .iov_len = 0,
- };
- qemu_iovec_init_external(&qiov, &iov, 1);
-
- return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque);
+ return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque);
+ /* buf is passed as the opaque iobuf itself, with no QEMUIOVector wrapper;
+ * req travels in the offset argument */
}
int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
return ret;
}
- return bdrv_co_pdiscard(blk_bs(blk), offset, bytes);
+ return bdrv_co_pdiscard(blk->root, offset, bytes);
}
int blk_co_flush(BlockBackend *blk)
void blk_drain(BlockBackend *blk)
{
- if (blk_bs(blk)) {
- bdrv_drain(blk_bs(blk));
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_drained_begin(bs); /* drained section spans the wait below */
+ }
+
+ /* We may have -ENOMEDIUM completions in flight */
+ AIO_WAIT_WHILE(&blk->wait,
+ blk_get_aio_context(blk),
+ atomic_mb_read(&blk->in_flight) > 0); /* runs even when bs is NULL */
+
+ if (bs) {
+ bdrv_drained_end(bs);
}
}
void blk_drain_all(void)
{
- bdrv_drain_all();
+ BlockBackend *blk = NULL;
+
+ bdrv_drain_all_begin(); /* paired with bdrv_drain_all_end() at the bottom */
+
+ while ((blk = blk_all_next(blk)) != NULL) {
+ AioContext *ctx = blk_get_aio_context(blk);
+
+ aio_context_acquire(ctx); /* held only around this backend's wait */
+
+ /* We may have -ENOMEDIUM completions in flight */
+ AIO_WAIT_WHILE(&blk->wait, ctx,
+ atomic_mb_read(&blk->in_flight) > 0);
+
+ aio_context_release(ctx);
+ }
+
+ bdrv_drain_all_end();
}
void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
bool is_read, int error)
{
IoOperationType optype;
+ BlockDriverState *bs = blk_bs(blk);
optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
- qapi_event_send_block_io_error(blk_name(blk),
- bdrv_get_node_name(blk_bs(blk)), optype,
+ qapi_event_send_block_io_error(blk_name(blk), !!bs,
+ bs ? bdrv_get_node_name(bs) : NULL, optype,
action, blk_iostatus_is_enabled(blk),
error == ENOSPC, strerror(error),
&error_abort);
AioContext *blk_get_aio_context(BlockBackend *blk)
{
- BlockDriverState *bs = blk_bs(blk);
-
- if (bs) {
- return bdrv_get_aio_context(bs);
- } else {
- return qemu_get_aio_context();
- }
+ return bdrv_get_aio_context(blk_bs(blk)); /* NOTE(review): blk_bs() can be
+ * NULL (the removed code checked for it); this assumes
+ * bdrv_get_aio_context() copes with a NULL node - confirm */
}
static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
void (*attached_aio_context)(AioContext *new_context, void *opaque),
void (*detach_aio_context)(void *opaque), void *opaque)
{
+ BlockBackendAioNotifier *notifier;
BlockDriverState *bs = blk_bs(blk);
+ notifier = g_new(BlockBackendAioNotifier, 1);
+ notifier->attached_aio_context = attached_aio_context;
+ notifier->detach_aio_context = detach_aio_context;
+ notifier->opaque = opaque;
+ QLIST_INSERT_HEAD(&blk->aio_notifiers, notifier, list);
+
if (bs) {
bdrv_add_aio_context_notifier(bs, attached_aio_context,
detach_aio_context, opaque);
void (*detach_aio_context)(void *),
void *opaque)
{
+ BlockBackendAioNotifier *notifier;
BlockDriverState *bs = blk_bs(blk);
if (bs) {
bdrv_remove_aio_context_notifier(bs, attached_aio_context,
detach_aio_context, opaque);
}
+
+ QLIST_FOREACH(notifier, &blk->aio_notifiers, list) {
+ if (notifier->attached_aio_context == attached_aio_context &&
+ notifier->detach_aio_context == detach_aio_context &&
+ notifier->opaque == opaque) {
+ QLIST_REMOVE(notifier, list);
+ g_free(notifier);
+ return;
+ }
+ }
+
+ abort();
}
void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify)
static void blk_pdiscard_entry(void *opaque)
{
BlkRwCo *rwco = opaque;
- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size);
+ QEMUIOVector *qiov = rwco->iobuf; /* only its size (byte count) is used */
+
+ rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size);
}
int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
{
bdrv_unregister_buf(blk_bs(blk), host);
}
+
+/*
+ * Copy @bytes bytes from @blk_in at @off_in to @blk_out at @off_out.
+ *
+ * Both byte ranges are checked with blk_check_byte_request() first, source
+ * before destination; the first non-zero result is returned unchanged.
+ * Otherwise the request is forwarded to bdrv_co_copy_range() on the two
+ * root children with @read_flags and @write_flags, and its result is
+ * returned.
+ */
+int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
+                                   BlockBackend *blk_out, int64_t off_out,
+                                   int bytes, BdrvRequestFlags read_flags,
+                                   BdrvRequestFlags write_flags)
+{
+    int ret = blk_check_byte_request(blk_in, off_in, bytes);
+
+    if (ret == 0) {
+        /* Source range is fine; validate the destination range too */
+        ret = blk_check_byte_request(blk_out, off_out, bytes);
+    }
+    if (ret != 0) {
+        return ret;
+    }
+
+    return bdrv_co_copy_range(blk_in->root, off_in, blk_out->root, off_out,
+                              bytes, read_flags, write_flags);
+}