static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
QEMUIOVector *iov);
-static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
+ int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags);
-static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
+ int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags);
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
int64_t sector_num,
* @is_write: whether the I/O is a write
*/
static void bdrv_io_limits_intercept(BlockDriverState *bs,
- int nb_sectors,
+ unsigned int bytes,
bool is_write)
{
    /* does this I/O have to wait? */
}
/* the IO will be executed, do the accounting */
- throttle_account(&bs->throttle_state,
- is_write,
- nb_sectors * BDRV_SECTOR_SIZE);
+ throttle_account(&bs->throttle_state, is_write, bytes);
+
/* if the next request must wait -> do nothing */
if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
filename = qdict_get_try_str(options, "filename");
}
+ if (drv->bdrv_needs_filename && !filename) {
+ error_setg(errp, "The '%s' block driver requires a file name",
+ drv->format_name);
+ return -EINVAL;
+ }
+
trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
node_name = qdict_get_try_str(options, "node-name");
goto fail;
}
qdict_del(options, "filename");
- } else if (drv->bdrv_needs_filename && !filename) {
- error_setg(errp, "The '%s' block driver requires a file name",
- drv->format_name);
- ret = -EINVAL;
- goto fail;
}
if (!drv->bdrv_file_open) {
*/
static void tracked_request_end(BdrvTrackedRequest *req)
{
+ if (req->serialising) {
+ req->bs->serialising_in_flight--;
+ }
+
QLIST_REMOVE(req, list);
qemu_co_queue_restart_all(&req->wait_queue);
}
*/
static void tracked_request_begin(BdrvTrackedRequest *req,
BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors, bool is_write)
+ int64_t offset,
+ unsigned int bytes, bool is_write)
{
*req = (BdrvTrackedRequest){
.bs = bs,
- .sector_num = sector_num,
- .nb_sectors = nb_sectors,
- .is_write = is_write,
- .co = qemu_coroutine_self(),
+ .offset = offset,
+ .bytes = bytes,
+ .is_write = is_write,
+ .co = qemu_coroutine_self(),
+ .serialising = false,
+ .overlap_offset = offset,
+ .overlap_bytes = bytes,
};
qemu_co_queue_init(&req->wait_queue);
QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
}
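
A note on the compound-literal assignment in tracked_request_begin(): any
BdrvTrackedRequest field not named in the initializer is implicitly zeroed,
which is what lets later hunks rely on waiting_for starting out NULL. A
standalone sketch of the idiom, with a hypothetical struct standing in for
BdrvTrackedRequest:

    #include <assert.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Hypothetical stand-in for BdrvTrackedRequest */
    struct req {
        int64_t offset;
        unsigned int bytes;
        bool serialising;
        void *waiting_for;
    };

    int main(void)
    {
        struct req r;
        r.waiting_for = &r;              /* stale state from a previous use */

        /* Compound-literal assignment: fields not named here
         * (waiting_for) are implicitly zero-initialized. */
        r = (struct req){
            .offset = 4096,
            .bytes = 512,
            .serialising = false,
        };
        assert(r.waiting_for == NULL);
        return 0;
    }
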
+static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
+{
+ int64_t overlap_offset = req->offset & ~(align - 1);
+ unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
+ - overlap_offset;
+
+ if (!req->serialising) {
+ req->bs->serialising_in_flight++;
+ req->serialising = true;
+ }
+
+ req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
+ req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
+}
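
mark_request_serialising() widens the overlap window outward to align
boundaries; align must be a power of two for the mask arithmetic to hold,
which bs->request_alignment and bdrv_get_cluster_size() satisfy in practice.
A self-contained check of the rounding, with ROUND_UP reproduced for the
power-of-two case:

    #include <assert.h>
    #include <stdint.h>

    /* Power-of-two rounding, as in QEMU's ROUND_UP */
    #define ROUND_UP(n, a) (((n) + (a) - 1) & ~((a) - 1))

    int main(void)
    {
        int64_t offset = 1000;
        unsigned int bytes = 100;        /* request covers [1000, 1100) */
        int64_t align = 512;

        int64_t overlap_offset = offset & ~(align - 1);
        unsigned int overlap_bytes = ROUND_UP(offset + bytes, align)
                                     - overlap_offset;

        /* Rounded outward to 512-byte boundaries: [512, 1536) */
        assert(overlap_offset == 512);
        assert(overlap_bytes == 1024);
        return 0;
    }
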
+
/**
* Round a region to cluster boundaries
*/
}
}
+static int bdrv_get_cluster_size(BlockDriverState *bs)
+{
+ BlockDriverInfo bdi;
+ int ret;
+
+ ret = bdrv_get_info(bs, &bdi);
+ if (ret < 0 || bdi.cluster_size == 0) {
+ return bs->request_alignment;
+ } else {
+ return bdi.cluster_size;
+ }
+}
+
static bool tracked_request_overlaps(BdrvTrackedRequest *req,
- int64_t sector_num, int nb_sectors) {
+ int64_t offset, unsigned int bytes)
+{
    /* aaaa   bbbb : req (aaaa) ends before this range (bbbb) begins */
- if (sector_num >= req->sector_num + req->nb_sectors) {
+ if (offset >= req->overlap_offset + req->overlap_bytes) {
return false;
}
    /* bbbb   aaaa : this range (bbbb) ends before req (aaaa) begins */
- if (req->sector_num >= sector_num + nb_sectors) {
+ if (req->overlap_offset >= offset + bytes) {
return false;
}
return true;
}
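
The rewritten tracked_request_overlaps() is the standard disjointness test
on half-open byte ranges: [a, a+la) and [b, b+lb) overlap unless one ends at
or before the point where the other begins. A minimal standalone version
with the edge cases spelled out:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Half-open interval overlap, mirroring tracked_request_overlaps() */
    static bool ranges_overlap(int64_t a, unsigned int la,
                               int64_t b, unsigned int lb)
    {
        if (a >= b + lb) {      /* [b, b+lb) is entirely before a */
            return false;
        }
        if (b >= a + la) {      /* [a, a+la) is entirely before b */
            return false;
        }
        return true;
    }

    int main(void)
    {
        assert(!ranges_overlap(0, 512, 512, 512)); /* touching, no overlap */
        assert(ranges_overlap(0, 513, 512, 512));  /* one byte of overlap */
        assert(ranges_overlap(100, 10, 0, 4096));  /* containment */
        return 0;
    }
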
-static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors)
+static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
{
+ BlockDriverState *bs = self->bs;
BdrvTrackedRequest *req;
- int64_t cluster_sector_num;
- int cluster_nb_sectors;
bool retry;
+ bool waited = false;
- /* If we touch the same cluster it counts as an overlap. This guarantees
- * that allocating writes will be serialized and not race with each other
- * for the same cluster. For example, in copy-on-read it ensures that the
- * CoR read and write operations are atomic and guest writes cannot
- * interleave between them.
- */
- bdrv_round_to_clusters(bs, sector_num, nb_sectors,
- &cluster_sector_num, &cluster_nb_sectors);
+ if (!bs->serialising_in_flight) {
+ return false;
+ }
do {
retry = false;
QLIST_FOREACH(req, &bs->tracked_requests, list) {
- if (tracked_request_overlaps(req, cluster_sector_num,
- cluster_nb_sectors)) {
+ if (req == self || (!req->serialising && !self->serialising)) {
+ continue;
+ }
+ if (tracked_request_overlaps(req, self->overlap_offset,
+ self->overlap_bytes))
+ {
/* Hitting this means there was a reentrant request, for
* example, a block driver issuing nested requests. This must
* never happen since it means deadlock.
*/
assert(qemu_coroutine_self() != req->co);
- qemu_co_queue_wait(&req->wait_queue);
- retry = true;
- break;
+ /* If the request is already (indirectly) waiting for us, or
+ * will wait for us as soon as it wakes up, then just go on
+ * (instead of producing a deadlock in the former case). */
+ if (!req->waiting_for) {
+ self->waiting_for = req;
+ qemu_co_queue_wait(&req->wait_queue);
+ self->waiting_for = NULL;
+ retry = true;
+ waited = true;
+ break;
+ }
}
}
} while (retry);
+
+ return waited;
}
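
The waiting_for field introduced here is what keeps two serialising requests
from queuing on each other forever. The toy model below isolates just that
decision: the types are hypothetical stripped-down versions, the overlap
test is omitted, and should_wait_on() is an invented name rather than a QEMU
function:

    #include <assert.h>
    #include <stdbool.h>
    #include <stddef.h>

    struct req {
        bool serialising;
        struct req *waiting_for;
    };

    /* Mirrors the guards in wait_serialising_requests(): skip self, skip
     * pairs where neither side serialises, and never block on a request
     * that is itself already waiting (the deadlock-avoidance case). */
    static bool should_wait_on(struct req *self, struct req *other)
    {
        if (other == self) {
            return false;
        }
        if (!other->serialising && !self->serialising) {
            return false;
        }
        return other->waiting_for == NULL;
    }

    int main(void)
    {
        struct req a = { .serialising = true };
        struct req b = { .serialising = true };

        assert(should_wait_on(&a, &b));  /* a may block on b ... */
        a.waiting_for = &b;
        assert(!should_wait_on(&b, &a)); /* ... then b must not block on a */
        return 0;
    }
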
/*
typedef struct RwCo {
BlockDriverState *bs;
- int64_t sector_num;
- int nb_sectors;
+ int64_t offset;
QEMUIOVector *qiov;
bool is_write;
int ret;
RwCo *rwco = opaque;
if (!rwco->is_write) {
- rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
- rwco->nb_sectors, rwco->qiov,
- rwco->flags);
- } else {
- rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
- rwco->nb_sectors, rwco->qiov,
+ rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
+ rwco->qiov->size, rwco->qiov,
rwco->flags);
+ } else {
+ rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
+ rwco->qiov->size, rwco->qiov,
+ rwco->flags);
}
}
/*
* Process a vectored synchronous request using coroutines
*/
-static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num,
- QEMUIOVector *qiov, bool is_write,
- BdrvRequestFlags flags)
+static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
+ QEMUIOVector *qiov, bool is_write,
+ BdrvRequestFlags flags)
{
Coroutine *co;
RwCo rwco = {
.bs = bs,
- .sector_num = sector_num,
- .nb_sectors = qiov->size >> BDRV_SECTOR_BITS,
+ .offset = offset,
.qiov = qiov,
.is_write = is_write,
.ret = NOT_DONE,
.flags = flags,
};
- assert((qiov->size & (BDRV_SECTOR_SIZE - 1)) == 0);
/**
* In sync call context, when the vcpu is blocked, this throttling timer
};
qemu_iovec_init_external(&qiov, &iov, 1);
- return bdrv_rwv_co(bs, sector_num, &qiov, is_write, flags);
+ return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
+ &qiov, is_write, flags);
}
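
bdrv_rw_co() now converts sectors to bytes once, at the boundary; since
sector_num is an int64_t, the shift by BDRV_SECTOR_BITS is exact for any
realistic image size. A quick sanity check of the conversion, using QEMU's
constant BDRV_SECTOR_BITS = 9:

    #include <assert.h>
    #include <stdint.h>

    #define BDRV_SECTOR_BITS 9
    #define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS)

    int main(void)
    {
        int64_t sector_num = 2097152;    /* sector index at the 1 GiB mark */
        int64_t offset = sector_num << BDRV_SECTOR_BITS;

        assert(offset == sector_num * (int64_t)BDRV_SECTOR_SIZE);
        assert(offset == 1LL << 30);     /* exactly 1 GiB */
        return 0;
    }
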
/* return < 0 if error. See bdrv_write() for the return codes */
return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
}
-int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov)
-{
- return bdrv_rwv_co(bs, sector_num, qiov, true, 0);
-}
-
int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, BdrvRequestFlags flags)
{
}
}
-int bdrv_pread(BlockDriverState *bs, int64_t offset,
- void *buf, int count1)
+int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
{
- uint8_t tmp_buf[BDRV_SECTOR_SIZE];
- int len, nb_sectors, count;
- int64_t sector_num;
+ QEMUIOVector qiov;
+ struct iovec iov = {
+ .iov_base = (void *)buf,
+ .iov_len = bytes,
+ };
int ret;
- count = count1;
- /* first read to align to sector start */
- len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
- if (len > count)
- len = count;
- sector_num = offset >> BDRV_SECTOR_BITS;
- if (len > 0) {
- if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
- return ret;
- memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
- count -= len;
- if (count == 0)
- return count1;
- sector_num++;
- buf += len;
- }
-
- /* read the sectors "in place" */
- nb_sectors = count >> BDRV_SECTOR_BITS;
- if (nb_sectors > 0) {
- if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
- return ret;
- sector_num += nb_sectors;
- len = nb_sectors << BDRV_SECTOR_BITS;
- buf += len;
- count -= len;
+ if (bytes < 0) {
+ return -EINVAL;
}
- /* add data from the last sector */
- if (count > 0) {
- if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
- return ret;
- memcpy(buf, tmp_buf, count);
+ qemu_iovec_init_external(&qiov, &iov, 1);
+ ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
+ if (ret < 0) {
+ return ret;
}
- return count1;
+
+ return bytes;
}
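
The new bytes < 0 guard is not cosmetic: the count is about to flow into the
unsigned int bytes parameter of the byte-based path, where a negative int
would silently wrap to an enormous request. A minimal illustration of that
conversion pitfall (as_request_bytes() is a hypothetical stand-in for the
parameter passing):

    #include <assert.h>
    #include <limits.h>

    /* What the byte-based path would see if a negative count slipped
     * through: int -> unsigned int conversion wraps modulo 2^32. */
    static unsigned int as_request_bytes(int bytes)
    {
        return (unsigned int)bytes;
    }

    int main(void)
    {
        assert(as_request_bytes(-1) == UINT_MAX);   /* ~4 GiB request */
        assert(as_request_bytes(-4096) == UINT_MAX - 4095);
        return 0;
    }
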
int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
{
- uint8_t tmp_buf[BDRV_SECTOR_SIZE];
- int len, nb_sectors, count;
- int64_t sector_num;
int ret;
- count = qiov->size;
-
- /* first write to align to sector start */
- len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
- if (len > count)
- len = count;
- sector_num = offset >> BDRV_SECTOR_BITS;
- if (len > 0) {
- if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
- return ret;
- qemu_iovec_to_buf(qiov, 0, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)),
- len);
- if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
- return ret;
- count -= len;
- if (count == 0)
- return qiov->size;
- sector_num++;
- }
-
- /* write the sectors "in place" */
- nb_sectors = count >> BDRV_SECTOR_BITS;
- if (nb_sectors > 0) {
- QEMUIOVector qiov_inplace;
-
- qemu_iovec_init(&qiov_inplace, qiov->niov);
- qemu_iovec_concat(&qiov_inplace, qiov, len,
- nb_sectors << BDRV_SECTOR_BITS);
- ret = bdrv_writev(bs, sector_num, &qiov_inplace);
- qemu_iovec_destroy(&qiov_inplace);
- if (ret < 0) {
- return ret;
- }
-
- sector_num += nb_sectors;
- len = nb_sectors << BDRV_SECTOR_BITS;
- count -= len;
+ ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
+ if (ret < 0) {
+ return ret;
}
- /* add data from the last sector */
- if (count > 0) {
- if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
- return ret;
- qemu_iovec_to_buf(qiov, qiov->size - count, tmp_buf, count);
- if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
- return ret;
- }
return qiov->size;
}
int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
- const void *buf, int count1)
+ const void *buf, int bytes)
{
QEMUIOVector qiov;
struct iovec iov = {
.iov_base = (void *) buf,
- .iov_len = count1,
+ .iov_len = bytes,
};
+ if (bytes < 0) {
+ return -EINVAL;
+ }
+
qemu_iovec_init_external(&qiov, &iov, 1);
return bdrv_pwritev(bs, offset, &qiov);
}
}
/*
- * Handle a read request in coroutine context
+ * Forwards an already correctly aligned request to the BlockDriver. This
+ * handles copy on read and zeroing after EOF; any other features must be
+ * implemented by the caller.
*/
-static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
- BdrvRequestFlags flags)
+static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
+ BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
+ int64_t align, QEMUIOVector *qiov, int flags)
{
BlockDriver *drv = bs->drv;
- BdrvTrackedRequest req;
int ret;
- if (!drv) {
- return -ENOMEDIUM;
- }
- if (bdrv_check_request(bs, sector_num, nb_sectors)) {
- return -EIO;
- }
-
- if (bs->copy_on_read) {
- flags |= BDRV_REQ_COPY_ON_READ;
- }
- if (flags & BDRV_REQ_COPY_ON_READ) {
- bs->copy_on_read_in_flight++;
- }
+ int64_t sector_num = offset >> BDRV_SECTOR_BITS;
+ unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
- if (bs->copy_on_read_in_flight) {
- wait_for_overlapping_requests(bs, sector_num, nb_sectors);
- }
+ assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
- /* throttling disk I/O */
- if (bs->io_limits_enabled) {
- bdrv_io_limits_intercept(bs, nb_sectors, false);
+ /* Handle Copy on Read and associated serialisation */
+ if (flags & BDRV_REQ_COPY_ON_READ) {
+ /* If we touch the same cluster it counts as an overlap. This
+ * guarantees that allocating writes will be serialized and not race
+ * with each other for the same cluster. For example, in copy-on-read
+ * it ensures that the CoR read and write operations are atomic and
+ * guest writes cannot interleave between them. */
+ mark_request_serialising(req, bdrv_get_cluster_size(bs));
}
- tracked_request_begin(&req, bs, sector_num, nb_sectors, false);
+ wait_serialising_requests(req);
if (flags & BDRV_REQ_COPY_ON_READ) {
int pnum;
}
}
+ /* Forward the request to the BlockDriver */
if (!(bs->zero_beyond_eof && bs->growable)) {
ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
} else {
}
total_sectors = DIV_ROUND_UP(len, BDRV_SECTOR_SIZE);
- max_nb_sectors = MAX(0, total_sectors - sector_num);
+ max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
+ align >> BDRV_SECTOR_BITS);
if (max_nb_sectors > 0) {
ret = drv->bdrv_co_readv(bs, sector_num,
MIN(nb_sectors, max_nb_sectors), qiov);
}
out:
+ return ret;
+}
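
The ROUND_UP on max_nb_sectors lets a request straddling EOF still be issued
at its full alignment; bytes past bdrv_getlength() are zero-filled by code
elided from this hunk. The clamping arithmetic in isolation, with
hypothetical sizes and QEMU-style helper macros reproduced for the
power-of-two case:

    #include <assert.h>
    #include <stdint.h>

    #define ROUND_UP(n, a)     (((n) + (a) - 1) & ~((a) - 1))
    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
    #define MAX(a, b)          ((a) > (b) ? (a) : (b))
    #define MIN(a, b)          ((a) < (b) ? (a) : (b))

    int main(void)
    {
        int64_t len = 1000000;           /* image length in bytes */
        int64_t sector_num = 1950;       /* read request near EOF */
        int nb_sectors = 16;
        int align_sectors = 8;           /* align >> BDRV_SECTOR_BITS */

        int64_t total_sectors = DIV_ROUND_UP(len, 512);        /* 1954 */
        int64_t max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
                                          align_sectors);

        /* Only 4 sectors remain before EOF, rounded up to the 8-sector
         * alignment; the tail of the request is zeroed, not read. */
        assert(max_nb_sectors == 8);
        assert(MIN(nb_sectors, max_nb_sectors) == 8);
        return 0;
    }
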
+
+/*
+ * Handle a read request in coroutine context
+ */
+static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
+ int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
+{
+ BlockDriver *drv = bs->drv;
+ BdrvTrackedRequest req;
+
+ /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
+ uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
+ uint8_t *head_buf = NULL;
+ uint8_t *tail_buf = NULL;
+ QEMUIOVector local_qiov;
+ bool use_local_qiov = false;
+ int ret;
+
+ if (!drv) {
+ return -ENOMEDIUM;
+ }
+ if (bdrv_check_byte_request(bs, offset, bytes)) {
+ return -EIO;
+ }
+
+ if (bs->copy_on_read) {
+ flags |= BDRV_REQ_COPY_ON_READ;
+ }
+
+ /* throttling disk I/O */
+ if (bs->io_limits_enabled) {
+ bdrv_io_limits_intercept(bs, bytes, false);
+ }
+
+ /* Align read if necessary by padding qiov */
+ if (offset & (align - 1)) {
+ head_buf = qemu_blockalign(bs, align);
+ qemu_iovec_init(&local_qiov, qiov->niov + 2);
+ qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
+ qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
+ use_local_qiov = true;
+
+ bytes += offset & (align - 1);
+ offset = offset & ~(align - 1);
+ }
+
+ if ((offset + bytes) & (align - 1)) {
+ if (!use_local_qiov) {
+ qemu_iovec_init(&local_qiov, qiov->niov + 1);
+ qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
+ use_local_qiov = true;
+ }
+ tail_buf = qemu_blockalign(bs, align);
+ qemu_iovec_add(&local_qiov, tail_buf,
+ align - ((offset + bytes) & (align - 1)));
+
+ bytes = ROUND_UP(bytes, align);
+ }
+
+ tracked_request_begin(&req, bs, offset, bytes, false);
+ ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
+ use_local_qiov ? &local_qiov : qiov,
+ flags);
tracked_request_end(&req);
- if (flags & BDRV_REQ_COPY_ON_READ) {
- bs->copy_on_read_in_flight--;
+ if (use_local_qiov) {
+ qemu_iovec_destroy(&local_qiov);
+ qemu_vfree(head_buf);
+ qemu_vfree(tail_buf);
}
return ret;
}
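
Both padding branches perform the same bookkeeping: grow bytes by the head
slack, pull offset back to the previous boundary, then round bytes up so the
tail also ends on a boundary. That arithmetic on its own, with hypothetical
request values:

    #include <assert.h>
    #include <stdint.h>

    #define ROUND_UP(n, a) (((n) + (a) - 1) & ~((a) - 1))

    int main(void)
    {
        uint64_t align = 512;
        int64_t offset = 1000;
        unsigned int bytes = 100;        /* unaligned span [1000, 1100) */

        unsigned int head_pad = offset & (align - 1);        /* 488 */
        bytes += head_pad;
        offset &= ~(int64_t)(align - 1);                     /* 512 */

        unsigned int tail_pad = align - ((offset + bytes) & (align - 1));
        bytes = ROUND_UP(bytes, align);

        /* The driver sees one aligned request covering [512, 1536). */
        assert(offset == 512);
        assert(bytes == 1024);
        assert(head_pad + 100 + tail_pad == bytes);
        return 0;
    }
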
+static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
+{
+    if (nb_sectors < 0 || nb_sectors > (INT_MAX >> BDRV_SECTOR_BITS)) {
+ return -EINVAL;
+ }
+
+ return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
+ nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
+}
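
The wrapper's bound exists because nb_sectors is a signed int: the shift
nb_sectors << BDRV_SECTOR_BITS must stay within INT_MAX even though the byte
count parameter is unsigned, or the shift itself is undefined behaviour.
Checking the boundary values:

    #include <assert.h>
    #include <limits.h>

    #define BDRV_SECTOR_BITS 9

    int main(void)
    {
        int max_ok = INT_MAX >> BDRV_SECTOR_BITS;       /* 4194303 */

        /* The largest accepted count still shifts without overflow... */
        assert((max_ok << BDRV_SECTOR_BITS) == INT_MAX - 511);

        /* ...while (max_ok + 1) << BDRV_SECTOR_BITS would exceed INT_MAX,
         * which is why the wrapper rejects larger values up front. */
        assert(max_ok + 1 > INT_MAX >> BDRV_SECTOR_BITS);
        return 0;
    }
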
+
int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
}
/*
- * Handle a write request in coroutine context
+ * Forwards an already correctly aligned write request to the BlockDriver.
*/
-static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
- BdrvRequestFlags flags)
+static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
+ BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
+ QEMUIOVector *qiov, int flags)
{
BlockDriver *drv = bs->drv;
- BdrvTrackedRequest req;
+ bool waited;
int ret;
- if (!bs->drv) {
- return -ENOMEDIUM;
- }
- if (bs->read_only) {
- return -EACCES;
- }
- if (bdrv_check_request(bs, sector_num, nb_sectors)) {
- return -EIO;
- }
+ int64_t sector_num = offset >> BDRV_SECTOR_BITS;
+ unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
- if (bs->copy_on_read_in_flight) {
- wait_for_overlapping_requests(bs, sector_num, nb_sectors);
- }
+ assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
- /* throttling disk I/O */
- if (bs->io_limits_enabled) {
- bdrv_io_limits_intercept(bs, nb_sectors, true);
- }
-
- tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
+ waited = wait_serialising_requests(req);
+ assert(!waited || !req->serialising);
+ assert(req->overlap_offset <= offset);
+ assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
- ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req);
+ ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
if (ret < 0) {
/* Do nothing, write notifier decided to fail this request */
} else if (flags & BDRV_REQ_ZERO_WRITE) {
+ BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
} else {
+ BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
}
+ BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
if (ret == 0 && !bs->enable_write_cache) {
ret = bdrv_co_flush(bs);
bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
}
+ return ret;
+}
+
+/*
+ * Handle a write request in coroutine context
+ */
+static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
+ int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
+{
+ BdrvTrackedRequest req;
+ /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
+ uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
+ uint8_t *head_buf = NULL;
+ uint8_t *tail_buf = NULL;
+ QEMUIOVector local_qiov;
+ bool use_local_qiov = false;
+ int ret;
+
+ if (!bs->drv) {
+ return -ENOMEDIUM;
+ }
+ if (bs->read_only) {
+ return -EACCES;
+ }
+ if (bdrv_check_byte_request(bs, offset, bytes)) {
+ return -EIO;
+ }
+
+ /* throttling disk I/O */
+ if (bs->io_limits_enabled) {
+ bdrv_io_limits_intercept(bs, bytes, true);
+ }
+
+ /*
+ * Align write if necessary by performing a read-modify-write cycle.
+ * Pad qiov with the read parts and be sure to have a tracked request not
+ * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
+ */
+ tracked_request_begin(&req, bs, offset, bytes, true);
+
+ if (offset & (align - 1)) {
+ QEMUIOVector head_qiov;
+ struct iovec head_iov;
+
+ mark_request_serialising(&req, align);
+ wait_serialising_requests(&req);
+
+ head_buf = qemu_blockalign(bs, align);
+ head_iov = (struct iovec) {
+ .iov_base = head_buf,
+ .iov_len = align,
+ };
+ qemu_iovec_init_external(&head_qiov, &head_iov, 1);
+
+ BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
+ ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
+ align, &head_qiov, 0);
+ if (ret < 0) {
+ goto fail;
+ }
+ BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
+
+ qemu_iovec_init(&local_qiov, qiov->niov + 2);
+ qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
+ qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
+ use_local_qiov = true;
+
+ bytes += offset & (align - 1);
+ offset = offset & ~(align - 1);
+ }
+
+ if ((offset + bytes) & (align - 1)) {
+ QEMUIOVector tail_qiov;
+ struct iovec tail_iov;
+ size_t tail_bytes;
+ bool waited;
+
+ mark_request_serialising(&req, align);
+ waited = wait_serialising_requests(&req);
+ assert(!waited || !use_local_qiov);
+
+ tail_buf = qemu_blockalign(bs, align);
+ tail_iov = (struct iovec) {
+ .iov_base = tail_buf,
+ .iov_len = align,
+ };
+ qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
+
+ BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
+    ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1),
+                              align, align, &tail_qiov, 0);
+ if (ret < 0) {
+ goto fail;
+ }
+ BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
+
+ if (!use_local_qiov) {
+ qemu_iovec_init(&local_qiov, qiov->niov + 1);
+ qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
+ use_local_qiov = true;
+ }
+
+ tail_bytes = (offset + bytes) & (align - 1);
+ qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
+
+ bytes = ROUND_UP(bytes, align);
+ }
+
+ ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
+ use_local_qiov ? &local_qiov : qiov,
+ flags);
+
+fail:
tracked_request_end(&req);
+ if (use_local_qiov) {
+ qemu_iovec_destroy(&local_qiov);
+ }
+ qemu_vfree(head_buf);
+ qemu_vfree(tail_buf);
+
return ret;
}
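
Taken together, an unaligned write becomes: read the aligned head and tail,
splice the guest payload between the preserved fragments, and issue one
aligned write. A userspace model of that splice over a byte array standing
in for the device (all names hypothetical; serialisation, qiov handling and
error paths omitted):

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    #define ALIGN 8                      /* toy sector size */

    static uint8_t disk[32];             /* pretend block device */

    /* Read-modify-write one unaligned span the way bdrv_co_do_pwritev()
     * pads a request: RMW read, splice payload, one aligned write. */
    static void rmw_write(int64_t offset, const uint8_t *buf,
                          unsigned int bytes)
    {
        int64_t aligned_off = offset & ~(int64_t)(ALIGN - 1);
        unsigned int head = offset & (ALIGN - 1);
        unsigned int aligned_len =
            (head + bytes + ALIGN - 1) & ~(unsigned int)(ALIGN - 1);
        uint8_t tmp[sizeof(disk)];

        memcpy(tmp, disk + aligned_off, aligned_len);  /* RMW read */
        memcpy(tmp + head, buf, bytes);                /* splice payload */
        memcpy(disk + aligned_off, tmp, aligned_len);  /* aligned write */
    }

    int main(void)
    {
        memset(disk, 'x', sizeof(disk));

        rmw_write(5, (const uint8_t *)"ABCD", 4);  /* spans two sectors */

        /* Neighbouring bytes survive; the payload landed at 5..8. */
        assert(disk[4] == 'x');
        assert(memcmp(disk + 5, "ABCD", 4) == 0);
        assert(disk[9] == 'x');
        return 0;
    }
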
+static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
+{
+ if (nb_sectors < 0 || nb_sectors > (INT_MAX >> BDRV_SECTOR_BITS)) {
+ return -EINVAL;
+ }
+
+ return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
+ nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
+}
+
int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
return rwco.ret;
}
+typedef struct DiscardCo {
+ BlockDriverState *bs;
+ int64_t sector_num;
+ int nb_sectors;
+ int ret;
+} DiscardCo;
static void coroutine_fn bdrv_discard_co_entry(void *opaque)
{
- RwCo *rwco = opaque;
+ DiscardCo *rwco = opaque;
rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
}
int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
{
Coroutine *co;
- RwCo rwco = {
+ DiscardCo rwco = {
.bs = bs,
.sector_num = sector_num,
.nb_sectors = nb_sectors,