VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
{
- VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq);
+ VirtIOBlockReq *req = g_new(VirtIOBlockReq, 1);
req->dev = s;
req->qiov.size = 0;
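+ /* Size of the device-writable buffers for this request; filled in when the
+ * request header is parsed and later reported via virtqueue_push(). */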
+ req->in_len = 0;
req->next = NULL;
req->mr_next = NULL;
return req;
}

void virtio_blk_free_request(VirtIOBlockReq *req)
{
if (req) {
- g_slice_free(VirtIOBlockReq, req);
+ g_free(req);
}
}
trace_virtio_blk_req_complete(req, status);
stb_p(&req->in->status, status);
- virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in));
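+ /* Report the full size of the device-writable buffers rather than just the
+ * bytes actually produced, so that everything the device may have written
+ * is marked dirty for migration. */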
+ virtqueue_push(s->vq, &req->elem, req->in_len);
virtio_notify(vdev, s->vq);
}
VirtIOBlock *s = req->dev;
if (action == BLOCK_ERROR_ACTION_STOP) {
+ /* Break the link as the next request is going to be parsed from the
+ * ring again. Otherwise we may end up doing a double completion! */
+ req->mr_next = NULL;
req->next = s->rq;
s->rq = req;
} else if (action == BLOCK_ERROR_ACTION_REPORT) {
virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
- block_acct_done(blk_get_stats(s->blk), &req->acct);
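+ /* The request failed, so account it separately from successful completions. */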
+ block_acct_failed(blk_get_stats(s->blk), &req->acct);
virtio_blk_free_request(req);
}
if (ret) {
int p = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
bool is_read = !(p & VIRTIO_BLK_T_OUT);
+ /* Note that memory may be dirtied on read failure. If the
+ * virtio request is not completed here, as is the case for
+ * BLOCK_ERROR_ACTION_STOP, the memory may not be copied
+ * correctly during live migration. While this is ugly,
+ * it is acceptable because the device is free to write to
+ * the memory until the request is completed (which will
+ * happen on the other side of the migration).
+ */
if (virtio_blk_handle_rw_error(req, -ret, is_read)) {
continue;
}
#ifdef __linux__
int i;
VirtIOBlockIoctlReq *ioctl_req;
+ BlockAIOCB *acb;
#endif
/*
ioctl_req->hdr.sbp = elem->in_sg[elem->in_num - 3].iov_base;
ioctl_req->hdr.mx_sb_len = elem->in_sg[elem->in_num - 3].iov_len;
- blk_aio_ioctl(blk->blk, SG_IO, &ioctl_req->hdr,
- virtio_blk_ioctl_complete, ioctl_req);
+ acb = blk_aio_ioctl(blk->blk, SG_IO, &ioctl_req->hdr,
+ virtio_blk_ioctl_complete, ioctl_req);
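+ /* If no BlockAIOCB comes back, the ioctl could not be issued at all (for
+ * example the backend has no ioctl support), so fail the request as an
+ * unsupported SCSI command instead of waiting for a completion callback. */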
+ if (!acb) {
+ g_free(ioctl_req);
+ status = VIRTIO_BLK_S_UNSUPP;
+ goto fail;
+ }
return -EINPROGRESS;
#else
abort();
for (i = 0; i < mrb->num_reqs; i++) {
VirtIOBlockReq *req = mrb->reqs[i];
if (num_reqs > 0) {
- bool merge = true;
-
- /* merge would exceed maximum number of IOVs */
- if (niov + req->qiov.niov > IOV_MAX) {
- merge = false;
- }
-
- /* merge would exceed maximum transfer length of backend device */
- if (req->qiov.size / BDRV_SECTOR_SIZE + nb_sectors > max_xfer_len) {
- merge = false;
- }
-
- /* requests are not sequential */
- if (sector_num + nb_sectors != req->sector_num) {
- merge = false;
- }
-
- if (!merge) {
+ /*
+ * NOTE: We cannot merge the requests in the following situations:
+ * 1. the requests are not sequential
+ * 2. merging would exceed the maximum number of IOVs
+ * 3. merging would exceed the maximum transfer length of the backend device
+ */
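+ /* The size checks below are written as subtractions so the sums cannot
+ * overflow. */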
+ if (sector_num + nb_sectors != req->sector_num ||
+ niov > IOV_MAX - req->qiov.niov ||
+ req->qiov.size / BDRV_SECTOR_SIZE > max_xfer_len ||
+ nb_sectors > max_xfer_len - req->qiov.size / BDRV_SECTOR_SIZE) {
submit_requests(blk, mrb, start, num_reqs, niov);
num_reqs = 0;
}
iov_discard_front(&iov, &out_num, sizeof(req->out));
- if (in_num < 1 ||
- in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
+ if (in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
error_report("virtio-blk request inhdr too short");
exit(1);
}
+ /* We always touch the last byte, so just see how big in_iov is. */
+ req->in_len = iov_size(in_iov, in_num);
req->in = (void *)in_iov[in_num - 1].iov_base
+ in_iov[in_num - 1].iov_len
- sizeof(struct virtio_blk_inhdr);
type = virtio_ldl_p(VIRTIO_DEVICE(req->dev), &req->out.type);
/* VIRTIO_BLK_T_OUT defines the command direction. VIRTIO_BLK_T_BARRIER
- * is an optional flag. Altough a guest should not send this flag if
+ * is an optional flag. Although a guest should not send this flag if
* not negotiated we ignored it in the past. So keep ignoring it. */
switch (type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_BARRIER)) {
case VIRTIO_BLK_T_IN:
if (!virtio_blk_sect_range_ok(req->dev, req->sector_num,
req->qiov.size)) {
virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
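+ /* The request never reaches the backend, so account it as invalid rather
+ * than as a failed read or write. */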
+ block_acct_invalid(blk_get_stats(req->dev->blk),
+ is_write ? BLOCK_ACCT_WRITE : BLOCK_ACCT_READ);
virtio_blk_free_request(req);
return;
}
return;
}
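+ /* Plug the backend so the requests queued up below can be submitted to the
+ * host in one batch when we unplug. */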
+ blk_io_plug(s->blk);
+
while ((req = virtio_blk_get_request(s))) {
virtio_blk_handle_request(req, &mrb);
}
virtio_blk_submit_multireq(s->blk, &mrb);
}
- /*
- * FIXME: Want to check for completions before returning to guest mode,
- * so cached reads and writes are reported as quickly as possible. But
- * that should be done in the generic block layer.
- */
+ blk_io_unplug(s->blk);
}
static void virtio_blk_dma_restart_bh(void *opaque)
static void virtio_blk_reset(VirtIODevice *vdev)
{
VirtIOBlock *s = VIRTIO_BLK(vdev);
-
- if (s->dataplane) {
- virtio_blk_data_plane_stop(s->dataplane);
- }
+ AioContext *ctx;
/*
* This should cancel pending requests, but that can't be done nicely until
* there are per-device request lists.
*/
- blk_drain_all();
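+ /* Requests may be completing in a different AioContext when dataplane is
+ * in use, so take that context before draining and stopping the dataplane. */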
+ ctx = blk_get_aio_context(s->blk);
+ aio_context_acquire(ctx);
+ blk_drain(s->blk);
+
+ if (s->dataplane) {
+ virtio_blk_data_plane_stop(s->dataplane);
+ }
+ aio_context_release(ctx);
+
blk_set_enable_write_cache(s->blk, s->original_wce);
}
memset(&blkcfg, 0, sizeof(blkcfg));
virtio_stq_p(vdev, &blkcfg.capacity, capacity);
virtio_stl_p(vdev, &blkcfg.seg_max, 128 - 2);
- virtio_stw_p(vdev, &blkcfg.cylinders, conf->cyls);
+ virtio_stw_p(vdev, &blkcfg.geometry.cylinders, conf->cyls);
virtio_stl_p(vdev, &blkcfg.blk_size, blk_size);
virtio_stw_p(vdev, &blkcfg.min_io_size, conf->min_io_size / blk_size);
virtio_stw_p(vdev, &blkcfg.opt_io_size, conf->opt_io_size / blk_size);
- blkcfg.heads = conf->heads;
+ blkcfg.geometry.heads = conf->heads;
/*
* We must ensure that the block device capacity is a multiple of
* the logical block size. If that is not the case, use the sector
* mask to trim the reported number of sectors per track (cylinder)
* so that the geometry only covers whole logical blocks.
*/
if (blk_getlength(s->blk) / conf->heads / conf->secs % blk_size) {
- blkcfg.sectors = conf->secs & ~s->sector_mask;
+ blkcfg.geometry.sectors = conf->secs & ~s->sector_mask;
} else {
- blkcfg.sectors = conf->secs;
+ blkcfg.geometry.sectors = conf->secs;
}
blkcfg.size_max = 0;
blkcfg.physical_block_exp = get_physical_block_exp(conf);
aio_context_release(blk_get_aio_context(s->blk));
}
-static uint32_t virtio_blk_get_features(VirtIODevice *vdev, uint32_t features)
+static uint64_t virtio_blk_get_features(VirtIODevice *vdev, uint64_t features,
+ Error **errp)
{
VirtIOBlock *s = VIRTIO_BLK(vdev);
- features |= (1 << VIRTIO_BLK_F_SEG_MAX);
- features |= (1 << VIRTIO_BLK_F_GEOMETRY);
- features |= (1 << VIRTIO_BLK_F_TOPOLOGY);
- features |= (1 << VIRTIO_BLK_F_BLK_SIZE);
- features |= (1 << VIRTIO_BLK_F_SCSI);
+ virtio_add_feature(&features, VIRTIO_BLK_F_SEG_MAX);
+ virtio_add_feature(&features, VIRTIO_BLK_F_GEOMETRY);
+ virtio_add_feature(&features, VIRTIO_BLK_F_TOPOLOGY);
+ virtio_add_feature(&features, VIRTIO_BLK_F_BLK_SIZE);
+ if (virtio_has_feature(features, VIRTIO_F_VERSION_1)) {
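+ /* SCSI passthrough is a legacy-only feature: virtio 1.0 dropped
+ * VIRTIO_BLK_F_SCSI because its request layout cannot be parsed once
+ * ANY_LAYOUT is in effect. */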
+ if (s->conf.scsi) {
+ error_setg(errp, "Please set scsi=off for virtio-blk devices in order to use virtio 1.0");
+ return 0;
+ }
+ } else {
+ virtio_clear_feature(&features, VIRTIO_F_ANY_LAYOUT);
+ virtio_add_feature(&features, VIRTIO_BLK_F_SCSI);
+ }
if (s->conf.config_wce) {
- features |= (1 << VIRTIO_BLK_F_CONFIG_WCE);
+ virtio_add_feature(&features, VIRTIO_BLK_F_CONFIG_WCE);
}
if (blk_enable_write_cache(s->blk)) {
- features |= (1 << VIRTIO_BLK_F_WCE);
+ virtio_add_feature(&features, VIRTIO_BLK_F_WCE);
}
if (blk_is_read_only(s->blk)) {
- features |= 1 << VIRTIO_BLK_F_RO;
+ virtio_add_feature(&features, VIRTIO_BLK_F_RO);
}
return features;
}

static void virtio_blk_set_status(VirtIODevice *vdev, uint8_t status)
{
VirtIOBlock *s = VIRTIO_BLK(vdev);
- uint32_t features;
if (s->dataplane && !(status & (VIRTIO_CONFIG_S_DRIVER |
VIRTIO_CONFIG_S_DRIVER_OK))) {
return;
}
- features = vdev->guest_features;
-
/* A guest that supports VIRTIO_BLK_F_CONFIG_WCE must be able to send
* cache flushes. Thus, the "auto writethrough" behavior is never
* necessary for guests that support the VIRTIO_BLK_F_CONFIG_WCE feature.
*
* s->blk would erroneously be placed in writethrough mode.
*/
- if (!(features & (1 << VIRTIO_BLK_F_CONFIG_WCE))) {
+ if (!virtio_vdev_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE)) {
aio_context_acquire(blk_get_aio_context(s->blk));
blk_set_enable_write_cache(s->blk,
- !!(features & (1 << VIRTIO_BLK_F_WCE)));
+ virtio_vdev_has_feature(vdev,
+ VIRTIO_BLK_F_WCE));
aio_context_release(blk_get_aio_context(s->blk));
}
}
static void virtio_blk_save(QEMUFile *f, void *opaque)
{
VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
+ VirtIOBlock *s = VIRTIO_BLK(vdev);
+
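+ /* Dataplane must not be running while the device state is serialized. */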
+ if (s->dataplane) {
+ virtio_blk_data_plane_stop(s->dataplane);
+ }
virtio_save(vdev, f);
}
req->next = s->rq;
s->rq = req;
- virtqueue_map_sg(req->elem.in_sg, req->elem.in_addr,
- req->elem.in_num, 1);
- virtqueue_map_sg(req->elem.out_sg, req->elem.out_addr,
- req->elem.out_num, 0);
+ virtqueue_map(&req->elem);
}
return 0;
virtio_blk_data_plane_create(VIRTIO_DEVICE(s), &s->conf,
&s->dataplane, &err);
if (err != NULL) {
- error_report("%s", error_get_pretty(err));
- error_free(err);
+ error_report_err(err);
}
}
}
error_propagate(errp, err);
return;
}
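+ /* Let the backend fill in any logical/physical block sizes that were not
+ * given on the command line. */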
+ blkconf_blocksizes(&conf->conf);
virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK,
sizeof(struct virtio_blk_config));
DEFINE_PROP_STRING("serial", VirtIOBlock, conf.serial),
DEFINE_PROP_BIT("config-wce", VirtIOBlock, conf.config_wce, 0, true),
#ifdef __linux__
- DEFINE_PROP_BIT("scsi", VirtIOBlock, conf.scsi, 0, true),
+ DEFINE_PROP_BIT("scsi", VirtIOBlock, conf.scsi, 0, false),
#endif
DEFINE_PROP_BIT("request-merging", VirtIOBlock, conf.request_merging, 0,
true),
- DEFINE_PROP_BIT("x-data-plane", VirtIOBlock, conf.data_plane, 0, false),
DEFINE_PROP_END_OF_LIST(),
};