#include "qemu/osdep.h"
#include <sys/ioctl.h>
-#include <sys/mman.h>
#include <sys/uio.h>
#include "hw/hw.h"
unsigned int persistent_gnt_count;
unsigned int max_grants;
+ /* Grant copy: set when the gnttabdev supports the copy operation */
+ gboolean feature_grant_copy;
+
/* qemu block driver */
DriveInfo *dinfo;
BlockBackend *blk;
xengnttab_handle *gnt = grant->blkdev->xendev.gnttabdev;
if (xengnttab_unmap(gnt, grant->page, 1) != 0) {
- xen_be_printf(&grant->blkdev->xendev, 0,
+ xen_pv_printf(&grant->blkdev->xendev, 0,
"xengnttab_unmap failed: %s\n",
strerror(errno));
}
grant->blkdev->persistent_gnt_count--;
- xen_be_printf(&grant->blkdev->xendev, 3,
+ xen_pv_printf(&grant->blkdev->xendev, 3,
"unmapped grant %p\n", grant->page);
g_free(grant);
}
xengnttab_handle *gnt = blkdev->xendev.gnttabdev;
if (xengnttab_unmap(gnt, region->addr, region->num) != 0) {
- xen_be_printf(&blkdev->xendev, 0,
+ xen_pv_printf(&blkdev->xendev, 0,
"xengnttab_unmap region %p failed: %s\n",
region->addr, strerror(errno));
}
- xen_be_printf(&blkdev->xendev, 3,
+ xen_pv_printf(&blkdev->xendev, 3,
"unmapped grant region %p with %d pages\n",
region->addr, region->num);
g_free(region);
size_t len;
int i;
- xen_be_printf(&blkdev->xendev, 3,
+ xen_pv_printf(&blkdev->xendev, 3,
"op %d, nr %d, handle %d, id %" PRId64 ", sector %" PRId64 "\n",
ioreq->req.operation, ioreq->req.nr_segments,
ioreq->req.handle, ioreq->req.id, ioreq->req.sector_number);
case BLKIF_OP_DISCARD:
return 0;
default:
- xen_be_printf(&blkdev->xendev, 0, "error: unknown operation (%d)\n",
+ xen_pv_printf(&blkdev->xendev, 0, "error: unknown operation (%d)\n",
ioreq->req.operation);
goto err;
};
if (ioreq->req.operation != BLKIF_OP_READ && blkdev->mode[0] != 'w') {
- xen_be_printf(&blkdev->xendev, 0, "error: write req for ro device\n");
+ xen_pv_printf(&blkdev->xendev, 0, "error: write req for ro device\n");
goto err;
}
ioreq->start = ioreq->req.sector_number * blkdev->file_blk;
for (i = 0; i < ioreq->req.nr_segments; i++) {
if (i == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
- xen_be_printf(&blkdev->xendev, 0, "error: nr_segments too big\n");
+ xen_pv_printf(&blkdev->xendev, 0, "error: nr_segments too big\n");
goto err;
}
if (ioreq->req.seg[i].first_sect > ioreq->req.seg[i].last_sect) {
- xen_be_printf(&blkdev->xendev, 0, "error: first > last sector\n");
+ xen_pv_printf(&blkdev->xendev, 0, "error: first > last sector\n");
goto err;
}
if (ioreq->req.seg[i].last_sect * BLOCK_SIZE >= XC_PAGE_SIZE) {
- xen_be_printf(&blkdev->xendev, 0, "error: page crossing\n");
+ xen_pv_printf(&blkdev->xendev, 0, "error: page crossing\n");
goto err;
}
qemu_iovec_add(&ioreq->v, (void*)mem, len);
}
if (ioreq->start + ioreq->v.size > blkdev->file_size) {
- xen_be_printf(&blkdev->xendev, 0, "error: access beyond end of file\n");
+ xen_pv_printf(&blkdev->xendev, 0, "error: access beyond end of file\n");
goto err;
}
return 0;
return;
}
if (xengnttab_unmap(gnt, ioreq->pages, ioreq->num_unmap) != 0) {
- xen_be_printf(&ioreq->blkdev->xendev, 0,
+ xen_pv_printf(&ioreq->blkdev->xendev, 0,
"xengnttab_unmap failed: %s\n",
strerror(errno));
}
continue;
}
if (xengnttab_unmap(gnt, ioreq->page[i], 1) != 0) {
- xen_be_printf(&ioreq->blkdev->xendev, 0,
+ xen_pv_printf(&ioreq->blkdev->xendev, 0,
"xengnttab_unmap failed: %s\n",
strerror(errno));
}
if (grant != NULL) {
page[i] = grant->page;
- xen_be_printf(&ioreq->blkdev->xendev, 3,
+ xen_pv_printf(&ioreq->blkdev->xendev, 3,
"using persistent-grant %" PRIu32 "\n",
ioreq->refs[i]);
} else {
ioreq->pages = xengnttab_map_grant_refs
(gnt, new_maps, domids, refs, ioreq->prot);
if (ioreq->pages == NULL) {
- xen_be_printf(&ioreq->blkdev->xendev, 0,
+ xen_pv_printf(&ioreq->blkdev->xendev, 0,
"can't map %d grant refs (%s, %d maps)\n",
new_maps, strerror(errno), ioreq->blkdev->cnt_map);
return -1;
ioreq->page[i] = xengnttab_map_grant_ref
(gnt, domids[i], refs[i], ioreq->prot);
if (ioreq->page[i] == NULL) {
- xen_be_printf(&ioreq->blkdev->xendev, 0,
+ xen_pv_printf(&ioreq->blkdev->xendev, 0,
"can't map grant ref %d (%s, %d maps)\n",
refs[i], strerror(errno), ioreq->blkdev->cnt_map);
ioreq->mapped = 1;
grant->page = ioreq->page[new_maps];
}
grant->blkdev = ioreq->blkdev;
- xen_be_printf(&ioreq->blkdev->xendev, 3,
+ xen_pv_printf(&ioreq->blkdev->xendev, 3,
"adding grant %" PRIu32 " page: %p\n",
refs[new_maps], grant->page);
g_tree_insert(ioreq->blkdev->persistent_gnts,
return 0;
}
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 480
+
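+ /*
+ * Drop the bounce buffer set up by ioreq_init_copy_buffers(): clear the
+ * per-segment pointers into it and free the backing allocation.
+ */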
+static void ioreq_free_copy_buffers(struct ioreq *ioreq)
+{
+ int i;
+
+ for (i = 0; i < ioreq->v.niov; i++) {
+ ioreq->page[i] = NULL;
+ }
+
+ qemu_vfree(ioreq->pages);
+}
+
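+ /*
+ * Allocate one page-aligned bounce page per segment and point both
+ * ioreq->page[] and the iovec entries at it; the data itself is moved
+ * to or from the guest later by ioreq_grant_copy().
+ */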
+static int ioreq_init_copy_buffers(struct ioreq *ioreq)
+{
+ int i;
+
+ if (ioreq->v.niov == 0) {
+ return 0;
+ }
+
+ ioreq->pages = qemu_memalign(XC_PAGE_SIZE, ioreq->v.niov * XC_PAGE_SIZE);
+
+ for (i = 0; i < ioreq->v.niov; i++) {
+ ioreq->page[i] = ioreq->pages + i * XC_PAGE_SIZE;
+ ioreq->v.iov[i].iov_base = ioreq->page[i];
+ }
+
+ return 0;
+}
+
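+ /*
+ * Transfer the data for all segments in a single grant copy operation,
+ * without mapping the guest's pages: for reads the local bounce buffers
+ * are the source and the guest's grant refs the destination, for writes
+ * and flushes it is the other way around.
+ */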
+static int ioreq_grant_copy(struct ioreq *ioreq)
+{
+ xengnttab_handle *gnt = ioreq->blkdev->xendev.gnttabdev;
+ xengnttab_grant_copy_segment_t segs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ int i, count, rc;
+ int64_t file_blk = ioreq->blkdev->file_blk;
+
+ if (ioreq->v.niov == 0) {
+ return 0;
+ }
+
+ count = ioreq->v.niov;
+
+ for (i = 0; i < count; i++) {
+ if (ioreq->req.operation == BLKIF_OP_READ) {
+ segs[i].flags = GNTCOPY_dest_gref;
+ segs[i].dest.foreign.ref = ioreq->refs[i];
+ segs[i].dest.foreign.domid = ioreq->domids[i];
+ segs[i].dest.foreign.offset = ioreq->req.seg[i].first_sect * file_blk;
+ segs[i].source.virt = ioreq->v.iov[i].iov_base;
+ } else {
+ segs[i].flags = GNTCOPY_source_gref;
+ segs[i].source.foreign.ref = ioreq->refs[i];
+ segs[i].source.foreign.domid = ioreq->domids[i];
+ segs[i].source.foreign.offset = ioreq->req.seg[i].first_sect * file_blk;
+ segs[i].dest.virt = ioreq->v.iov[i].iov_base;
+ }
+ segs[i].len = (ioreq->req.seg[i].last_sect
+ - ioreq->req.seg[i].first_sect + 1) * file_blk;
+ }
+
+ rc = xengnttab_grant_copy(gnt, count, segs);
+
+ if (rc) {
+ xen_pv_printf(&ioreq->blkdev->xendev, 0,
+ "failed to copy data %d\n", rc);
+ ioreq->aio_errors++;
+ return -1;
+ }
+
+ for (i = 0; i < count; i++) {
+ if (segs[i].status != GNTST_okay) {
+ xen_pv_printf(&ioreq->blkdev->xendev, 3,
+ "failed to copy data %d for gref %d, domid %d\n",
+ segs[i].status, ioreq->refs[i], ioreq->domids[i]);
+ ioreq->aio_errors++;
+ rc = -1;
+ }
+ }
+
+ return rc;
+}
+#else
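+ /*
+ * Stubs for toolstacks older than Xen 4.8: there the grant copy probe in
+ * blk_connect() cannot succeed, feature_grant_copy stays false, and none
+ * of these paths should be reachable.
+ */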
+static void ioreq_free_copy_buffers(struct ioreq *ioreq)
+{
+ abort();
+}
+
+static int ioreq_init_copy_buffers(struct ioreq *ioreq)
+{
+ abort();
+}
+
+static int ioreq_grant_copy(struct ioreq *ioreq)
+{
+ abort();
+}
+#endif
+
static int ioreq_runio_qemu_aio(struct ioreq *ioreq);
static void qemu_aio_complete(void *opaque, int ret)
struct ioreq *ioreq = opaque;
if (ret != 0) {
- xen_be_printf(&ioreq->blkdev->xendev, 0, "%s I/O error\n",
+ xen_pv_printf(&ioreq->blkdev->xendev, 0, "%s I/O error\n",
ioreq->req.operation == BLKIF_OP_READ ? "read" : "write");
ioreq->aio_errors++;
}
return;
}
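+ /*
+ * With grant copy, read data reaches the guest only once it has been
+ * copied back after successful I/O; write data was already copied out
+ * at submission time, so only the bounce buffers remain to be freed.
+ */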
+ if (ioreq->blkdev->feature_grant_copy) {
+ switch (ioreq->req.operation) {
+ case BLKIF_OP_READ:
+ /* on failure ioreq_grant_copy() has already incremented ioreq->aio_errors */
+ if (ret == 0) {
+ ioreq_grant_copy(ioreq);
+ }
+ ioreq_free_copy_buffers(ioreq);
+ break;
+ case BLKIF_OP_WRITE:
+ case BLKIF_OP_FLUSH_DISKCACHE:
+ if (!ioreq->req.nr_segments) {
+ break;
+ }
+ ioreq_free_copy_buffers(ioreq);
+ break;
+ default:
+ break;
+ }
+ }
+
ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
- ioreq_unmap(ioreq);
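+ /* with grant copy no pages were mapped, so there is nothing to unmap */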
+ if (!ioreq->blkdev->feature_grant_copy) {
+ ioreq_unmap(ioreq);
+ }
ioreq_finish(ioreq);
switch (ioreq->req.operation) {
case BLKIF_OP_WRITE:
qemu_bh_schedule(ioreq->blkdev->bh);
}
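+ /*
+ * The block layer caps a single request at BDRV_REQUEST_MAX_SECTORS, so
+ * issue a guest discard as a series of bounded blk_aio_pdiscard() calls;
+ * ranges that would wrap around or exceed the byte limit are rejected.
+ */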
+static bool blk_split_discard(struct ioreq *ioreq, blkif_sector_t sector_number,
+ uint64_t nr_sectors)
+{
+ struct XenBlkDev *blkdev = ioreq->blkdev;
+ int64_t byte_offset;
+ int byte_chunk;
+ uint64_t byte_remaining, limit;
+ uint64_t sec_start = sector_number;
+ uint64_t sec_count = nr_sectors;
+
+ /* Wrap around, or overflowing byte limit? */
+ if (sec_start + sec_count < sec_count ||
+ sec_start + sec_count > INT64_MAX >> BDRV_SECTOR_BITS) {
+ return false;
+ }
+
+ limit = BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS;
+ byte_offset = sec_start << BDRV_SECTOR_BITS;
+ byte_remaining = sec_count << BDRV_SECTOR_BITS;
+
+ do {
+ byte_chunk = byte_remaining > limit ? limit : byte_remaining;
+ ioreq->aio_inflight++;
+ blk_aio_pdiscard(blkdev->blk, byte_offset, byte_chunk,
+ qemu_aio_complete, ioreq);
+ byte_remaining -= byte_chunk;
+ byte_offset += byte_chunk;
+ } while (byte_remaining > 0);
+
+ return true;
+}
+
static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
{
struct XenBlkDev *blkdev = ioreq->blkdev;
- if (ioreq->req.nr_segments && ioreq_map(ioreq) == -1) {
- goto err_no_map;
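+ /*
+ * With grant copy, write and flush payloads are pulled in from the
+ * guest before the request is issued; read data is copied back on
+ * completion in qemu_aio_complete().
+ */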
+ if (ioreq->blkdev->feature_grant_copy) {
+ ioreq_init_copy_buffers(ioreq);
+ if (ioreq->req.nr_segments && (ioreq->req.operation == BLKIF_OP_WRITE ||
+ ioreq->req.operation == BLKIF_OP_FLUSH_DISKCACHE) &&
+ ioreq_grant_copy(ioreq)) {
+ ioreq_free_copy_buffers(ioreq);
+ goto err;
+ }
+ } else {
+ if (ioreq->req.nr_segments && ioreq_map(ioreq)) {
+ goto err;
+ }
}
ioreq->aio_inflight++;
break;
case BLKIF_OP_DISCARD:
{
- struct blkif_request_discard *discard_req = (void *)&ioreq->req;
- ioreq->aio_inflight++;
- blk_aio_discard(blkdev->blk,
- discard_req->sector_number, discard_req->nr_sectors,
- qemu_aio_complete, ioreq);
+ struct blkif_request_discard *req = (void *)&ioreq->req;
+ if (!blk_split_discard(ioreq, req->sector_number, req->nr_sectors)) {
+ goto err;
+ }
break;
}
default:
/* unknown operation (shouldn't happen -- parse catches this) */
+ if (!ioreq->blkdev->feature_grant_copy) {
+ ioreq_unmap(ioreq);
+ }
goto err;
}
return 0;
err:
- ioreq_unmap(ioreq);
-err_no_map:
ioreq_finish(ioreq);
ioreq->status = BLKIF_RSP_ERROR;
return -1;
ioreq_release(ioreq, true);
}
if (send_notify) {
- xen_be_send_notify(&blkdev->xendev);
+ xen_pv_send_notify(&blkdev->xendev);
}
}
};
if (blk_send_response_one(ioreq)) {
- xen_be_send_notify(&blkdev->xendev);
+ xen_pv_send_notify(&blkdev->xendev);
}
ioreq_release(ioreq, false);
continue;
}
if (xengnttab_set_max_grants(xendev->gnttabdev,
MAX_GRANTS(max_requests, BLKIF_MAX_SEGMENTS_PER_REQUEST)) < 0) {
- xen_be_printf(xendev, 0, "xengnttab_set_max_grants failed: %s\n",
+ xen_pv_printf(xendev, 0, "xengnttab_set_max_grants failed: %s\n",
strerror(errno));
}
}
}
/* setup via xenbus -> create new block driver instance */
- xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n");
+ xen_pv_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n");
blkdev->blk = blk_new_open(blkdev->filename, NULL, options,
qflags, &local_err);
if (!blkdev->blk) {
- xen_be_printf(&blkdev->xendev, 0, "error: %s\n",
+ xen_pv_printf(&blkdev->xendev, 0, "error: %s\n",
error_get_pretty(local_err));
error_free(local_err);
return -1;
blk_set_enable_write_cache(blkdev->blk, !writethrough);
} else {
/* setup via qemu cmdline -> already setup for us */
- xen_be_printf(&blkdev->xendev, 2, "get configured bdrv (cmdline setup)\n");
+ xen_pv_printf(&blkdev->xendev, 2,
+ "get configured bdrv (cmdline setup)\n");
blkdev->blk = blk_by_legacy_dinfo(blkdev->dinfo);
if (blk_is_read_only(blkdev->blk) && !readonly) {
- xen_be_printf(&blkdev->xendev, 0, "Unexpected read-only drive");
+ xen_pv_printf(&blkdev->xendev, 0, "Unexpected read-only drive");
blkdev->blk = NULL;
return -1;
}
* so we can blk_unref() unconditionally */
blk_ref(blkdev->blk);
}
- blk_attach_dev_nofail(blkdev->blk, blkdev);
+ blk_attach_dev_legacy(blkdev->blk, blkdev);
blkdev->file_size = blk_getlength(blkdev->blk);
if (blkdev->file_size < 0) {
BlockDriverState *bs = blk_bs(blkdev->blk);
const char *drv_name = bs ? bdrv_get_format_name(bs) : NULL;
- xen_be_printf(&blkdev->xendev, 1, "blk_getlength: %d (%s) | drv %s\n",
+ xen_pv_printf(&blkdev->xendev, 1, "blk_getlength: %d (%s) | drv %s\n",
(int)blkdev->file_size, strerror(-blkdev->file_size),
drv_name ?: "-");
blkdev->file_size = 0;
}
- xen_be_printf(xendev, 1, "type \"%s\", fileproto \"%s\", filename \"%s\","
+ xen_pv_printf(xendev, 1, "type \"%s\", fileproto \"%s\", filename \"%s\","
" size %" PRId64 " (%" PRId64 " MB)\n",
blkdev->type, blkdev->fileproto, blkdev->filename,
blkdev->file_size, blkdev->file_size >> 20);
blkdev->feature_persistent = !!pers;
}
- blkdev->protocol = BLKIF_PROTOCOL_NATIVE;
- if (blkdev->xendev.protocol) {
- if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_32) == 0) {
- blkdev->protocol = BLKIF_PROTOCOL_X86_32;
- }
- if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_64) == 0) {
- blkdev->protocol = BLKIF_PROTOCOL_X86_64;
- }
+ if (!blkdev->xendev.protocol) {
+ blkdev->protocol = BLKIF_PROTOCOL_NATIVE;
+ } else if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_NATIVE) == 0) {
+ blkdev->protocol = BLKIF_PROTOCOL_NATIVE;
+ } else if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_32) == 0) {
+ blkdev->protocol = BLKIF_PROTOCOL_X86_32;
+ } else if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_64) == 0) {
+ blkdev->protocol = BLKIF_PROTOCOL_X86_64;
+ } else {
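+ /* unknown protocol string: fall back to the native ABI */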
+ blkdev->protocol = BLKIF_PROTOCOL_NATIVE;
}
blkdev->sring = xengnttab_map_grant_ref(blkdev->xendev.gnttabdev,
xen_be_bind_evtchn(&blkdev->xendev);
- xen_be_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, "
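+ /*
+ * Probe for grant copy support with an empty copy operation; it can
+ * only succeed when the gnttab interface implements it.
+ */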
+ blkdev->feature_grant_copy =
+ (xengnttab_grant_copy(blkdev->xendev.gnttabdev, 0, NULL) == 0);
+
+ xen_pv_printf(&blkdev->xendev, 3, "grant copy operation %s\n",
+ blkdev->feature_grant_copy ? "enabled" : "disabled");
+
+ xen_pv_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, "
"remote port %d, local port %d\n",
blkdev->xendev.protocol, blkdev->ring_ref,
blkdev->xendev.remote_port, blkdev->xendev.local_port);
blk_unref(blkdev->blk);
blkdev->blk = NULL;
}
- xen_be_unbind_evtchn(&blkdev->xendev);
+ xen_pv_unbind_evtchn(&blkdev->xendev);
if (blkdev->sring) {
xengnttab_unmap(blkdev->xendev.gnttabdev, blkdev->sring, 1);