#include <inttypes.h>
+#include "trace.h"
+#include "qemu-error.h"
#include "virtio.h"
-#include "sysemu.h"
+#include "qemu-barrier.h"
/* The alignment to use between consumer and producer parts of vring.
* x86 pagesize again. */
#define VIRTIO_PCI_VRING_ALIGN 4096
-/* QEMU doesn't strictly need write barriers since everything runs in
- * lock-step. We'll leave the calls to wmb() in though to make it obvious for
- * KVM or if kqemu gets SMP support.
- */
-#define wmb() do { } while (0)
-
typedef struct VRingDesc
{
uint64_t addr;
VRing vring;
target_phys_addr_t pa;
uint16_t last_avail_idx;
+ /* Last used index value we have signalled on */
+ uint16_t signalled_used;
+
+ /* Last used index value we have signalled on */
+ bool signalled_used_valid;
+
+ /* Notification enabled? */
+ bool notification;
+
int inuse;
+
uint16_t vector;
void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
+ VirtIODevice *vdev;
+ EventNotifier guest_notifier;
+ EventNotifier host_notifier;
};
-#define VIRTIO_PCI_QUEUE_MAX 16
-
/* virt queue functions */
static void virtqueue_init(VirtQueue *vq)
{
return lduw_phys(pa);
}
+static inline uint16_t vring_used_event(VirtQueue *vq)
+{
+ return vring_avail_ring(vq, vq->vring.num);
+}
+
static inline void vring_used_ring_id(VirtQueue *vq, int i, uint32_t val)
{
target_phys_addr_t pa;
return lduw_phys(pa);
}
-static inline void vring_used_idx_increment(VirtQueue *vq, uint16_t val)
+static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
{
target_phys_addr_t pa;
pa = vq->vring.used + offsetof(VRingUsed, idx);
- stw_phys(pa, vring_used_idx(vq) + val);
+ stw_phys(pa, val);
}
static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
stw_phys(pa, lduw_phys(pa) & ~mask);
}
+static inline void vring_avail_event(VirtQueue *vq, uint16_t val)
+{
+ target_phys_addr_t pa;
+ if (!vq->notification) {
+ return;
+ }
+ pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
+ stw_phys(pa, val);
+}
+
void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
- if (enable)
+ vq->notification = enable;
+ if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
+ vring_avail_event(vq, vring_avail_idx(vq));
+ } else if (enable) {
vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
- else
+ } else {
vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
+ }
}
int virtio_queue_ready(VirtQueue *vq)
unsigned int offset;
int i;
+ trace_virtqueue_fill(vq, elem, len, idx);
+
offset = 0;
for (i = 0; i < elem->in_num; i++) {
size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
+ uint16_t old, new;
/* Make sure buffer is written before we update index. */
- wmb();
- vring_used_idx_increment(vq, count);
+ smp_wmb();
+ trace_virtqueue_flush(vq, count);
+ old = vring_used_idx(vq);
+ new = old + count;
+ vring_used_idx_set(vq, new);
vq->inuse -= count;
+ if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
+ vq->signalled_used_valid = false;
}
void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
/* Check it isn't doing very strange things with descriptor numbers. */
if (num_heads > vq->vring.num) {
- fprintf(stderr, "Guest moved used index from %u to %u",
- idx, vring_avail_idx(vq));
+ error_report("Guest moved used index from %u to %u",
+ idx, vring_avail_idx(vq));
exit(1);
}
/* If their number is silly, that's a fatal mistake. */
if (head >= vq->vring.num) {
- fprintf(stderr, "Guest says index %u is available", head);
+ error_report("Guest says index %u is available", head);
exit(1);
}
/* Check they're not leading us off end of descriptors. */
next = vring_desc_next(desc_pa, i);
/* Make sure compiler knows to grab that: we don't want it changing! */
- wmb();
+ smp_wmb();
if (next >= max) {
- fprintf(stderr, "Desc next is %u", next);
+ error_report("Desc next is %u", next);
exit(1);
}
if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
- fprintf(stderr, "Invalid size for indirect buffer table\n");
+ error_report("Invalid size for indirect buffer table");
exit(1);
}
/* If we've got too many, that implies a descriptor loop. */
if (num_bufs >= max) {
- fprintf(stderr, "Looped descriptor");
+ error_report("Looped descriptor");
exit(1);
}
do {
/* If we've got too many, that implies a descriptor loop. */
if (++num_bufs > max) {
- fprintf(stderr, "Looped descriptor");
+ error_report("Looped descriptor");
exit(1);
}
return 0;
}
+void virtqueue_map_sg(struct iovec *sg, target_phys_addr_t *addr,
+ size_t num_sg, int is_write)
+{
+ unsigned int i;
+ target_phys_addr_t len;
+
+ for (i = 0; i < num_sg; i++) {
+ len = sg[i].iov_len;
+ sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
+ if (sg[i].iov_base == NULL || len != sg[i].iov_len) {
+ error_report("virtio: trying to map MMIO memory");
+ exit(1);
+ }
+ }
+}
+
int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
{
unsigned int i, head, max;
target_phys_addr_t desc_pa = vq->vring.desc;
- target_phys_addr_t len;
if (!virtqueue_num_heads(vq, vq->last_avail_idx))
return 0;
max = vq->vring.num;
i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
+ if (vq->vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX)) {
+ vring_avail_event(vq, vring_avail_idx(vq));
+ }
if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
- fprintf(stderr, "Invalid size for indirect buffer table\n");
+ error_report("Invalid size for indirect buffer table");
exit(1);
}
i = 0;
}
+ /* Collect all the descriptors */
do {
struct iovec *sg;
- int is_write = 0;
if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
+ if (elem->in_num >= ARRAY_SIZE(elem->in_sg)) {
+ error_report("Too many write descriptors in indirect table");
+ exit(1);
+ }
elem->in_addr[elem->in_num] = vring_desc_addr(desc_pa, i);
sg = &elem->in_sg[elem->in_num++];
- is_write = 1;
- } else
+ } else {
+ if (elem->out_num >= ARRAY_SIZE(elem->out_sg)) {
+ error_report("Too many read descriptors in indirect table");
+ exit(1);
+ }
+ elem->out_addr[elem->out_num] = vring_desc_addr(desc_pa, i);
sg = &elem->out_sg[elem->out_num++];
+ }
- /* Grab the first descriptor, and check it's OK. */
sg->iov_len = vring_desc_len(desc_pa, i);
- len = sg->iov_len;
-
- sg->iov_base = cpu_physical_memory_map(vring_desc_addr(desc_pa, i),
- &len, is_write);
-
- if (sg->iov_base == NULL || len != sg->iov_len) {
- fprintf(stderr, "virtio: trying to map MMIO memory\n");
- exit(1);
- }
/* If we've got too many, that implies a descriptor loop. */
if ((elem->in_num + elem->out_num) > max) {
- fprintf(stderr, "Looped descriptor");
+ error_report("Looped descriptor");
exit(1);
}
} while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
+ /* Now map what we have collected */
+ virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
+ virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);
+
elem->index = head;
vq->inuse++;
+ trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
return elem->in_num + elem->out_num;
}
virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
}
+void virtio_set_status(VirtIODevice *vdev, uint8_t val)
+{
+ trace_virtio_set_status(vdev, val);
+
+ if (vdev->set_status) {
+ vdev->set_status(vdev, val);
+ }
+ vdev->status = val;
+}
+
void virtio_reset(void *opaque)
{
VirtIODevice *vdev = opaque;
int i;
+ virtio_set_status(vdev, 0);
+
if (vdev->reset)
vdev->reset(vdev);
- vdev->features = 0;
+ vdev->guest_features = 0;
vdev->queue_sel = 0;
vdev->status = 0;
vdev->isr = 0;
vdev->vq[i].last_avail_idx = 0;
vdev->vq[i].pa = 0;
vdev->vq[i].vector = VIRTIO_NO_VECTOR;
+ vdev->vq[i].signalled_used = 0;
+ vdev->vq[i].signalled_used_valid = false;
+ vdev->vq[i].notification = true;
}
}
return vdev->vq[n].vring.num;
}
-void virtio_queue_notify(VirtIODevice *vdev, int n)
+void virtio_queue_notify_vq(VirtQueue *vq)
{
- if (n < VIRTIO_PCI_QUEUE_MAX && vdev->vq[n].vring.desc) {
- vdev->vq[n].handle_output(vdev, &vdev->vq[n]);
+ if (vq->vring.desc) {
+ VirtIODevice *vdev = vq->vdev;
+ trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
+ vq->handle_output(vdev, vq);
}
}
+void virtio_queue_notify(VirtIODevice *vdev, int n)
+{
+ virtio_queue_notify_vq(&vdev->vq[n]);
+}
+
uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
{
return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
return &vdev->vq[i];
}
-void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
+void virtio_irq(VirtQueue *vq)
+{
+ trace_virtio_irq(vq);
+ vq->vdev->isr |= 0x01;
+ virtio_notify_vector(vq->vdev, vq->vector);
+}
+
+/* Assuming a given event_idx value from the other size, if
+ * we have just incremented index from old to new_idx,
+ * should we trigger an event? */
+static inline int vring_need_event(uint16_t event, uint16_t new, uint16_t old)
{
+ /* Note: Xen has similar logic for notification hold-off
+ * in include/xen/interface/io/ring.h with req_event and req_prod
+ * corresponding to event_idx + 1 and new respectively.
+ * Note also that req_event and req_prod in Xen start at 1,
+ * event indexes in virtio start at 0. */
+ return (uint16_t)(new - event - 1) < (uint16_t)(new - old);
+}
+
+static bool vring_notify(VirtIODevice *vdev, VirtQueue *vq)
+{
+ uint16_t old, new;
+ bool v;
/* Always notify when queue is empty (when feature acknowledge) */
- if ((vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT) &&
- (!(vdev->features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) ||
- (vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx)))
+ if (((vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) &&
+ !vq->inuse && vring_avail_idx(vq) == vq->last_avail_idx)) {
+ return true;
+ }
+
+ if (!(vdev->guest_features & (1 << VIRTIO_RING_F_EVENT_IDX))) {
+ return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
+ }
+
+ v = vq->signalled_used_valid;
+ vq->signalled_used_valid = true;
+ old = vq->signalled_used;
+ new = vq->signalled_used = vring_used_idx(vq);
+ return !v || vring_need_event(vring_used_event(vq), new, old);
+}
+
+void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
+{
+ if (!vring_notify(vdev, vq)) {
return;
+ }
+ trace_virtio_notify(vdev, vq);
vdev->isr |= 0x01;
virtio_notify_vector(vdev, vq->vector);
}
qemu_put_8s(f, &vdev->status);
qemu_put_8s(f, &vdev->isr);
qemu_put_be16s(f, &vdev->queue_sel);
- qemu_put_be32s(f, &vdev->features);
+ qemu_put_be32s(f, &vdev->guest_features);
qemu_put_be32(f, vdev->config_len);
qemu_put_buffer(f, vdev->config, vdev->config_len);
int virtio_load(VirtIODevice *vdev, QEMUFile *f)
{
int num, i, ret;
+ uint32_t features;
+ uint32_t supported_features =
+ vdev->binding->get_features(vdev->binding_opaque);
if (vdev->binding->load_config) {
ret = vdev->binding->load_config(vdev->binding_opaque, f);
qemu_get_8s(f, &vdev->status);
qemu_get_8s(f, &vdev->isr);
qemu_get_be16s(f, &vdev->queue_sel);
- qemu_get_be32s(f, &vdev->features);
+ qemu_get_be32s(f, &features);
+ if (features & ~supported_features) {
+ error_report("Features 0x%x unsupported. Allowed features: 0x%x",
+ features, supported_features);
+ return -1;
+ }
+ if (vdev->set_features)
+ vdev->set_features(vdev, features);
+ vdev->guest_features = features;
vdev->config_len = qemu_get_be32(f);
qemu_get_buffer(f, vdev->config, vdev->config_len);
vdev->vq[i].vring.num = qemu_get_be32(f);
vdev->vq[i].pa = qemu_get_be64(f);
qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
+ vdev->vq[i].signalled_used_valid = false;
+ vdev->vq[i].notification = true;
if (vdev->vq[i].pa) {
+ uint16_t nheads;
virtqueue_init(&vdev->vq[i]);
- }
+ nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
+ /* Check it isn't doing very strange things with descriptor numbers. */
+ if (nheads > vdev->vq[i].vring.num) {
+ error_report("VQ %d size 0x%x Guest index 0x%x "
+ "inconsistent with Host index 0x%x: delta 0x%x",
+ i, vdev->vq[i].vring.num,
+ vring_avail_idx(&vdev->vq[i]),
+ vdev->vq[i].last_avail_idx, nheads);
+ return -1;
+ }
+ } else if (vdev->vq[i].last_avail_idx) {
+ error_report("VQ %d address 0x0 "
+ "inconsistent with Host index 0x%x",
+ i, vdev->vq[i].last_avail_idx);
+ return -1;
+ }
if (vdev->binding->load_queue) {
ret = vdev->binding->load_queue(vdev->binding_opaque, i, f);
if (ret)
void virtio_cleanup(VirtIODevice *vdev)
{
+ qemu_del_vm_change_state_handler(vdev->vmstate);
if (vdev->config)
- qemu_free(vdev->config);
- qemu_free(vdev->vq);
+ g_free(vdev->config);
+ g_free(vdev->vq);
+ g_free(vdev);
+}
+
+static void virtio_vmstate_change(void *opaque, int running, RunState state)
+{
+ VirtIODevice *vdev = opaque;
+ bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
+ vdev->vm_running = running;
+
+ if (backend_run) {
+ virtio_set_status(vdev, vdev->status);
+ }
+
+ if (vdev->binding->vmstate_change) {
+ vdev->binding->vmstate_change(vdev->binding_opaque, backend_run);
+ }
+
+ if (!backend_run) {
+ virtio_set_status(vdev, vdev->status);
+ }
}
VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
size_t config_size, size_t struct_size)
{
VirtIODevice *vdev;
+ int i;
- vdev = qemu_mallocz(struct_size);
+ vdev = g_malloc0(struct_size);
vdev->device_id = device_id;
vdev->status = 0;
vdev->isr = 0;
vdev->queue_sel = 0;
vdev->config_vector = VIRTIO_NO_VECTOR;
- vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
+ vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
+ vdev->vm_running = runstate_is_running();
+ for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
+ vdev->vq[i].vector = VIRTIO_NO_VECTOR;
+ vdev->vq[i].vdev = vdev;
+ }
vdev->name = name;
vdev->config_len = config_size;
if (vdev->config_len)
- vdev->config = qemu_mallocz(config_size);
+ vdev->config = g_malloc0(config_size);
else
vdev->config = NULL;
+ vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change, vdev);
+
return vdev;
}
vdev->binding = binding;
vdev->binding_opaque = opaque;
}
+
+target_phys_addr_t virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
+{
+ return vdev->vq[n].vring.desc;
+}
+
+target_phys_addr_t virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
+{
+ return vdev->vq[n].vring.avail;
+}
+
+target_phys_addr_t virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
+{
+ return vdev->vq[n].vring.used;
+}
+
+target_phys_addr_t virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
+{
+ return vdev->vq[n].vring.desc;
+}
+
+target_phys_addr_t virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
+{
+ return sizeof(VRingDesc) * vdev->vq[n].vring.num;
+}
+
+target_phys_addr_t virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
+{
+ return offsetof(VRingAvail, ring) +
+ sizeof(uint64_t) * vdev->vq[n].vring.num;
+}
+
+target_phys_addr_t virtio_queue_get_used_size(VirtIODevice *vdev, int n)
+{
+ return offsetof(VRingUsed, ring) +
+ sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
+}
+
+target_phys_addr_t virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
+{
+ return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
+ virtio_queue_get_used_size(vdev, n);
+}
+
+uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
+{
+ return vdev->vq[n].last_avail_idx;
+}
+
+void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
+{
+ vdev->vq[n].last_avail_idx = idx;
+}
+
+VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
+{
+ return vdev->vq + n;
+}
+
+EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
+{
+ return &vq->guest_notifier;
+}
+EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
+{
+ return &vq->host_notifier;
+}