#include <inttypes.h>
+#include "trace.h"
+#include "qemu-error.h"
#include "virtio.h"
-#include "sysemu.h"
/* The alignment to use between consumer and producer parts of vring.
* x86 pagesize again. */
/* QEMU doesn't strictly need write barriers since everything runs in
* lock-step. We'll leave the calls to wmb() in though to make it obvious for
* KVM or if kqemu gets SMP support.
+ * In any case, we must prevent the compiler from reordering the code.
+ * TODO: we likely need some rmb()/mb() as well.
*/
-#define wmb() do { } while (0)
+
+#define wmb() __asm__ __volatile__("": : :"memory")
typedef struct VRingDesc
{
target_phys_addr_t pa;
uint16_t last_avail_idx;
int inuse;
+ uint16_t vector;
void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
+ VirtIODevice *vdev;
+ EventNotifier guest_notifier;
+ EventNotifier host_notifier;
};
-#define VIRTIO_PCI_QUEUE_MAX 16
-
/* virt queue functions */
static void virtqueue_init(VirtQueue *vq)
{
VIRTIO_PCI_VRING_ALIGN);
}
-static inline uint64_t vring_desc_addr(VirtQueue *vq, int i)
+static inline uint64_t vring_desc_addr(target_phys_addr_t desc_pa, int i)
{
target_phys_addr_t pa;
- pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
+ pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, addr);
return ldq_phys(pa);
}
-static inline uint32_t vring_desc_len(VirtQueue *vq, int i)
+static inline uint32_t vring_desc_len(target_phys_addr_t desc_pa, int i)
{
target_phys_addr_t pa;
- pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
+ pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, len);
return ldl_phys(pa);
}
-static inline uint16_t vring_desc_flags(VirtQueue *vq, int i)
+static inline uint16_t vring_desc_flags(target_phys_addr_t desc_pa, int i)
{
target_phys_addr_t pa;
- pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
+ pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, flags);
return lduw_phys(pa);
}
-static inline uint16_t vring_desc_next(VirtQueue *vq, int i)
+static inline uint16_t vring_desc_next(target_phys_addr_t desc_pa, int i)
{
target_phys_addr_t pa;
- pa = vq->vring.desc + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
+ pa = desc_pa + sizeof(VRingDesc) * i + offsetof(VRingDesc, next);
return lduw_phys(pa);
}
unsigned int offset;
int i;
+ trace_virtqueue_fill(vq, elem, len, idx);
+
offset = 0;
for (i = 0; i < elem->in_num; i++) {
size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
{
/* Make sure buffer is written before we update index. */
wmb();
+ trace_virtqueue_flush(vq, count);
vring_used_idx_increment(vq, count);
vq->inuse -= count;
}
/* Check it isn't doing very strange things with descriptor numbers. */
if (num_heads > vq->vring.num) {
- fprintf(stderr, "Guest moved used index from %u to %u",
- idx, vring_avail_idx(vq));
+ error_report("Guest moved used index from %u to %u",
+ idx, vring_avail_idx(vq));
exit(1);
}
/* If their number is silly, that's a fatal mistake. */
if (head >= vq->vring.num) {
- fprintf(stderr, "Guest says index %u is available", head);
+ error_report("Guest says index %u is available", head);
exit(1);
}
return head;
}
-static unsigned virtqueue_next_desc(VirtQueue *vq, unsigned int i)
+static unsigned virtqueue_next_desc(target_phys_addr_t desc_pa,
+ unsigned int i, unsigned int max)
{
unsigned int next;
/* If this descriptor says it doesn't chain, we're done. */
- if (!(vring_desc_flags(vq, i) & VRING_DESC_F_NEXT))
- return vq->vring.num;
+ if (!(vring_desc_flags(desc_pa, i) & VRING_DESC_F_NEXT))
+ return max;
/* Check they're not leading us off end of descriptors. */
- next = vring_desc_next(vq, i);
+ next = vring_desc_next(desc_pa, i);
/* Make sure compiler knows to grab that: we don't want it changing! */
wmb();
- if (next >= vq->vring.num) {
- fprintf(stderr, "Desc next is %u", next);
+ if (next >= max) {
+ error_report("Desc next is %u", next);
exit(1);
}
int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes)
{
unsigned int idx;
- int num_bufs, in_total, out_total;
+ int total_bufs, in_total, out_total;
idx = vq->last_avail_idx;
- num_bufs = in_total = out_total = 0;
+ total_bufs = in_total = out_total = 0;
while (virtqueue_num_heads(vq, idx)) {
+ unsigned int max, num_bufs, indirect = 0;
+ target_phys_addr_t desc_pa;
int i;
+ max = vq->vring.num;
+ num_bufs = total_bufs;
i = virtqueue_get_head(vq, idx++);
+ desc_pa = vq->vring.desc;
+
+ if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
+ if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
+ error_report("Invalid size for indirect buffer table");
+ exit(1);
+ }
+
+ /* If we've got too many, that implies a descriptor loop. */
+ if (num_bufs >= max) {
+ error_report("Looped descriptor");
+ exit(1);
+ }
+
+ /* loop over the indirect descriptor table */
+ indirect = 1;
+ max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
+ num_bufs = i = 0;
+ desc_pa = vring_desc_addr(desc_pa, i);
+ }
+
do {
/* If we've got too many, that implies a descriptor loop. */
- if (++num_bufs > vq->vring.num) {
- fprintf(stderr, "Looped descriptor");
+ if (++num_bufs > max) {
+ error_report("Looped descriptor");
exit(1);
}
- if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE) {
+ if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
if (in_bytes > 0 &&
- (in_total += vring_desc_len(vq, i)) >= in_bytes)
+ (in_total += vring_desc_len(desc_pa, i)) >= in_bytes)
return 1;
} else {
if (out_bytes > 0 &&
- (out_total += vring_desc_len(vq, i)) >= out_bytes)
+ (out_total += vring_desc_len(desc_pa, i)) >= out_bytes)
return 1;
}
- } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num);
+ } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
+
+ if (!indirect)
+ total_bufs = num_bufs;
+ else
+ total_bufs++;
}
return 0;
}
-int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
+void virtqueue_map_sg(struct iovec *sg, target_phys_addr_t *addr,
+ size_t num_sg, int is_write)
{
- unsigned int i, head;
+ unsigned int i;
target_phys_addr_t len;
+ for (i = 0; i < num_sg; i++) {
+ len = sg[i].iov_len;
+ sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
+ if (sg[i].iov_base == NULL || len != sg[i].iov_len) {
+ error_report("virtio: trying to map MMIO memory");
+ exit(1);
+ }
+ }
+}
+
+int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem)
+{
+ unsigned int i, head, max;
+ target_phys_addr_t desc_pa = vq->vring.desc;
+
if (!virtqueue_num_heads(vq, vq->last_avail_idx))
return 0;
/* When we start there are none of either input nor output. */
elem->out_num = elem->in_num = 0;
+ max = vq->vring.num;
+
i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
+
+ if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_INDIRECT) {
+ if (vring_desc_len(desc_pa, i) % sizeof(VRingDesc)) {
+ error_report("Invalid size for indirect buffer table");
+ exit(1);
+ }
+
+ /* loop over the indirect descriptor table */
+ max = vring_desc_len(desc_pa, i) / sizeof(VRingDesc);
+ desc_pa = vring_desc_addr(desc_pa, i);
+ i = 0;
+ }
+
+ /* Collect all the descriptors */
do {
struct iovec *sg;
- int is_write = 0;
- if (vring_desc_flags(vq, i) & VRING_DESC_F_WRITE) {
- elem->in_addr[elem->in_num] = vring_desc_addr(vq, i);
+ if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
+ elem->in_addr[elem->in_num] = vring_desc_addr(desc_pa, i);
sg = &elem->in_sg[elem->in_num++];
- is_write = 1;
- } else
+ } else {
+ elem->out_addr[elem->out_num] = vring_desc_addr(desc_pa, i);
sg = &elem->out_sg[elem->out_num++];
-
- /* Grab the first descriptor, and check it's OK. */
- sg->iov_len = vring_desc_len(vq, i);
- len = sg->iov_len;
-
- sg->iov_base = cpu_physical_memory_map(vring_desc_addr(vq, i), &len, is_write);
-
- if (sg->iov_base == NULL || len != sg->iov_len) {
- fprintf(stderr, "virtio: trying to map MMIO memory\n");
- exit(1);
}
+ sg->iov_len = vring_desc_len(desc_pa, i);
+
/* If we've got too many, that implies a descriptor loop. */
- if ((elem->in_num + elem->out_num) > vq->vring.num) {
- fprintf(stderr, "Looped descriptor");
+ if ((elem->in_num + elem->out_num) > max) {
+ error_report("Looped descriptor");
exit(1);
}
- } while ((i = virtqueue_next_desc(vq, i)) != vq->vring.num);
+ } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
+
+ /* Now map what we have collected */
+ virtqueue_map_sg(elem->in_sg, elem->in_addr, elem->in_num, 1);
+ virtqueue_map_sg(elem->out_sg, elem->out_addr, elem->out_num, 0);
elem->index = head;
vq->inuse++;
+ trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
return elem->in_num + elem->out_num;
}
/* virtio device */
+static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
+{
+ if (vdev->binding->notify) {
+ vdev->binding->notify(vdev->binding_opaque, vector);
+ }
+}
void virtio_update_irq(VirtIODevice *vdev)
{
- if (vdev->binding->update_irq) {
- vdev->binding->update_irq(vdev->binding_opaque);
- }
+ virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
}
void virtio_reset(void *opaque)
VirtIODevice *vdev = opaque;
int i;
+ virtio_set_status(vdev, 0);
+
if (vdev->reset)
vdev->reset(vdev);
- vdev->features = 0;
+ vdev->guest_features = 0;
vdev->queue_sel = 0;
vdev->status = 0;
vdev->isr = 0;
- virtio_update_irq(vdev);
+ vdev->config_vector = VIRTIO_NO_VECTOR;
+ virtio_notify_vector(vdev, vdev->config_vector);
for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
vdev->vq[i].vring.desc = 0;
vdev->vq[i].vring.used = 0;
vdev->vq[i].last_avail_idx = 0;
vdev->vq[i].pa = 0;
+ vdev->vq[i].vector = VIRTIO_NO_VECTOR;
}
}
void virtio_queue_set_addr(VirtIODevice *vdev, int n, target_phys_addr_t addr)
{
- if (addr == 0) {
- virtio_reset(vdev);
- } else {
- vdev->vq[n].pa = addr;
- virtqueue_init(&vdev->vq[n]);
- }
+ vdev->vq[n].pa = addr;
+ virtqueue_init(&vdev->vq[n]);
}
target_phys_addr_t virtio_queue_get_addr(VirtIODevice *vdev, int n)
return vdev->vq[n].vring.num;
}
+void virtio_queue_notify_vq(VirtQueue *vq)
+{
+ if (vq->vring.desc) {
+ VirtIODevice *vdev = vq->vdev;
+ trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
+ vq->handle_output(vdev, vq);
+ }
+}
+
void virtio_queue_notify(VirtIODevice *vdev, int n)
{
- if (n < VIRTIO_PCI_QUEUE_MAX && vdev->vq[n].vring.desc) {
- vdev->vq[n].handle_output(vdev, &vdev->vq[n]);
+ if (n < VIRTIO_PCI_QUEUE_MAX) {
+ virtio_queue_notify_vq(&vdev->vq[n]);
}
}
+uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
+{
+ return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
+ VIRTIO_NO_VECTOR;
+}
+
+void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
+{
+ if (n < VIRTIO_PCI_QUEUE_MAX)
+ vdev->vq[n].vector = vector;
+}
+
VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
void (*handle_output)(VirtIODevice *, VirtQueue *))
{
return &vdev->vq[i];
}
+void virtio_irq(VirtQueue *vq)
+{
+ trace_virtio_irq(vq);
+ vq->vdev->isr |= 0x01;
+ virtio_notify_vector(vq->vdev, vq->vector);
+}
+
void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
/* Always notify when queue is empty (when feature acknowledge) */
if ((vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT) &&
- (!(vdev->features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) ||
+ (!(vdev->guest_features & (1 << VIRTIO_F_NOTIFY_ON_EMPTY)) ||
(vq->inuse || vring_avail_idx(vq) != vq->last_avail_idx)))
return;
+ trace_virtio_notify(vdev, vq);
vdev->isr |= 0x01;
- virtio_update_irq(vdev);
+ virtio_notify_vector(vdev, vq->vector);
}
void virtio_notify_config(VirtIODevice *vdev)
return;
vdev->isr |= 0x03;
- virtio_update_irq(vdev);
+ virtio_notify_vector(vdev, vdev->config_vector);
}
void virtio_save(VirtIODevice *vdev, QEMUFile *f)
{
int i;
- /* FIXME: load/save binding. */
- //pci_device_save(&vdev->pci_dev, f);
+ if (vdev->binding->save_config)
+ vdev->binding->save_config(vdev->binding_opaque, f);
qemu_put_8s(f, &vdev->status);
qemu_put_8s(f, &vdev->isr);
qemu_put_be16s(f, &vdev->queue_sel);
- qemu_put_be32s(f, &vdev->features);
+ qemu_put_be32s(f, &vdev->guest_features);
qemu_put_be32(f, vdev->config_len);
qemu_put_buffer(f, vdev->config, vdev->config_len);
qemu_put_be32(f, vdev->vq[i].vring.num);
qemu_put_be64(f, vdev->vq[i].pa);
qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
+ if (vdev->binding->save_queue)
+ vdev->binding->save_queue(vdev->binding_opaque, i, f);
}
}
-void virtio_load(VirtIODevice *vdev, QEMUFile *f)
+int virtio_load(VirtIODevice *vdev, QEMUFile *f)
{
- int num, i;
+ int num, i, ret;
+ uint32_t features;
+ uint32_t supported_features =
+ vdev->binding->get_features(vdev->binding_opaque);
- /* FIXME: load/save binding. */
- //pci_device_load(&vdev->pci_dev, f);
+ if (vdev->binding->load_config) {
+ ret = vdev->binding->load_config(vdev->binding_opaque, f);
+ if (ret)
+ return ret;
+ }
qemu_get_8s(f, &vdev->status);
qemu_get_8s(f, &vdev->isr);
qemu_get_be16s(f, &vdev->queue_sel);
- qemu_get_be32s(f, &vdev->features);
+ qemu_get_be32s(f, &features);
+ if (features & ~supported_features) {
+ error_report("Features 0x%x unsupported. Allowed features: 0x%x",
+ features, supported_features);
+ return -1;
+ }
+ if (vdev->set_features)
+ vdev->set_features(vdev, features);
+ vdev->guest_features = features;
vdev->config_len = qemu_get_be32(f);
qemu_get_buffer(f, vdev->config, vdev->config_len);
qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
if (vdev->vq[i].pa) {
+ uint16_t nheads;
virtqueue_init(&vdev->vq[i]);
+ nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
+ /* Check it isn't doing very strange things with descriptor numbers. */
+ if (nheads > vdev->vq[i].vring.num) {
+ error_report("VQ %d size 0x%x Guest index 0x%x "
+ "inconsistent with Host index 0x%x: delta 0x%x\n",
+ i, vdev->vq[i].vring.num,
+ vring_avail_idx(&vdev->vq[i]),
+ vdev->vq[i].last_avail_idx, nheads);
+ return -1;
+ }
+ } else if (vdev->vq[i].last_avail_idx) {
+ error_report("VQ %d address 0x0 "
+ "inconsistent with Host index 0x%x\n",
+ i, vdev->vq[i].last_avail_idx);
+ return -1;
+ }
+ if (vdev->binding->load_queue) {
+ ret = vdev->binding->load_queue(vdev->binding_opaque, i, f);
+ if (ret)
+ return ret;
}
}
- virtio_update_irq(vdev);
+ virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
+ return 0;
}
void virtio_cleanup(VirtIODevice *vdev)
{
+ qemu_del_vm_change_state_handler(vdev->vmstate);
if (vdev->config)
qemu_free(vdev->config);
qemu_free(vdev->vq);
}
+static void virtio_vmstate_change(void *opaque, int running, int reason)
+{
+ VirtIODevice *vdev = opaque;
+ bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
+ vdev->vm_running = running;
+
+ if (backend_run) {
+ virtio_set_status(vdev, vdev->status);
+ }
+
+ if (vdev->binding->vmstate_change) {
+ vdev->binding->vmstate_change(vdev->binding_opaque, backend_run);
+ }
+
+ if (!backend_run) {
+ virtio_set_status(vdev, vdev->status);
+ }
+}
+
VirtIODevice *virtio_common_init(const char *name, uint16_t device_id,
size_t config_size, size_t struct_size)
{
VirtIODevice *vdev;
+ int i;
vdev = qemu_mallocz(struct_size);
vdev->status = 0;
vdev->isr = 0;
vdev->queue_sel = 0;
+ vdev->config_vector = VIRTIO_NO_VECTOR;
vdev->vq = qemu_mallocz(sizeof(VirtQueue) * VIRTIO_PCI_QUEUE_MAX);
+ for(i = 0; i < VIRTIO_PCI_QUEUE_MAX; i++) {
+ vdev->vq[i].vector = VIRTIO_NO_VECTOR;
+ vdev->vq[i].vdev = vdev;
+ }
vdev->name = name;
vdev->config_len = config_size;
else
vdev->config = NULL;
- qemu_register_reset(virtio_reset, 0, vdev);
+ vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change, vdev);
return vdev;
}
vdev->binding = binding;
vdev->binding_opaque = opaque;
}
+
+target_phys_addr_t virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
+{
+ return vdev->vq[n].vring.desc;
+}
+
+target_phys_addr_t virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
+{
+ return vdev->vq[n].vring.avail;
+}
+
+target_phys_addr_t virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
+{
+ return vdev->vq[n].vring.used;
+}
+
+target_phys_addr_t virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
+{
+ return vdev->vq[n].vring.desc;
+}
+
+target_phys_addr_t virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
+{
+ return sizeof(VRingDesc) * vdev->vq[n].vring.num;
+}
+
+target_phys_addr_t virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
+{
+ return offsetof(VRingAvail, ring) +
+ sizeof(uint64_t) * vdev->vq[n].vring.num;
+}
+
+target_phys_addr_t virtio_queue_get_used_size(VirtIODevice *vdev, int n)
+{
+ return offsetof(VRingUsed, ring) +
+ sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
+}
+
+target_phys_addr_t virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
+{
+ return vdev->vq[n].vring.used - vdev->vq[n].vring.desc +
+ virtio_queue_get_used_size(vdev, n);
+}
+
+uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
+{
+ return vdev->vq[n].last_avail_idx;
+}
+
+void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
+{
+ vdev->vq[n].last_avail_idx = idx;
+}
+
+VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
+{
+ return vdev->vq + n;
+}
+
+EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
+{
+ return &vq->guest_notifier;
+}
+EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
+{
+ return &vq->host_notifier;
+}