#include "qemu/osdep.h"
#include "qapi/error.h"
-#include "qemu-common.h"
#include "cpu.h"
#include "trace.h"
#include "exec/address-spaces.h"
#include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+#include "qemu/module.h"
#include "hw/virtio/virtio.h"
+#include "migration/qemu-file-types.h"
#include "qemu/atomic.h"
#include "hw/virtio/virtio-bus.h"
-#include "migration/migration.h"
#include "hw/virtio/virtio-access.h"
#include "sysemu/dma.h"
VRingUsedElem ring[0];
} VRingUsed;
+typedef struct VRingMemoryRegionCaches {
+ struct rcu_head rcu;
+ MemoryRegionCache desc;
+ MemoryRegionCache avail;
+ MemoryRegionCache used;
+} VRingMemoryRegionCaches;
+
typedef struct VRing
{
unsigned int num;
hwaddr desc;
hwaddr avail;
hwaddr used;
+ VRingMemoryRegionCaches *caches;
} VRing;
struct VirtQueue
QLIST_ENTRY(VirtQueue) node;
};
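+/* Tear down one queue's cached desc/avail/used ring mappings and free the
+ * container; also used as the call_rcu() reclaim function below. */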
+static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
+{
+ if (!caches) {
+ return;
+ }
+
+ address_space_cache_destroy(&caches->desc);
+ address_space_cache_destroy(&caches->avail);
+ address_space_cache_destroy(&caches->used);
+ g_free(caches);
+}
+
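+/* Unpublish the caches from the vring and reclaim them after an RCU grace period. */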
+static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
+{
+ VRingMemoryRegionCaches *caches;
+
+ caches = atomic_read(&vq->vring.caches);
+ atomic_rcu_set(&vq->vring.caches, NULL);
+ if (caches) {
+ call_rcu(caches, virtio_free_region_cache, rcu);
+ }
+}
+
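+/* (Re)create the cached mappings for queue n's rings and publish them via RCU;
+ * if any ring cannot be mapped the queue is left with no caches at all. */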
+static void virtio_init_region_cache(VirtIODevice *vdev, int n)
+{
+ VirtQueue *vq = &vdev->vq[n];
+ VRingMemoryRegionCaches *old = vq->vring.caches;
+ VRingMemoryRegionCaches *new = NULL;
+ hwaddr addr, size;
+ int event_size;
+ int64_t len;
+
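+ /* With VIRTIO_RING_F_EVENT_IDX, each ring carries an extra 2-byte event index. */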
+ event_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
+
+ addr = vq->vring.desc;
+ if (!addr) {
+ goto out_no_cache;
+ }
+ new = g_new0(VRingMemoryRegionCaches, 1);
+ size = virtio_queue_get_desc_size(vdev, n);
+ len = address_space_cache_init(&new->desc, vdev->dma_as,
+ addr, size, false);
+ if (len < size) {
+ virtio_error(vdev, "Cannot map desc");
+ goto err_desc;
+ }
+
+ size = virtio_queue_get_used_size(vdev, n) + event_size;
+ len = address_space_cache_init(&new->used, vdev->dma_as,
+ vq->vring.used, size, true);
+ if (len < size) {
+ virtio_error(vdev, "Cannot map used");
+ goto err_used;
+ }
+
+ size = virtio_queue_get_avail_size(vdev, n) + event_size;
+ len = address_space_cache_init(&new->avail, vdev->dma_as,
+ vq->vring.avail, size, false);
+ if (len < size) {
+ virtio_error(vdev, "Cannot map avail");
+ goto err_avail;
+ }
+
+ atomic_rcu_set(&vq->vring.caches, new);
+ if (old) {
+ call_rcu(old, virtio_free_region_cache, rcu);
+ }
+ return;
+
+err_avail:
+ address_space_cache_destroy(&new->avail);
+err_used:
+ address_space_cache_destroy(&new->used);
+err_desc:
+ address_space_cache_destroy(&new->desc);
+out_no_cache:
+ g_free(new);
+ virtio_virtqueue_reset_region_cache(vq);
+}
+
/* virt queue functions */
void virtio_queue_update_rings(VirtIODevice *vdev, int n)
{
VRing *vring = &vdev->vq[n].vring;
- if (!vring->desc) {
+ if (!vring->num || !vring->desc || !vring->align) {
/* not yet setup -> nothing to do */
return;
}
vring->used = vring_align(vring->avail +
offsetof(VRingAvail, ring[vring->num]),
vring->align);
+ virtio_init_region_cache(vdev, n);
}
+/* Called within rcu_read_lock(). */
static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
- uint8_t *desc_ptr, int i)
+ MemoryRegionCache *cache, int i)
{
- memcpy(desc, desc_ptr + i * sizeof(VRingDesc), sizeof(VRingDesc));
+ address_space_read_cached(cache, i * sizeof(VRingDesc),
+ desc, sizeof(VRingDesc));
virtio_tswap64s(vdev, &desc->addr);
virtio_tswap32s(vdev, &desc->len);
virtio_tswap16s(vdev, &desc->flags);
virtio_tswap16s(vdev, &desc->next);
}
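+/* Fetch the queue's caches under rcu_read_lock(); they must already have been set up. */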
+static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
+{
+ VRingMemoryRegionCaches *caches = atomic_rcu_read(&vq->vring.caches);
+ assert(caches != NULL);
+ return caches;
+}
+/* Called within rcu_read_lock(). */
static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
- hwaddr pa;
- pa = vq->vring.avail + offsetof(VRingAvail, flags);
- return virtio_lduw_phys(vq->vdev, pa);
+ VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
+ hwaddr pa = offsetof(VRingAvail, flags);
+ return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
}
+/* Called within rcu_read_lock(). */
static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
- hwaddr pa;
- pa = vq->vring.avail + offsetof(VRingAvail, idx);
- vq->shadow_avail_idx = virtio_lduw_phys(vq->vdev, pa);
+ VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
+ hwaddr pa = offsetof(VRingAvail, idx);
+ vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
return vq->shadow_avail_idx;
}
+/* Called within rcu_read_lock(). */
static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
- hwaddr pa;
- pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
- return virtio_lduw_phys(vq->vdev, pa);
+ VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
+ hwaddr pa = offsetof(VRingAvail, ring[i]);
+ return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
}
+/* Called within rcu_read_lock(). */
static inline uint16_t vring_get_used_event(VirtQueue *vq)
{
return vring_avail_ring(vq, vq->vring.num);
}
+/* Called within rcu_read_lock(). */
static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
int i)
{
- hwaddr pa;
+ VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
+ hwaddr pa = offsetof(VRingUsed, ring[i]);
virtio_tswap32s(vq->vdev, &uelem->id);
virtio_tswap32s(vq->vdev, &uelem->len);
- pa = vq->vring.used + offsetof(VRingUsed, ring[i]);
- address_space_write(vq->vdev->dma_as, pa, MEMTXATTRS_UNSPECIFIED,
- (void *)uelem, sizeof(VRingUsedElem));
+ address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
+ address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
}
+/* Called within rcu_read_lock(). */
static uint16_t vring_used_idx(VirtQueue *vq)
{
- hwaddr pa;
- pa = vq->vring.used + offsetof(VRingUsed, idx);
- return virtio_lduw_phys(vq->vdev, pa);
+ VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
+ hwaddr pa = offsetof(VRingUsed, idx);
+ return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
}
+/* Called within rcu_read_lock(). */
static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
{
- hwaddr pa;
- pa = vq->vring.used + offsetof(VRingUsed, idx);
- virtio_stw_phys(vq->vdev, pa, val);
+ VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
+ hwaddr pa = offsetof(VRingUsed, idx);
+ virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
+ address_space_cache_invalidate(&caches->used, pa, sizeof(val));
vq->used_idx = val;
}
+/* Called within rcu_read_lock(). */
static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
+ VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
VirtIODevice *vdev = vq->vdev;
- hwaddr pa;
- pa = vq->vring.used + offsetof(VRingUsed, flags);
- virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) | mask);
+ hwaddr pa = offsetof(VRingUsed, flags);
+ uint16_t flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
+
+ virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
+ address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
}
+/* Called within rcu_read_lock(). */
static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
+ VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
VirtIODevice *vdev = vq->vdev;
- hwaddr pa;
- pa = vq->vring.used + offsetof(VRingUsed, flags);
- virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) & ~mask);
+ hwaddr pa = offsetof(VRingUsed, flags);
+ uint16_t flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
+
+ virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
+ address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
}
+/* Called within rcu_read_lock(). */
static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
{
+ VRingMemoryRegionCaches *caches;
hwaddr pa;
if (!vq->notification) {
return;
}
- pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
- virtio_stw_phys(vq->vdev, pa, val);
+
+ caches = vring_get_region_caches(vq);
+ pa = offsetof(VRingUsed, ring[vq->vring.num]);
+ virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
+ address_space_cache_invalidate(&caches->used, pa, sizeof(val));
}
void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
vq->notification = enable;
+
+ if (!vq->vring.desc) {
+ return;
+ }
+
+ rcu_read_lock();
if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
vring_set_avail_event(vq, vring_avail_idx(vq));
} else if (enable) {
/* Expose avail event/used flags before caller checks the avail idx. */
smp_mb();
}
+ rcu_read_unlock();
}
int virtio_queue_ready(VirtQueue *vq)
}
/* Fetch avail_idx from VQ memory only when we really need to know if
- * guest has added some buffers. */
-int virtio_queue_empty(VirtQueue *vq)
+ * the guest has added some buffers.
+ * Called within rcu_read_lock(). */
+static int virtio_queue_empty_rcu(VirtQueue *vq)
{
+ if (unlikely(vq->vdev->broken)) {
+ return 1;
+ }
+
+ if (unlikely(!vq->vring.avail)) {
+ return 1;
+ }
+
if (vq->shadow_avail_idx != vq->last_avail_idx) {
return 0;
}
return vring_avail_idx(vq) == vq->last_avail_idx;
}
+int virtio_queue_empty(VirtQueue *vq)
+{
+ bool empty;
+
+ if (unlikely(vq->vdev->broken)) {
+ return 1;
+ }
+
+ if (unlikely(!vq->vring.avail)) {
+ return 1;
+ }
+
+ if (vq->shadow_avail_idx != vq->last_avail_idx) {
+ return 0;
+ }
+
+ rcu_read_lock();
+ empty = vring_avail_idx(vq) == vq->last_avail_idx;
+ rcu_read_unlock();
+ return empty;
+}
+
static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
unsigned int len)
{
return true;
}
+/* Called within rcu_read_lock(). */
void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
unsigned int len, unsigned int idx)
{
return;
}
+ if (unlikely(!vq->vring.used)) {
+ return;
+ }
+
idx = (idx + vq->used_idx) % vq->vring.num;
uelem.id = elem->index;
vring_used_write(vq, &uelem, idx);
}
+/* Called within rcu_read_lock(). */
void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
uint16_t old, new;
return;
}
+ if (unlikely(!vq->vring.used)) {
+ return;
+ }
+
/* Make sure buffer is written before we update index. */
smp_wmb();
trace_virtqueue_flush(vq, count);
void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
unsigned int len)
{
+ rcu_read_lock();
virtqueue_fill(vq, elem, len, 0);
virtqueue_flush(vq, 1);
+ rcu_read_unlock();
}
+/* Called within rcu_read_lock(). */
static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
uint16_t num_heads = vring_avail_idx(vq) - idx;
return num_heads;
}
+/* Called within rcu_read_lock(). */
static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
unsigned int *head)
{
};
static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
- void *desc_ptr, unsigned int max,
+ MemoryRegionCache *desc_cache, unsigned int max,
unsigned int *next)
{
/* If this descriptor says it doesn't chain, we're done. */
return VIRTQUEUE_READ_DESC_ERROR;
}
- vring_desc_read(vdev, desc, desc_ptr, *next);
+ vring_desc_read(vdev, desc, desc_cache, *next);
return VIRTQUEUE_READ_DESC_MORE;
}
VirtIODevice *vdev = vq->vdev;
unsigned int max, idx;
unsigned int total_bufs, in_total, out_total;
- void *vring_desc_ptr;
- void *indirect_desc_ptr = NULL;
- hwaddr len = 0;
+ VRingMemoryRegionCaches *caches;
+ MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
+ int64_t len = 0;
int rc;
+ if (unlikely(!vq->vring.desc)) {
+ if (in_bytes) {
+ *in_bytes = 0;
+ }
+ if (out_bytes) {
+ *out_bytes = 0;
+ }
+ return;
+ }
+
+ rcu_read_lock();
idx = vq->last_avail_idx;
total_bufs = in_total = out_total = 0;
max = vq->vring.num;
- len = max * sizeof(VRingDesc);
- vring_desc_ptr = address_space_map(vdev->dma_as, vq->vring.desc, &len, false);
- if (len < max * sizeof(VRingDesc)) {
+ caches = vring_get_region_caches(vq);
+ if (caches->desc.len < max * sizeof(VRingDesc)) {
virtio_error(vdev, "Cannot map descriptor ring");
goto err;
}
while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
- void *desc_ptr = vring_desc_ptr;
+ MemoryRegionCache *desc_cache = &caches->desc;
unsigned int num_bufs;
VRingDesc desc;
unsigned int i;
goto err;
}
- vring_desc_read(vdev, &desc, desc_ptr, i);
+ vring_desc_read(vdev, &desc, desc_cache, i);
if (desc.flags & VRING_DESC_F_INDIRECT) {
- len = desc.len;
- if (desc.len % sizeof(VRingDesc)) {
+ if (!desc.len || (desc.len % sizeof(VRingDesc))) {
virtio_error(vdev, "Invalid size for indirect buffer table");
goto err;
}
}
/* loop over the indirect descriptor table */
- indirect_desc_ptr = address_space_map(vdev->dma_as, desc.addr,
- &len, false);
- desc_ptr = indirect_desc_ptr;
+ len = address_space_cache_init(&indirect_desc_cache,
+ vdev->dma_as,
+ desc.addr, desc.len, false);
+ desc_cache = &indirect_desc_cache;
if (len < desc.len) {
virtio_error(vdev, "Cannot map indirect buffer");
goto err;
max = desc.len / sizeof(VRingDesc);
num_bufs = i = 0;
- vring_desc_read(vdev, &desc, desc_ptr, i);
+ vring_desc_read(vdev, &desc, desc_cache, i);
}
do {
goto done;
}
- rc = virtqueue_read_next_desc(vdev, &desc, desc_ptr, max, &i);
+ rc = virtqueue_read_next_desc(vdev, &desc, desc_cache, max, &i);
} while (rc == VIRTQUEUE_READ_DESC_MORE);
if (rc == VIRTQUEUE_READ_DESC_ERROR) {
goto err;
}
- if (desc_ptr == indirect_desc_ptr) {
- address_space_unmap(vdev->dma_as, desc_ptr, len, false, 0);
- indirect_desc_ptr = NULL;
+ if (desc_cache == &indirect_desc_cache) {
+ address_space_cache_destroy(&indirect_desc_cache);
total_bufs++;
} else {
total_bufs = num_bufs;
}
done:
- if (indirect_desc_ptr) {
- address_space_unmap(vdev->dma_as, indirect_desc_ptr, len, false, 0);
- }
- address_space_unmap(vdev->dma_as, vring_desc_ptr, len, false, 0);
+ address_space_cache_destroy(&indirect_desc_cache);
if (in_bytes) {
*in_bytes = in_total;
}
if (out_bytes) {
*out_bytes = out_total;
}
+ rcu_read_unlock();
return;
err:
}
static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
- hwaddr *addr, unsigned int *num_sg,
+ hwaddr *addr, unsigned int num_sg,
int is_write)
{
unsigned int i;
hwaddr len;
- for (i = 0; i < *num_sg; i++) {
+ for (i = 0; i < num_sg; i++) {
len = sg[i].iov_len;
sg[i].iov_base = dma_memory_map(vdev->dma_as,
addr[i], &len, is_write ?
void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
{
- virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, &elem->in_num, 1);
- virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, &elem->out_num, 0);
+ virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, elem->in_num, 1);
+ virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, elem->out_num, 0);
}
static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
assert(sz >= sizeof(VirtQueueElement));
elem = g_malloc(out_sg_end);
+ trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
elem->out_num = out_num;
elem->in_num = in_num;
elem->in_addr = (void *)elem + in_addr_ofs;
void *virtqueue_pop(VirtQueue *vq, size_t sz)
{
unsigned int i, head, max;
- void *vring_desc_ptr;
- void *indirect_desc_ptr = NULL;
- void *desc_ptr;
- hwaddr len;
+ VRingMemoryRegionCaches *caches;
+ MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
+ MemoryRegionCache *desc_cache;
+ int64_t len;
VirtIODevice *vdev = vq->vdev;
VirtQueueElement *elem = NULL;
- unsigned out_num, in_num;
+ unsigned out_num, in_num, elem_entries;
hwaddr addr[VIRTQUEUE_MAX_SIZE];
struct iovec iov[VIRTQUEUE_MAX_SIZE];
VRingDesc desc;
if (unlikely(vdev->broken)) {
return NULL;
}
- if (virtio_queue_empty(vq)) {
- return NULL;
+ rcu_read_lock();
+ if (virtio_queue_empty_rcu(vq)) {
+ goto done;
}
/* Needed after virtio_queue_empty(), see comment in
* virtqueue_num_heads(). */
smp_rmb();
/* When we start there are none of either input nor output. */
- out_num = in_num = 0;
+ out_num = in_num = elem_entries = 0;
max = vq->vring.num;
if (vq->inuse >= vq->vring.num) {
virtio_error(vdev, "Virtqueue size exceeded");
- return NULL;
+ goto done;
}
if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
- return NULL;
+ goto done;
}
if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
i = head;
- len = max * sizeof(VRingDesc);
- vring_desc_ptr = address_space_map(vdev->dma_as, vq->vring.desc, &len, false);
- if (len < max * sizeof(VRingDesc)) {
+ caches = vring_get_region_caches(vq);
+ if (caches->desc.len < max * sizeof(VRingDesc)) {
virtio_error(vdev, "Cannot map descriptor ring");
goto done;
}
- desc_ptr = vring_desc_ptr;
- vring_desc_read(vdev, &desc, desc_ptr, i);
+ desc_cache = &caches->desc;
+ vring_desc_read(vdev, &desc, desc_cache, i);
if (desc.flags & VRING_DESC_F_INDIRECT) {
- if (desc.len % sizeof(VRingDesc)) {
+ if (!desc.len || (desc.len % sizeof(VRingDesc))) {
virtio_error(vdev, "Invalid size for indirect buffer table");
goto done;
}
/* loop over the indirect descriptor table */
- len = desc.len;
- indirect_desc_ptr = address_space_map(vdev->dma_as, desc.addr, &len, false);
- desc_ptr = indirect_desc_ptr;
+ len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
+ desc.addr, desc.len, false);
+ desc_cache = &indirect_desc_cache;
if (len < desc.len) {
virtio_error(vdev, "Cannot map indirect buffer");
goto done;
max = desc.len / sizeof(VRingDesc);
i = 0;
- vring_desc_read(vdev, &desc, desc_ptr, i);
+ vring_desc_read(vdev, &desc, desc_cache, i);
}
/* Collect all the descriptors */
}
/* If we've got too many, that implies a descriptor loop. */
- if ((in_num + out_num) > max) {
+ if (++elem_entries > max) {
virtio_error(vdev, "Looped descriptor");
goto err_undo_map;
}
- rc = virtqueue_read_next_desc(vdev, &desc, desc_ptr, max, &i);
+ rc = virtqueue_read_next_desc(vdev, &desc, desc_cache, max, &i);
} while (rc == VIRTQUEUE_READ_DESC_MORE);
if (rc == VIRTQUEUE_READ_DESC_ERROR) {
trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
done:
- if (indirect_desc_ptr) {
- address_space_unmap(vdev->dma_as, indirect_desc_ptr, len, false, 0);
- }
- address_space_unmap(vdev->dma_as, vring_desc_ptr, len, false, 0);
+ address_space_cache_destroy(&indirect_desc_cache);
+ rcu_read_unlock();
return elem;
/* TODO: teach all callers that this can fail, and return failure instead
* of asserting here.
- * When we do, we might be able to re-enable NDEBUG below.
+ * This is just one thing (there are probably more) that must be
+ * fixed before we can allow NDEBUG compilation.
*/
-#ifdef NDEBUG
-#error building with NDEBUG is not supported
-#endif
assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
assert(ARRAY_SIZE(data.out_addr) >= data.out_num);
}
}
}
+
+ if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) !=
+ (val & VIRTIO_CONFIG_S_DRIVER_OK)) {
+ virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK);
+ }
+
if (k->set_status) {
k->set_status(vdev, val);
}
vdev->status = val;
+
return 0;
}
-bool target_words_bigendian(void);
static enum virtio_device_endian virtio_default_endian(void)
{
if (target_words_bigendian()) {
k->reset(vdev);
}
+ vdev->start_on_kick = false;
+ vdev->started = false;
vdev->broken = false;
vdev->guest_features = 0;
vdev->queue_sel = 0;
vdev->vq[i].notification = true;
vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
vdev->vq[i].inuse = 0;
+ virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
}
}
void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
{
+ if (!vdev->vq[n].vring.num) {
+ return;
+ }
vdev->vq[n].vring.desc = addr;
virtio_queue_update_rings(vdev, n);
}
void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
hwaddr avail, hwaddr used)
{
+ if (!vdev->vq[n].vring.num) {
+ return;
+ }
vdev->vq[n].vring.desc = desc;
vdev->vq[n].vring.avail = avail;
vdev->vq[n].vring.used = used;
+ virtio_init_region_cache(vdev, n);
}
void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
*/
assert(k->has_variable_vring_alignment);
- vdev->vq[n].vring.align = align;
- virtio_queue_update_rings(vdev, n);
+ if (align) {
+ vdev->vq[n].vring.align = align;
+ virtio_queue_update_rings(vdev, n);
+ }
}
static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
{
+ bool ret = false;
+
if (vq->vring.desc && vq->handle_aio_output) {
VirtIODevice *vdev = vq->vdev;
trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
- return vq->handle_aio_output(vdev, vq);
+ ret = vq->handle_aio_output(vdev, vq);
+
+ if (unlikely(vdev->start_on_kick)) {
+ virtio_set_started(vdev, true);
+ }
}
- return false;
+ return ret;
}
static void virtio_queue_notify_vq(VirtQueue *vq)
trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
vq->handle_output(vdev, vq);
+
+ if (unlikely(vdev->start_on_kick)) {
+ virtio_set_started(vdev, true);
+ }
}
}
void virtio_queue_notify(VirtIODevice *vdev, int n)
{
- virtio_queue_notify_vq(&vdev->vq[n]);
+ VirtQueue *vq = &vdev->vq[n];
+
+ if (unlikely(!vq->vring.desc || vdev->broken)) {
+ return;
+ }
+
+ trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
+ if (vq->handle_aio_output) {
+ event_notifier_set(&vq->host_notifier);
+ } else if (vq->handle_output) {
+ vq->handle_output(vdev, vq);
+
+ if (unlikely(vdev->start_on_kick)) {
+ virtio_set_started(vdev, true);
+ }
+ }
}
uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
vdev->vq[n].vring.num = 0;
vdev->vq[n].vring.num_default = 0;
+ vdev->vq[n].handle_output = NULL;
+ vdev->vq[n].handle_aio_output = NULL;
}
static void virtio_set_isr(VirtIODevice *vdev, int value)
}
}
+/* Called within rcu_read_lock(). */
static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
{
uint16_t old, new;
void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
{
- if (!virtio_should_notify(vdev, vq)) {
+ bool should_notify;
+ rcu_read_lock();
+ should_notify = virtio_should_notify(vdev, vq);
+ rcu_read_unlock();
+
+ if (!should_notify) {
return;
}
event_notifier_set(&vq->guest_notifier);
}
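+/* Set ISR bit 0 and raise the queue's configured interrupt vector. */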
+static void virtio_irq(VirtQueue *vq)
+{
+ virtio_set_isr(vq->vdev, 0x1);
+ virtio_notify_vector(vq->vdev, vq->vector);
+}
+
void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
- if (!virtio_should_notify(vdev, vq)) {
+ bool should_notify;
+ rcu_read_lock();
+ should_notify = virtio_should_notify(vdev, vq);
+ rcu_read_unlock();
+
+ if (!should_notify) {
return;
}
trace_virtio_notify(vdev, vq);
- virtio_set_isr(vq->vdev, 0x1);
- virtio_notify_vector(vdev, vq->vector);
+ virtio_irq(vq);
}
void virtio_notify_config(VirtIODevice *vdev)
return vdev->broken;
}
+static bool virtio_started_needed(void *opaque)
+{
+ VirtIODevice *vdev = opaque;
+
+ return vdev->started;
+}
+
static const VMStateDescription vmstate_virtqueue = {
.name = "virtqueue_state",
.version_id = 1,
};
static int get_extra_state(QEMUFile *f, void *pv, size_t size,
- VMStateField *field)
+ const VMStateField *field)
{
VirtIODevice *vdev = pv;
BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
}
static int put_extra_state(QEMUFile *f, void *pv, size_t size,
- VMStateField *field, QJSON *vmdesc)
+ const VMStateField *field, QJSON *vmdesc)
{
VirtIODevice *vdev = pv;
BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
}
};
+static const VMStateDescription vmstate_virtio_started = {
+ .name = "virtio/started",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = &virtio_started_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_BOOL(started, VirtIODevice),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
static const VMStateDescription vmstate_virtio = {
.name = "virtio",
.version_id = 1,
&vmstate_virtio_ringsize,
&vmstate_virtio_broken,
&vmstate_virtio_extra_state,
+ &vmstate_virtio_started,
NULL
}
};
-void virtio_save(VirtIODevice *vdev, QEMUFile *f)
+int virtio_save(VirtIODevice *vdev, QEMUFile *f)
{
BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
if (k->has_variable_vring_alignment) {
qemu_put_be32(f, vdev->vq[i].vring.align);
}
- /* XXX virtio-1 devices */
+ /*
+ * Save desc now; the rest of the ring addresses are saved in
+ * subsections for VIRTIO-1 devices.
+ */
qemu_put_be64(f, vdev->vq[i].vring.desc);
qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
if (k->save_queue) {
}
if (vdc->vmsd) {
- vmstate_save_state(f, vdc->vmsd, vdev, NULL);
+ int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
+ if (ret) {
+ return ret;
+ }
}
/* Subsections */
- vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
+ return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
}
/* A wrapper for use as a VMState .put function */
static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
- VMStateField *field, QJSON *vmdesc)
+ const VMStateField *field, QJSON *vmdesc)
{
- virtio_save(VIRTIO_DEVICE(opaque), f);
-
- return 0;
+ return virtio_save(VIRTIO_DEVICE(opaque), f);
}
/* A wrapper for use as a VMState .get function */
static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
- VMStateField *field)
+ const VMStateField *field)
{
VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));
int virtio_set_features(VirtIODevice *vdev, uint64_t val)
{
- /*
+ int ret;
+ /*
* The driver must not attempt to set features after feature negotiation
* has finished.
*/
if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
return -EINVAL;
}
- return virtio_set_features_nocheck(vdev, val);
+ ret = virtio_set_features_nocheck(vdev, val);
+ if (!ret) {
+ if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
+ /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches. */
+ int i;
+ for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
+ if (vdev->vq[i].vring.num != 0) {
+ virtio_init_region_cache(vdev, i);
+ }
+ }
+ }
+
+ if (!virtio_device_started(vdev, vdev->status) &&
+ !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
+ vdev->start_on_kick = true;
+ }
+ }
+ return ret;
+}
+
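+/* Return the config space size implied by the negotiated features: the largest
+ * 'end' offset among the feature_sizes entries enabled in host_features. */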
+size_t virtio_feature_get_config_size(VirtIOFeature *feature_sizes,
+ uint64_t host_features)
+{
+ size_t config_size = 0;
+ int i;
+
+ for (i = 0; feature_sizes[i].flags != 0; i++) {
+ if (host_features & feature_sizes[i].flags) {
+ config_size = MAX(feature_sizes[i].end, config_size);
+ }
+ }
+
+ return config_size;
}
int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
vdev->vq[i].signalled_used_valid = false;
vdev->vq[i].notification = true;
- if (vdev->vq[i].vring.desc) {
- /* XXX virtio-1 devices */
- virtio_queue_update_rings(vdev, i);
- } else if (vdev->vq[i].last_avail_idx) {
+ if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
error_report("VQ %d address 0x0 "
"inconsistent with Host index 0x%x",
i, vdev->vq[i].last_avail_idx);
-                return -1;
+            return -1;
}
if (k->load_queue) {
ret = k->load_queue(qbus->parent, i, f);
}
}
+ if (!virtio_device_started(vdev, vdev->status) &&
+ !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
+ vdev->start_on_kick = true;
+ }
+
+ rcu_read_lock();
for (i = 0; i < num; i++) {
if (vdev->vq[i].vring.desc) {
uint16_t nheads;
+
+ /*
+ * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
+ * only the region cache needs to be set up. Legacy devices need
+ * to calculate used and avail ring addresses based on the desc
+ * address.
+ */
+ if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
+ virtio_init_region_cache(vdev, i);
+ } else {
+ virtio_queue_update_rings(vdev, i);
+ }
+
nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
/* Check it isn't doing strange things with descriptor numbers. */
if (nheads > vdev->vq[i].vring.num) {
}
}
}
+ rcu_read_unlock();
return 0;
}
void virtio_cleanup(VirtIODevice *vdev)
{
qemu_del_vm_change_state_handler(vdev->vmstate);
- g_free(vdev->config);
- g_free(vdev->vq);
- g_free(vdev->vector_queues);
}
static void virtio_vmstate_change(void *opaque, int running, RunState state)
VirtIODevice *vdev = opaque;
BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
- bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
+ bool backend_run = running && virtio_device_started(vdev, vdev->status);
vdev->vm_running = running;
if (backend_run) {
{
DeviceState *vdev = data;
- object_initialize(vdev, vdev_size, vdev_name);
- object_property_add_child(proxy_obj, "virtio-backend", OBJECT(vdev), NULL);
- object_unref(OBJECT(vdev));
+ object_initialize_child(proxy_obj, "virtio-backend", vdev, vdev_size,
+ vdev_name, &error_abort, NULL);
qdev_alias_all_properties(vdev, proxy_obj);
}
g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
}
+ vdev->start_on_kick = false;
+ vdev->started = false;
vdev->device_id = device_id;
vdev->status = 0;
atomic_set(&vdev->isr, 0);
} else {
vdev->config = NULL;
}
- vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
- vdev);
+ vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev),
+ virtio_vmstate_change, vdev);
vdev->device_endian = virtio_default_endian();
vdev->use_guest_notifier_mask = true;
}
return vdev->vq[n].vring.desc;
}
+bool virtio_queue_enabled(VirtIODevice *vdev, int n)
+{
+ return virtio_queue_get_desc_addr(vdev, n) != 0;
+}
+
hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
{
return vdev->vq[n].vring.avail;
vdev->vq[n].shadow_avail_idx = idx;
}
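+/* Resync last_avail_idx (and its shadow copy) from the used ring index in guest memory. */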
+void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
+{
+ rcu_read_lock();
+ if (vdev->vq[n].vring.desc) {
+ vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
+ vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
+ }
+ rcu_read_unlock();
+}
+
void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
{
- vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
+ rcu_read_lock();
+ if (vdev->vq[n].vring.desc) {
+ vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
+ }
+ rcu_read_unlock();
}
void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
{
VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
if (event_notifier_test_and_clear(n)) {
- virtio_notify_vector(vq->vdev, vq->vector);
+ virtio_irq(vq);
}
}
VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
bool progress;
- if (virtio_queue_empty(vq)) {
+ if (!vq->vring.desc || virtio_queue_empty(vq)) {
return false;
}
return &vq->host_notifier;
}
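+/* Ask the transport to map/unmap the host notifier MemoryRegion; -1 if unsupported. */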
+int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
+ MemoryRegion *mr, bool assign)
+{
+ BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
+ VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+
+ if (k->set_host_notifier_mr) {
+ return k->set_host_notifier_mr(qbus->parent, n, mr, assign);
+ }
+
+ return -1;
+}
+
void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
{
g_free(vdev->bus_name);
error_vreport(fmt, ap);
va_end(ap);
- vdev->broken = true;
-
if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
- virtio_set_status(vdev, vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET);
+ vdev->status = vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET;
virtio_notify_config(vdev);
}
+
+ vdev->broken = true;
+}
+
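+/* The guest memory map changed: rebuild the region caches of every configured queue. */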
+static void virtio_memory_listener_commit(MemoryListener *listener)
+{
+ VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
+ int i;
+
+ for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
+ if (vdev->vq[i].vring.num == 0) {
+ break;
+ }
+ virtio_init_region_cache(vdev, i);
+ }
}
static void virtio_device_realize(DeviceState *dev, Error **errp)
virtio_bus_device_plugged(vdev, &err);
if (err != NULL) {
error_propagate(errp, err);
+ vdc->unrealize(dev, NULL);
return;
}
+
+ vdev->listener.commit = virtio_memory_listener_commit;
+ memory_listener_register(&vdev->listener, vdev->dma_as);
}
static void virtio_device_unrealize(DeviceState *dev, Error **errp)
vdev->bus_name = NULL;
}
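+/* Drop each queue's region caches before freeing the virtqueue array itself. */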
+static void virtio_device_free_virtqueues(VirtIODevice *vdev)
+{
+ int i;
+ if (!vdev->vq) {
+ return;
+ }
+
+ for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
+ if (vdev->vq[i].vring.num == 0) {
+ break;
+ }
+ virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
+ }
+ g_free(vdev->vq);
+}
+
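+/* QOM instance finalizer: unregister the memory listener and free remaining
+ * per-device state. */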
+static void virtio_device_instance_finalize(Object *obj)
+{
+ VirtIODevice *vdev = VIRTIO_DEVICE(obj);
+
+ memory_listener_unregister(&vdev->listener);
+ virtio_device_free_virtqueues(vdev);
+
+ g_free(vdev->config);
+ g_free(vdev->vector_queues);
+}
+
static Property virtio_properties[] = {
DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
+ DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true),
DEFINE_PROP_END_OF_LIST(),
};
static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
{
VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
- int n, r, err;
+ int i, n, r, err;
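+ /* Batch all host notifier (ioeventfd) updates into a single memory transaction. */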
+ memory_region_transaction_begin();
for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
VirtQueue *vq = &vdev->vq[n];
if (!virtio_queue_get_num(vdev, n)) {
}
event_notifier_set(&vq->host_notifier);
}
+ memory_region_transaction_commit();
return 0;
assign_error:
+ i = n; /* save n for a second iteration after the transaction is committed. */
while (--n >= 0) {
VirtQueue *vq = &vdev->vq[n];
if (!virtio_queue_get_num(vdev, n)) {
r = virtio_bus_set_host_notifier(qbus, n, false);
assert(r >= 0);
}
+ memory_region_transaction_commit();
+
+ while (--i >= 0) {
+ if (!virtio_queue_get_num(vdev, i)) {
+ continue;
+ }
+ virtio_bus_cleanup_host_notifier(qbus, i);
+ }
return err;
}
VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
int n, r;
+ memory_region_transaction_begin();
for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
VirtQueue *vq = &vdev->vq[n];
r = virtio_bus_set_host_notifier(qbus, n, false);
assert(r >= 0);
}
+ memory_region_transaction_commit();
+
+ for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
+ if (!virtio_queue_get_num(vdev, n)) {
+ continue;
+ }
+ virtio_bus_cleanup_host_notifier(qbus, n);
+ }
}
void virtio_device_stop_ioeventfd(VirtIODevice *vdev)
.parent = TYPE_DEVICE,
.instance_size = sizeof(VirtIODevice),
.class_init = virtio_device_class_init,
+ .instance_finalize = virtio_device_instance_finalize,
.abstract = true,
.class_size = sizeof(VirtioDeviceClass),
};