*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
*/
#include <sys/ioctl.h>
#include "hw/hw.h"
#include "range.h"
#include <linux/vhost.h>
+#include "exec-memory.h"
static void vhost_dev_sync_region(struct vhost_dev *dev,
+ MemoryRegionSection *section,
uint64_t mfirst, uint64_t mlast,
uint64_t rfirst, uint64_t rlast)
{
    uint64_t start = MAX(mfirst, rfirst);
    uint64_t end = MIN(mlast, rlast);
    vhost_log_chunk_t *from = dev->log + start / VHOST_LOG_CHUNK;
    vhost_log_chunk_t *to = dev->log + end / VHOST_LOG_CHUNK + 1;
    uint64_t addr = (start / VHOST_LOG_CHUNK) * VHOST_LOG_CHUNK;
- assert(end / VHOST_LOG_CHUNK < dev->log_size);
- assert(start / VHOST_LOG_CHUNK < dev->log_size);
if (end < start) {
return;
}
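+    /* The empty-range case has returned above, so both chunk indices
+     * must now lie within the allocated log. */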
+ assert(end / VHOST_LOG_CHUNK < dev->log_size);
+ assert(start / VHOST_LOG_CHUNK < dev->log_size);
+
for (;from < to; ++from) {
vhost_log_chunk_t log;
int bit;
        /* Atomically fetch and clear this chunk of the dirty log. */
        log = __sync_fetch_and_and(from, 0);
        while ((bit = sizeof(log) > sizeof(int) ?
                ffsll(log) : ffs(log))) {
ram_addr_t ram_addr;
bit -= 1;
- ram_addr = cpu_get_physical_page_desc(addr + bit * VHOST_LOG_PAGE);
- cpu_physical_memory_set_dirty(ram_addr);
+ ram_addr = section->offset_within_region + bit * VHOST_LOG_PAGE;
+ memory_region_set_dirty(section->mr, ram_addr, VHOST_LOG_PAGE);
log &= ~(0x1ull << bit);
}
addr += VHOST_LOG_CHUNK;
}
}
-static int vhost_client_sync_dirty_bitmap(CPUPhysMemoryClient *client,
- target_phys_addr_t start_addr,
- target_phys_addr_t end_addr)
+static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
+ MemoryRegionSection *section,
+ target_phys_addr_t start_addr,
+ target_phys_addr_t end_addr)
{
- struct vhost_dev *dev = container_of(client, struct vhost_dev, client);
int i;
+
if (!dev->log_enabled || !dev->started) {
return 0;
}
for (i = 0; i < dev->mem->nregions; ++i) {
struct vhost_memory_region *reg = dev->mem->regions + i;
- vhost_dev_sync_region(dev, start_addr, end_addr,
+ vhost_dev_sync_region(dev, section, start_addr, end_addr,
reg->guest_phys_addr,
range_get_last(reg->guest_phys_addr,
reg->memory_size));
}
for (i = 0; i < dev->nvqs; ++i) {
struct vhost_virtqueue *vq = dev->vqs + i;
- vhost_dev_sync_region(dev, start_addr, end_addr, vq->used_phys,
+ vhost_dev_sync_region(dev, section, start_addr, end_addr, vq->used_phys,
range_get_last(vq->used_phys, vq->used_size));
}
return 0;
}
+static void vhost_log_sync(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ struct vhost_dev *dev = container_of(listener, struct vhost_dev,
+ memory_listener);
+ target_phys_addr_t start_addr = section->offset_within_address_space;
+    /* vhost_dev_sync_region() treats the last address as inclusive. */
+    target_phys_addr_t end_addr = start_addr + section->size - 1;
+
+ vhost_sync_dirty_bitmap(dev, section, start_addr, end_addr);
+}
+
/* Assign/unassign. Keep an unsorted array of non-overlapping
* memory regions in dev->mem. */
static void vhost_dev_unassign_memory(struct vhost_dev *dev,
if (start_addr <= reg->guest_phys_addr && memlast >= reglast) {
--dev->mem->nregions;
--to;
- assert(to >= 0);
++overlap_middle;
continue;
}
{
vhost_log_chunk_t *log;
uint64_t log_base;
- int r;
+ int r, i;
if (size) {
- log = qemu_mallocz(size * sizeof *log);
+ log = g_malloc0(size * sizeof *log);
} else {
log = NULL;
}
log_base = (uint64_t)(unsigned long)log;
r = ioctl(dev->control, VHOST_SET_LOG_BASE, &log_base);
assert(r >= 0);
- vhost_client_sync_dirty_bitmap(&dev->client, 0,
- (target_phys_addr_t)~0x0ull);
+ for (i = 0; i < dev->n_mem_sections; ++i) {
+ /* Sync only the range covered by the old log */
+ vhost_sync_dirty_bitmap(dev, &dev->mem_sections[i], 0,
+ dev->log_size * VHOST_LOG_CHUNK - 1);
+ }
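+    /* The kernel now logs into the new buffer; flush any bits still
+     * pending in the old one so no dirty pages are lost. */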
if (dev->log) {
- qemu_free(dev->log);
+ g_free(dev->log);
}
dev->log = log;
dev->log_size = size;
return 0;
}
-static void vhost_client_set_memory(CPUPhysMemoryClient *client,
- target_phys_addr_t start_addr,
- ram_addr_t size,
- ram_addr_t phys_offset)
+static struct vhost_memory_region *vhost_dev_find_reg(struct vhost_dev *dev,
+ uint64_t start_addr,
+ uint64_t size)
{
- struct vhost_dev *dev = container_of(client, struct vhost_dev, client);
- ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
+ int i, n = dev->mem->nregions;
+ for (i = 0; i < n; ++i) {
+ struct vhost_memory_region *reg = dev->mem->regions + i;
+ if (ranges_overlap(reg->guest_phys_addr, reg->memory_size,
+ start_addr, size)) {
+ return reg;
+ }
+ }
+ return NULL;
+}
+
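+/* Return true if the vhost memory table needs an update for this range:
+ * there is no matching region, the range extends beyond the region
+ * found, or its host userspace address changed. */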
+static bool vhost_dev_cmp_memory(struct vhost_dev *dev,
+ uint64_t start_addr,
+ uint64_t size,
+ uint64_t uaddr)
+{
+ struct vhost_memory_region *reg = vhost_dev_find_reg(dev, start_addr, size);
+ uint64_t reglast;
+ uint64_t memlast;
+
+ if (!reg) {
+ return true;
+ }
+
+ reglast = range_get_last(reg->guest_phys_addr, reg->memory_size);
+ memlast = range_get_last(start_addr, size);
+
+ /* Need to extend region? */
+ if (start_addr < reg->guest_phys_addr || memlast > reglast) {
+ return true;
+ }
+ /* userspace_addr changed? */
+ return uaddr != reg->userspace_addr + start_addr - reg->guest_phys_addr;
+}
+
+static void vhost_set_memory(MemoryListener *listener,
+ MemoryRegionSection *section,
+ bool add)
+{
+ struct vhost_dev *dev = container_of(listener, struct vhost_dev,
+ memory_listener);
+ target_phys_addr_t start_addr = section->offset_within_address_space;
+ ram_addr_t size = section->size;
+ bool log_dirty = memory_region_is_logging(section->mr);
int s = offsetof(struct vhost_memory, regions) +
(dev->mem->nregions + 1) * sizeof dev->mem->regions[0];
uint64_t log_size;
int r;
- dev->mem = qemu_realloc(dev->mem, s);
+ void *ram;
+
+ dev->mem = g_realloc(dev->mem, s);
+
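+    /* Writes from the vhost kernel thread would bypass QEMU's dirty
+     * tracking, so treat regions under dirty logging (e.g. a VGA
+     * framebuffer) as removals. */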
+ if (log_dirty) {
+ add = false;
+ }
assert(size);
+    /* Optimize no-change case. At least cirrus_vga does this a lot at
+     * this time. */
+ ram = memory_region_get_ram_ptr(section->mr) + section->offset_within_region;
+ if (add) {
+ if (!vhost_dev_cmp_memory(dev, start_addr, size, (uintptr_t)ram)) {
+ /* Region exists with same address. Nothing to do. */
+ return;
+ }
+ } else {
+ if (!vhost_dev_find_reg(dev, start_addr, size)) {
+ /* Removing region that we don't access. Nothing to do. */
+ return;
+ }
+ }
+
vhost_dev_unassign_memory(dev, start_addr, size);
- if (flags == IO_MEM_RAM) {
+ if (add) {
/* Add given mapping, merging adjacent regions if any */
- vhost_dev_assign_memory(dev, start_addr, size,
- (uintptr_t)qemu_get_ram_ptr(phys_offset));
+ vhost_dev_assign_memory(dev, start_addr, size, (uintptr_t)ram);
} else {
/* Remove old mapping for this memory, if any. */
vhost_dev_unassign_memory(dev, start_addr, size);
}
}
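+/* vhost only maps RAM in the system address space; ignore I/O regions
+ * and any other address spaces. */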
+static bool vhost_section(MemoryRegionSection *section)
+{
+ return section->address_space == get_system_memory()
+ && memory_region_is_ram(section->mr);
+}
+
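+/* Table updates happen directly in region_add/region_del, so the
+ * transaction hooks have nothing to batch. */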
+static void vhost_begin(MemoryListener *listener)
+{
+}
+
+static void vhost_commit(MemoryListener *listener)
+{
+}
+
+static void vhost_region_add(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ struct vhost_dev *dev = container_of(listener, struct vhost_dev,
+ memory_listener);
+
+ if (!vhost_section(section)) {
+ return;
+ }
+
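+    /* Track the section: dirty-log syncing walks all RAM sections. */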
+ ++dev->n_mem_sections;
+ dev->mem_sections = g_renew(MemoryRegionSection, dev->mem_sections,
+ dev->n_mem_sections);
+ dev->mem_sections[dev->n_mem_sections - 1] = *section;
+ vhost_set_memory(listener, section, true);
+}
+
+static void vhost_region_del(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ struct vhost_dev *dev = container_of(listener, struct vhost_dev,
+ memory_listener);
+ int i;
+
+ if (!vhost_section(section)) {
+ return;
+ }
+
+ vhost_set_memory(listener, section, false);
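+    /* Drop the tracked section, matching by address-space offset. */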
+ for (i = 0; i < dev->n_mem_sections; ++i) {
+ if (dev->mem_sections[i].offset_within_address_space
+ == section->offset_within_address_space) {
+ --dev->n_mem_sections;
+ memmove(&dev->mem_sections[i], &dev->mem_sections[i+1],
+ (dev->n_mem_sections - i) * sizeof(*dev->mem_sections));
+ break;
+ }
+ }
+}
+
+static void vhost_region_nop(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+}
+
static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
struct vhost_virtqueue *vq,
unsigned idx, bool enable_log)
return r;
}
-static int vhost_client_migration_log(CPUPhysMemoryClient *client,
- int enable)
+static int vhost_migration_log(MemoryListener *listener, int enable)
{
- struct vhost_dev *dev = container_of(client, struct vhost_dev, client);
+ struct vhost_dev *dev = container_of(listener, struct vhost_dev,
+ memory_listener);
int r;
if (!!enable == dev->log_enabled) {
return 0;
return r;
}
if (dev->log) {
- qemu_free(dev->log);
+ g_free(dev->log);
}
dev->log = NULL;
dev->log_size = 0;
return 0;
}
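+/* Failure to toggle dirty logging would silently corrupt migration,
+ * so treat it as fatal. */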
+static void vhost_log_global_start(MemoryListener *listener)
+{
+ int r;
+
+ r = vhost_migration_log(listener, true);
+ if (r < 0) {
+ abort();
+ }
+}
+
+static void vhost_log_global_stop(MemoryListener *listener)
+{
+ int r;
+
+ r = vhost_migration_log(listener, false);
+ if (r < 0) {
+ abort();
+ }
+}
+
+static void vhost_log_start(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ /* FIXME: implement */
+}
+
+static void vhost_log_stop(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ /* FIXME: implement */
+}
+
static int vhost_virtqueue_init(struct vhost_dev *dev,
struct VirtIODevice *vdev,
struct vhost_virtqueue *vq,
};
struct VirtQueue *vvq = virtio_get_queue(vdev, idx);
- if (!vdev->binding->set_host_notifier) {
- fprintf(stderr, "binding does not support host notifiers\n");
- return -ENOSYS;
- }
-
vq->num = state.num = virtio_queue_get_num(vdev, idx);
r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state);
if (r) {
r = -errno;
goto fail_alloc;
}
- r = vdev->binding->set_host_notifier(vdev->binding_opaque, idx, true);
- if (r < 0) {
- fprintf(stderr, "Error binding host notifier: %d\n", -r);
- goto fail_host_notifier;
- }
-
file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq));
r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file);
if (r) {
fail_call:
fail_kick:
- vdev->binding->set_host_notifier(vdev->binding_opaque, idx, false);
-fail_host_notifier:
fail_alloc:
cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
0, 0);
.index = idx,
};
int r;
- r = vdev->binding->set_host_notifier(vdev->binding_opaque, idx, false);
- if (r < 0) {
- fprintf(stderr, "vhost VQ %d host cleanup failed: %d\n", idx, r);
- fflush(stderr);
- }
- assert (r >= 0);
r = ioctl(dev->control, VHOST_GET_VRING_BASE, &state);
if (r < 0) {
fprintf(stderr, "vhost VQ %d ring restore failed: %d\n", idx, r);
0, virtio_queue_get_desc_size(vdev, idx));
}
-int vhost_dev_init(struct vhost_dev *hdev, int devfd, bool force)
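+/* vhost receives virtqueue kicks through the host notifier fd set up
+ * with VHOST_SET_VRING_KICK, so the listener's eventfd hooks are
+ * deliberately empty. */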
+static void vhost_eventfd_add(MemoryListener *listener,
+ MemoryRegionSection *section,
+ bool match_data, uint64_t data, EventNotifier *e)
+{
+}
+
+static void vhost_eventfd_del(MemoryListener *listener,
+ MemoryRegionSection *section,
+ bool match_data, uint64_t data, EventNotifier *e)
+{
+}
+
+int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath,
+ bool force)
{
uint64_t features;
int r;
if (devfd >= 0) {
hdev->control = devfd;
} else {
- hdev->control = open("/dev/vhost-net", O_RDWR);
+ hdev->control = open(devpath, O_RDWR);
if (hdev->control < 0) {
return -errno;
}
}
hdev->features = features;
- hdev->client.set_memory = vhost_client_set_memory;
- hdev->client.sync_dirty_bitmap = vhost_client_sync_dirty_bitmap;
- hdev->client.migration_log = vhost_client_migration_log;
- hdev->client.log_start = NULL;
- hdev->client.log_stop = NULL;
- hdev->mem = qemu_mallocz(offsetof(struct vhost_memory, regions));
+ hdev->memory_listener = (MemoryListener) {
+ .begin = vhost_begin,
+ .commit = vhost_commit,
+ .region_add = vhost_region_add,
+ .region_del = vhost_region_del,
+ .region_nop = vhost_region_nop,
+ .log_start = vhost_log_start,
+ .log_stop = vhost_log_stop,
+ .log_sync = vhost_log_sync,
+ .log_global_start = vhost_log_global_start,
+ .log_global_stop = vhost_log_global_stop,
+ .eventfd_add = vhost_eventfd_add,
+ .eventfd_del = vhost_eventfd_del,
+ .priority = 10
+ };
+ hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions));
+ hdev->n_mem_sections = 0;
+ hdev->mem_sections = NULL;
hdev->log = NULL;
hdev->log_size = 0;
hdev->log_enabled = false;
hdev->started = false;
- cpu_register_phys_memory_client(&hdev->client);
+ memory_listener_register(&hdev->memory_listener, NULL);
hdev->force = force;
return 0;
fail:
void vhost_dev_cleanup(struct vhost_dev *hdev)
{
- cpu_unregister_phys_memory_client(&hdev->client);
- qemu_free(hdev->mem);
+ memory_listener_unregister(&hdev->memory_listener);
+ g_free(hdev->mem);
+ g_free(hdev->mem_sections);
close(hdev->control);
}
hdev->force;
}
+/* Stop processing guest IO notifications in qemu.
+ * Start processing them in vhost in the kernel.
+ */
+int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
+{
+ int i, r;
+ if (!vdev->binding->set_host_notifier) {
+ fprintf(stderr, "binding does not support host notifiers\n");
+ r = -ENOSYS;
+ goto fail;
+ }
+
+ for (i = 0; i < hdev->nvqs; ++i) {
+ r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, true);
+ if (r < 0) {
+ fprintf(stderr, "vhost VQ %d notifier binding failed: %d\n", i, -r);
+ goto fail_vq;
+ }
+ }
+
+ return 0;
+fail_vq:
+ while (--i >= 0) {
+ r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, false);
+ if (r < 0) {
+ fprintf(stderr, "vhost VQ %d notifier cleanup error: %d\n", i, -r);
+ fflush(stderr);
+ }
+ assert (r >= 0);
+ }
+fail:
+ return r;
+}
+
+/* Stop processing guest IO notifications in vhost.
+ * Start processing them in qemu.
+ * This might actually run the qemu handlers right away,
+ * so virtio in qemu must be completely set up when this is called.
+ */
+void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
+{
+ int i, r;
+
+ for (i = 0; i < hdev->nvqs; ++i) {
+ r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, false);
+ if (r < 0) {
+ fprintf(stderr, "vhost VQ %d notifier cleanup failed: %d\n", i, -r);
+ fflush(stderr);
+ }
+ assert (r >= 0);
+ }
+}
+
+/* Host notifiers must be enabled at this point. */
int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
{
int i, r;
if (hdev->log_enabled) {
hdev->log_size = vhost_get_log_size(hdev);
hdev->log = hdev->log_size ?
- qemu_mallocz(hdev->log_size * sizeof *hdev->log) : NULL;
+ g_malloc0(hdev->log_size * sizeof *hdev->log) : NULL;
r = ioctl(hdev->control, VHOST_SET_LOG_BASE,
(uint64_t)(unsigned long)hdev->log);
if (r < 0) {
return r;
}
+/* Host notifiers must be enabled at this point. */
void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
{
int i, r;
hdev->vqs + i,
i);
}
- vhost_client_sync_dirty_bitmap(&hdev->client, 0,
- (target_phys_addr_t)~0x0ull);
+ for (i = 0; i < hdev->n_mem_sections; ++i) {
+ vhost_sync_dirty_bitmap(hdev, &hdev->mem_sections[i],
+ 0, (target_phys_addr_t)~0x0ull);
+ }
r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
if (r < 0) {
fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
assert (r >= 0);
hdev->started = false;
- qemu_free(hdev->log);
+ g_free(hdev->log);
+ hdev->log = NULL;
hdev->log_size = 0;
}