*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
*/
#include <sys/ioctl.h>
#include "hw/hw.h"
#include "range.h"
#include <linux/vhost.h>
+#include "exec-memory.h"
static void vhost_dev_sync_region(struct vhost_dev *dev,
+ MemoryRegionSection *section,
uint64_t mfirst, uint64_t mlast,
uint64_t rfirst, uint64_t rlast)
{
    uint64_t start = MAX(mfirst, rfirst);
    uint64_t end = MIN(mlast, rlast);
    vhost_log_chunk_t *from = dev->log + start / VHOST_LOG_CHUNK;
    vhost_log_chunk_t *to = dev->log + end / VHOST_LOG_CHUNK + 1;
    uint64_t addr = (start / VHOST_LOG_CHUNK) * VHOST_LOG_CHUNK;
- assert(end / VHOST_LOG_CHUNK < dev->log_size);
- assert(start / VHOST_LOG_CHUNK < dev->log_size);
if (end < start) {
return;
}
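+    /* The empty-range case has returned above, so both chunk indices
+     * must now lie within the allocated log. */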
+ assert(end / VHOST_LOG_CHUNK < dev->log_size);
+ assert(start / VHOST_LOG_CHUNK < dev->log_size);
+
for (;from < to; ++from) {
vhost_log_chunk_t log;
int bit;
        /* Atomically fetch and clear this chunk of the dirty log. */
        log = __sync_fetch_and_and(from, 0);
        while ((bit = sizeof(log) > sizeof(int) ?
                ffsll(log) : ffs(log))) {
ram_addr_t ram_addr;
bit -= 1;
- ram_addr = cpu_get_physical_page_desc(addr + bit * VHOST_LOG_PAGE);
- cpu_physical_memory_set_dirty(ram_addr);
+ ram_addr = section->offset_within_region + bit * VHOST_LOG_PAGE;
+ memory_region_set_dirty(section->mr, ram_addr, VHOST_LOG_PAGE);
log &= ~(0x1ull << bit);
}
addr += VHOST_LOG_CHUNK;
}
}
-static int vhost_client_sync_dirty_bitmap(CPUPhysMemoryClient *client,
- target_phys_addr_t start_addr,
- target_phys_addr_t end_addr)
+static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
+ MemoryRegionSection *section,
+ target_phys_addr_t start_addr,
+ target_phys_addr_t end_addr)
{
- struct vhost_dev *dev = container_of(client, struct vhost_dev, client);
int i;
+
if (!dev->log_enabled || !dev->started) {
return 0;
}
for (i = 0; i < dev->mem->nregions; ++i) {
struct vhost_memory_region *reg = dev->mem->regions + i;
- vhost_dev_sync_region(dev, start_addr, end_addr,
+ vhost_dev_sync_region(dev, section, start_addr, end_addr,
reg->guest_phys_addr,
range_get_last(reg->guest_phys_addr,
reg->memory_size));
}
for (i = 0; i < dev->nvqs; ++i) {
struct vhost_virtqueue *vq = dev->vqs + i;
- vhost_dev_sync_region(dev, start_addr, end_addr, vq->used_phys,
+ vhost_dev_sync_region(dev, section, start_addr, end_addr, vq->used_phys,
range_get_last(vq->used_phys, vq->used_size));
}
return 0;
}
+static void vhost_log_sync(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ struct vhost_dev *dev = container_of(listener, struct vhost_dev,
+ memory_listener);
+ target_phys_addr_t start_addr = section->offset_within_address_space;
+    /* vhost_dev_sync_region() treats the last address as inclusive. */
+    target_phys_addr_t end_addr = start_addr + section->size - 1;
+
+ vhost_sync_dirty_bitmap(dev, section, start_addr, end_addr);
+}
+
/* Assign/unassign. Keep an unsorted array of non-overlapping
* memory regions in dev->mem. */
static void vhost_dev_unassign_memory(struct vhost_dev *dev,
if (start_addr <= reg->guest_phys_addr && memlast >= reglast) {
--dev->mem->nregions;
--to;
- assert(to >= 0);
++overlap_middle;
continue;
}
{
vhost_log_chunk_t *log;
uint64_t log_base;
- int r;
+ int r, i;
if (size) {
- log = qemu_mallocz(size * sizeof *log);
+ log = g_malloc0(size * sizeof *log);
} else {
log = NULL;
}
log_base = (uint64_t)(unsigned long)log;
r = ioctl(dev->control, VHOST_SET_LOG_BASE, &log_base);
assert(r >= 0);
- vhost_client_sync_dirty_bitmap(&dev->client, 0,
- (target_phys_addr_t)~0x0ull);
+ for (i = 0; i < dev->n_mem_sections; ++i) {
+ /* Sync only the range covered by the old log */
+ vhost_sync_dirty_bitmap(dev, &dev->mem_sections[i], 0,
+ dev->log_size * VHOST_LOG_CHUNK - 1);
+ }
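+    /* The kernel now logs into the new buffer; flush any bits still
+     * pending in the old one so no dirty pages are lost. */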
if (dev->log) {
- qemu_free(dev->log);
+ g_free(dev->log);
}
dev->log = log;
dev->log_size = size;
return 0;
}
-static void vhost_client_set_memory(CPUPhysMemoryClient *client,
- target_phys_addr_t start_addr,
- ram_addr_t size,
- ram_addr_t phys_offset)
+static struct vhost_memory_region *vhost_dev_find_reg(struct vhost_dev *dev,
+ uint64_t start_addr,
+ uint64_t size)
{
- struct vhost_dev *dev = container_of(client, struct vhost_dev, client);
- ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
+ int i, n = dev->mem->nregions;
+ for (i = 0; i < n; ++i) {
+ struct vhost_memory_region *reg = dev->mem->regions + i;
+ if (ranges_overlap(reg->guest_phys_addr, reg->memory_size,
+ start_addr, size)) {
+ return reg;
+ }
+ }
+ return NULL;
+}
+
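+/* Return true if the vhost memory table needs an update for this range:
+ * there is no matching region, the range extends beyond the region
+ * found, or its host userspace address changed. */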
+static bool vhost_dev_cmp_memory(struct vhost_dev *dev,
+ uint64_t start_addr,
+ uint64_t size,
+ uint64_t uaddr)
+{
+ struct vhost_memory_region *reg = vhost_dev_find_reg(dev, start_addr, size);
+ uint64_t reglast;
+ uint64_t memlast;
+
+ if (!reg) {
+ return true;
+ }
+
+ reglast = range_get_last(reg->guest_phys_addr, reg->memory_size);
+ memlast = range_get_last(start_addr, size);
+
+ /* Need to extend region? */
+ if (start_addr < reg->guest_phys_addr || memlast > reglast) {
+ return true;
+ }
+ /* userspace_addr changed? */
+ return uaddr != reg->userspace_addr + start_addr - reg->guest_phys_addr;
+}
+
+static void vhost_set_memory(MemoryListener *listener,
+ MemoryRegionSection *section,
+ bool add)
+{
+ struct vhost_dev *dev = container_of(listener, struct vhost_dev,
+ memory_listener);
+ target_phys_addr_t start_addr = section->offset_within_address_space;
+ ram_addr_t size = section->size;
+ bool log_dirty = memory_region_is_logging(section->mr);
int s = offsetof(struct vhost_memory, regions) +
(dev->mem->nregions + 1) * sizeof dev->mem->regions[0];
uint64_t log_size;
int r;
- dev->mem = qemu_realloc(dev->mem, s);
+ void *ram;
+
+ dev->mem = g_realloc(dev->mem, s);
+
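+    /* Writes from the vhost kernel thread would bypass QEMU's dirty
+     * tracking, so treat regions under dirty logging (e.g. a VGA
+     * framebuffer) as removals. */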
+ if (log_dirty) {
+ add = false;
+ }
assert(size);
+    /* Optimize no-change case. At least cirrus_vga does this a lot at
+     * this time. */
+ ram = memory_region_get_ram_ptr(section->mr) + section->offset_within_region;
+ if (add) {
+ if (!vhost_dev_cmp_memory(dev, start_addr, size, (uintptr_t)ram)) {
+ /* Region exists with same address. Nothing to do. */
+ return;
+ }
+ } else {
+ if (!vhost_dev_find_reg(dev, start_addr, size)) {
+ /* Removing region that we don't access. Nothing to do. */
+ return;
+ }
+ }
+
vhost_dev_unassign_memory(dev, start_addr, size);
- if (flags == IO_MEM_RAM) {
+ if (add) {
/* Add given mapping, merging adjacent regions if any */
- vhost_dev_assign_memory(dev, start_addr, size,
- (uintptr_t)qemu_get_ram_ptr(phys_offset));
+ vhost_dev_assign_memory(dev, start_addr, size, (uintptr_t)ram);
} else {
/* Remove old mapping for this memory, if any. */
vhost_dev_unassign_memory(dev, start_addr, size);
}
}
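+/* vhost only maps RAM in the system address space; ignore I/O regions
+ * and any other address spaces. */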
+static bool vhost_section(MemoryRegionSection *section)
+{
+ return section->address_space == get_system_memory()
+ && memory_region_is_ram(section->mr);
+}
+
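+/* Table updates happen directly in region_add/region_del, so the
+ * transaction hooks have nothing to batch. */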
+static void vhost_begin(MemoryListener *listener)
+{
+}
+
+static void vhost_commit(MemoryListener *listener)
+{
+}
+
+static void vhost_region_add(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ struct vhost_dev *dev = container_of(listener, struct vhost_dev,
+ memory_listener);
+
+ if (!vhost_section(section)) {
+ return;
+ }
+
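+    /* Track the section: dirty-log syncing walks all RAM sections. */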
+ ++dev->n_mem_sections;
+ dev->mem_sections = g_renew(MemoryRegionSection, dev->mem_sections,
+ dev->n_mem_sections);
+ dev->mem_sections[dev->n_mem_sections - 1] = *section;
+ vhost_set_memory(listener, section, true);
+}
+
+static void vhost_region_del(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ struct vhost_dev *dev = container_of(listener, struct vhost_dev,
+ memory_listener);
+ int i;
+
+ if (!vhost_section(section)) {
+ return;
+ }
+
+ vhost_set_memory(listener, section, false);
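+    /* Drop the tracked section, matching by address-space offset. */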
+ for (i = 0; i < dev->n_mem_sections; ++i) {
+ if (dev->mem_sections[i].offset_within_address_space
+ == section->offset_within_address_space) {
+ --dev->n_mem_sections;
+ memmove(&dev->mem_sections[i], &dev->mem_sections[i+1],
+ (dev->n_mem_sections - i) * sizeof(*dev->mem_sections));
+ break;
+ }
+ }
+}
+
+static void vhost_region_nop(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+}
+
static int vhost_virtqueue_set_addr(struct vhost_dev *dev,
struct vhost_virtqueue *vq,
unsigned idx, bool enable_log)
return r;
}
-static int vhost_client_migration_log(CPUPhysMemoryClient *client,
- int enable)
+static int vhost_migration_log(MemoryListener *listener, int enable)
{
- struct vhost_dev *dev = container_of(client, struct vhost_dev, client);
+ struct vhost_dev *dev = container_of(listener, struct vhost_dev,
+ memory_listener);
int r;
if (!!enable == dev->log_enabled) {
return 0;
return r;
}
if (dev->log) {
- qemu_free(dev->log);
+ g_free(dev->log);
}
dev->log = NULL;
dev->log_size = 0;
return 0;
}
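+/* Failure to toggle dirty logging would silently corrupt migration,
+ * so treat it as fatal. */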
+static void vhost_log_global_start(MemoryListener *listener)
+{
+ int r;
+
+ r = vhost_migration_log(listener, true);
+ if (r < 0) {
+ abort();
+ }
+}
+
+static void vhost_log_global_stop(MemoryListener *listener)
+{
+ int r;
+
+ r = vhost_migration_log(listener, false);
+ if (r < 0) {
+ abort();
+ }
+}
+
+static void vhost_log_start(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ /* FIXME: implement */
+}
+
+static void vhost_log_stop(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ /* FIXME: implement */
+}
+
static int vhost_virtqueue_init(struct vhost_dev *dev,
struct VirtIODevice *vdev,
struct vhost_virtqueue *vq,
};
struct VirtQueue *vvq = virtio_get_queue(vdev, idx);
- if (!vdev->binding->set_host_notifier) {
- fprintf(stderr, "binding does not support host notifiers\n");
- return -ENOSYS;
- }
-
vq->num = state.num = virtio_queue_get_num(vdev, idx);
r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state);
if (r) {
r = -errno;
goto fail_alloc;
}
- r = vdev->binding->set_host_notifier(vdev->binding_opaque, idx, true);
- if (r < 0) {
- fprintf(stderr, "Error binding host notifier: %d\n", -r);
- goto fail_host_notifier;
- }
-
file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq));
r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file);
if (r) {
fail_call:
fail_kick:
- vdev->binding->set_host_notifier(vdev->binding_opaque, idx, false);
-fail_host_notifier:
fail_alloc:
cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
0, 0);
.index = idx,
};
int r;
- r = vdev->binding->set_host_notifier(vdev->binding_opaque, idx, false);
- if (r < 0) {
- fprintf(stderr, "vhost VQ %d host cleanup failed: %d\n", idx, r);
- fflush(stderr);
- }
- assert (r >= 0);
r = ioctl(dev->control, VHOST_GET_VRING_BASE, &state);
if (r < 0) {
fprintf(stderr, "vhost VQ %d ring restore failed: %d\n", idx, r);
0, virtio_queue_get_desc_size(vdev, idx));
}
-int vhost_dev_init(struct vhost_dev *hdev, int devfd, bool force)
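+/* vhost receives virtqueue kicks through the host notifier fd set up
+ * with VHOST_SET_VRING_KICK, so the listener's eventfd hooks are
+ * deliberately empty. */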
+static void vhost_eventfd_add(MemoryListener *listener,
+ MemoryRegionSection *section,
+ bool match_data, uint64_t data, EventNotifier *e)
+{
+}
+
+static void vhost_eventfd_del(MemoryListener *listener,
+ MemoryRegionSection *section,
+ bool match_data, uint64_t data, EventNotifier *e)
+{
+}
+
+int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath,
+ bool force)
{
uint64_t features;
int r;
if (devfd >= 0) {
hdev->control = devfd;
} else {
- hdev->control = open("/dev/vhost-net", O_RDWR);
+ hdev->control = open(devpath, O_RDWR);
if (hdev->control < 0) {
return -errno;
}
}
hdev->features = features;
- hdev->client.set_memory = vhost_client_set_memory;
- hdev->client.sync_dirty_bitmap = vhost_client_sync_dirty_bitmap;
- hdev->client.migration_log = vhost_client_migration_log;
- hdev->client.log_start = NULL;
- hdev->client.log_stop = NULL;
- hdev->mem = qemu_mallocz(offsetof(struct vhost_memory, regions));
+ hdev->memory_listener = (MemoryListener) {
+ .begin = vhost_begin,
+ .commit = vhost_commit,
+ .region_add = vhost_region_add,
+ .region_del = vhost_region_del,
+ .region_nop = vhost_region_nop,
+ .log_start = vhost_log_start,
+ .log_stop = vhost_log_stop,
+ .log_sync = vhost_log_sync,
+ .log_global_start = vhost_log_global_start,
+ .log_global_stop = vhost_log_global_stop,
+ .eventfd_add = vhost_eventfd_add,
+ .eventfd_del = vhost_eventfd_del,
+ .priority = 10
+ };
+ hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions));
+ hdev->n_mem_sections = 0;
+ hdev->mem_sections = NULL;
hdev->log = NULL;
hdev->log_size = 0;
hdev->log_enabled = false;
hdev->started = false;
- cpu_register_phys_memory_client(&hdev->client);
+ memory_listener_register(&hdev->memory_listener, NULL);
hdev->force = force;
return 0;
fail:
void vhost_dev_cleanup(struct vhost_dev *hdev)
{
- cpu_unregister_phys_memory_client(&hdev->client);
- qemu_free(hdev->mem);
+ memory_listener_unregister(&hdev->memory_listener);
+ g_free(hdev->mem);
+ g_free(hdev->mem_sections);
close(hdev->control);
}
hdev->force;
}
+/* Stop processing guest IO notifications in qemu.
+ * Start processing them in vhost in the kernel.
+ */
+int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
+{
+ int i, r;
+ if (!vdev->binding->set_host_notifier) {
+ fprintf(stderr, "binding does not support host notifiers\n");
+ r = -ENOSYS;
+ goto fail;
+ }
+
+ for (i = 0; i < hdev->nvqs; ++i) {
+ r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, true);
+ if (r < 0) {
+ fprintf(stderr, "vhost VQ %d notifier binding failed: %d\n", i, -r);
+ goto fail_vq;
+ }
+ }
+
+ return 0;
+fail_vq:
+ while (--i >= 0) {
+ r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, false);
+ if (r < 0) {
+ fprintf(stderr, "vhost VQ %d notifier cleanup error: %d\n", i, -r);
+ fflush(stderr);
+ }
+ assert (r >= 0);
+ }
+fail:
+ return r;
+}
+
+/* Stop processing guest IO notifications in vhost.
+ * Start processing them in qemu.
+ * This might actually run the qemu handlers right away,
+ * so virtio in qemu must be completely set up when this is called.
+ */
+void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
+{
+ int i, r;
+
+ for (i = 0; i < hdev->nvqs; ++i) {
+ r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, false);
+ if (r < 0) {
+ fprintf(stderr, "vhost VQ %d notifier cleanup failed: %d\n", i, -r);
+ fflush(stderr);
+ }
+ assert (r >= 0);
+ }
+}
+
+/* Host notifiers must be enabled at this point. */
int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
{
int i, r;
if (hdev->log_enabled) {
hdev->log_size = vhost_get_log_size(hdev);
hdev->log = hdev->log_size ?
- qemu_mallocz(hdev->log_size * sizeof *hdev->log) : NULL;
+ g_malloc0(hdev->log_size * sizeof *hdev->log) : NULL;
r = ioctl(hdev->control, VHOST_SET_LOG_BASE,
(uint64_t)(unsigned long)hdev->log);
if (r < 0) {
return r;
}
+/* Host notifiers must be enabled at this point. */
void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
{
int i, r;
hdev->vqs + i,
i);
}
- vhost_client_sync_dirty_bitmap(&hdev->client, 0,
- (target_phys_addr_t)~0x0ull);
+ for (i = 0; i < hdev->n_mem_sections; ++i) {
+ vhost_sync_dirty_bitmap(hdev, &hdev->mem_sections[i],
+ 0, (target_phys_addr_t)~0x0ull);
+ }
r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
if (r < 0) {
fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
assert (r >= 0);
hdev->started = false;
- qemu_free(hdev->log);
+ g_free(hdev->log);
+ hdev->log = NULL;
hdev->log_size = 0;
}