/* We first check with non-atomic: much cheaper,
* and we expect non-dirty to be the common case. */
if (!*from) {
+ addr += VHOST_LOG_CHUNK;
continue;
}
/* Data must be read atomically. We don't really
 * need the barrier semantics of __sync builtins,
 * but it's easier to use them than to roll our own. */
log = __sync_fetch_and_and(from, 0);
while ((bit = sizeof(log) > sizeof(int) ?
ffsll(log) : ffs(log))) {
+ ram_addr_t ram_addr;
bit -= 1;
- cpu_physical_memory_set_dirty(addr + bit * VHOST_LOG_PAGE);
+ ram_addr = cpu_get_physical_page_desc(addr + bit * VHOST_LOG_PAGE);
+ cpu_physical_memory_set_dirty(ram_addr);
log &= ~(0x1ull << bit);
}
addr += VHOST_LOG_CHUNK;
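/* For reference, the geometry this scan assumes: each bit of the log covers
 * one page and each word covers one chunk, hence the ffsll()/ffs() split on
 * the word size above. A sketch of the definitions in use (sizes are the
 * usual upstream values, stated here as assumptions):
 *
 *   typedef unsigned long vhost_log_chunk_t;
 *   #define VHOST_LOG_PAGE  0x1000
 *   #define VHOST_LOG_BITS  (8 * sizeof(vhost_log_chunk_t))
 *   #define VHOST_LOG_CHUNK (VHOST_LOG_PAGE * VHOST_LOG_BITS)
 */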
/* Remove whole region */
if (start_addr <= reg->guest_phys_addr && memlast >= reglast) {
--dev->mem->nregions;
--to;
- assert(to >= 0);
++overlap_middle;
continue;
}
uint64_t log_base;
int r;
if (size) {
- log = qemu_mallocz(size * sizeof *log);
+ log = g_malloc0(size * sizeof *log);
} else {
log = NULL;
}
vhost_client_sync_dirty_bitmap(&dev->client, 0,
(target_phys_addr_t)~0x0ull);
if (dev->log) {
- qemu_free(dev->log);
+ g_free(dev->log);
}
dev->log = log;
dev->log_size = size;
return 0;
}
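/* For context: the log allocated above is sized by vhost_get_log_size(),
 * outside this hunk, at one word per VHOST_LOG_CHUNK of guest memory,
 * roughly (a sketch, assuming the range helpers shown further down):
 *
 *   uint64_t last = range_get_last(reg->guest_phys_addr, reg->memory_size);
 *   log_size = MAX(log_size, last / VHOST_LOG_CHUNK + 1);
 */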
+static struct vhost_memory_region *vhost_dev_find_reg(struct vhost_dev *dev,
+ uint64_t start_addr,
+ uint64_t size)
+{
+ int i, n = dev->mem->nregions;
+ for (i = 0; i < n; ++i) {
+ struct vhost_memory_region *reg = dev->mem->regions + i;
+ if (ranges_overlap(reg->guest_phys_addr, reg->memory_size,
+ start_addr, size)) {
+ return reg;
+ }
+ }
+ return NULL;
+}
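
/* vhost_dev_find_reg() relies on qemu's range helpers. A minimal sketch of
 * the semantics assumed here (the real definitions live elsewhere in the
 * tree):
 */
static inline uint64_t range_get_last(uint64_t offset, uint64_t len)
{
    /* Inclusive last byte of [offset, offset + len). */
    return offset + len - 1;
}

static inline int ranges_overlap(uint64_t first1, uint64_t len1,
                                 uint64_t first2, uint64_t len2)
{
    uint64_t last1 = range_get_last(first1, len1);
    uint64_t last2 = range_get_last(first2, len2);
    /* Ranges overlap unless one ends before the other begins. */
    return !(last1 < first2 || last2 < first1);
}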
+
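+/* Returns true if the vhost memory table needs updating for this range:
+ * no region covers it, a region must be extended, or the userspace
+ * address backing it has changed. */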
+static bool vhost_dev_cmp_memory(struct vhost_dev *dev,
+ uint64_t start_addr,
+ uint64_t size,
+ uint64_t uaddr)
+{
+ struct vhost_memory_region *reg = vhost_dev_find_reg(dev, start_addr, size);
+ uint64_t reglast;
+ uint64_t memlast;
+
+ if (!reg) {
+ return true;
+ }
+
+ reglast = range_get_last(reg->guest_phys_addr, reg->memory_size);
+ memlast = range_get_last(start_addr, size);
+
+ /* Need to extend region? */
+ if (start_addr < reg->guest_phys_addr || memlast > reglast) {
+ return true;
+ }
+ /* userspace_addr changed? */
+ return uaddr != reg->userspace_addr + start_addr - reg->guest_phys_addr;
+}
+
static void vhost_client_set_memory(CPUPhysMemoryClient *client,
target_phys_addr_t start_addr,
ram_addr_t size,
- ram_addr_t phys_offset)
+ ram_addr_t phys_offset,
+ bool log_dirty)
{
struct vhost_dev *dev = container_of(client, struct vhost_dev, client);
ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK;
int s = offsetof(struct vhost_memory, regions) +
    (dev->mem->nregions + 1) * sizeof dev->mem->regions[0];
uint64_t log_size;
int r;
- dev->mem = qemu_realloc(dev->mem, s);
+
+ dev->mem = g_realloc(dev->mem, s);
+
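+ /* Regions that need dirty logging are treated as unassigned below, so
+ * they drop out of the vhost memory table and qemu services (and logs)
+ * writes to them instead. */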
+ if (log_dirty) {
+ flags = IO_MEM_UNASSIGNED;
+ }
assert(size);
+ /* Optimize no-change case. At least cirrus_vga does this a lot at this time. */
+ if (flags == IO_MEM_RAM) {
+ if (!vhost_dev_cmp_memory(dev, start_addr, size,
+ (uintptr_t)qemu_get_ram_ptr(phys_offset))) {
+ /* Region exists with same address. Nothing to do. */
+ return;
+ }
+ } else {
+ if (!vhost_dev_find_reg(dev, start_addr, size)) {
+ /* Removing region that we don't access. Nothing to do. */
+ return;
+ }
+ }
+
vhost_dev_unassign_memory(dev, start_addr, size);
if (flags == IO_MEM_RAM) {
/* Add given mapping, merging adjacent regions if any */
return r;
}
if (dev->log) {
- qemu_free(dev->log);
+ g_free(dev->log);
}
dev->log = NULL;
dev->log_size = 0;
};
struct VirtQueue *vvq = virtio_get_queue(vdev, idx);
- if (!vdev->binding->set_guest_notifier) {
- fprintf(stderr, "binding does not support guest notifiers\n");
- return -ENOSYS;
- }
-
- if (!vdev->binding->set_host_notifier) {
- fprintf(stderr, "binding does not support host notifiers\n");
- return -ENOSYS;
- }
-
vq->num = state.num = virtio_queue_get_num(vdev, idx);
r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state);
if (r) {
r = -errno;
goto fail_alloc;
}
- r = vdev->binding->set_guest_notifier(vdev->binding_opaque, idx, true);
- if (r < 0) {
- fprintf(stderr, "Error binding guest notifier: %d\n", -r);
- goto fail_guest_notifier;
- }
-
- r = vdev->binding->set_host_notifier(vdev->binding_opaque, idx, true);
- if (r < 0) {
- fprintf(stderr, "Error binding host notifier: %d\n", -r);
- goto fail_host_notifier;
- }
-
file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq));
r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file);
if (r) {
+ r = -errno;
goto fail_kick;
}
file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq));
r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
if (r) {
+ r = -errno;
goto fail_call;
}
fail_call:
fail_kick:
- vdev->binding->set_host_notifier(vdev->binding_opaque, idx, false);
-fail_host_notifier:
- vdev->binding->set_guest_notifier(vdev->binding_opaque, idx, false);
-fail_guest_notifier:
fail_alloc:
cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
0, 0);
.index = idx,
};
int r;
- r = vdev->binding->set_guest_notifier(vdev->binding_opaque, idx, false);
- if (r < 0) {
- fprintf(stderr, "vhost VQ %d guest cleanup failed: %d\n", idx, r);
- fflush(stderr);
- }
- assert (r >= 0);
-
- r = vdev->binding->set_host_notifier(vdev->binding_opaque, idx, false);
- if (r < 0) {
- fprintf(stderr, "vhost VQ %d host cleanup failed: %d\n", idx, r);
- fflush(stderr);
- }
- assert (r >= 0);
r = ioctl(dev->control, VHOST_GET_VRING_BASE, &state);
if (r < 0) {
fprintf(stderr, "vhost VQ %d ring restore failed: %d\n", idx, r);
0, virtio_queue_get_desc_size(vdev, idx));
}
-int vhost_dev_init(struct vhost_dev *hdev, int devfd)
+int vhost_dev_init(struct vhost_dev *hdev, int devfd, bool force)
{
uint64_t features;
int r;
hdev->client.set_memory = vhost_client_set_memory;
hdev->client.sync_dirty_bitmap = vhost_client_sync_dirty_bitmap;
hdev->client.migration_log = vhost_client_migration_log;
- hdev->mem = qemu_mallocz(offsetof(struct vhost_memory, regions));
+ hdev->client.log_start = NULL;
+ hdev->client.log_stop = NULL;
+ hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions));
hdev->log = NULL;
hdev->log_size = 0;
hdev->log_enabled = false;
hdev->started = false;
cpu_register_phys_memory_client(&hdev->client);
+ hdev->force = force;
return 0;
fail:
r = -errno;
void vhost_dev_cleanup(struct vhost_dev *hdev)
{
cpu_unregister_phys_memory_client(&hdev->client);
- qemu_free(hdev->mem);
+ g_free(hdev->mem);
close(hdev->control);
}
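+/* Report whether vhost should back this device: true when the binding
+ * cannot report guest notifier state, when the guest can actually use
+ * guest notifiers (for virtio-pci, presumably MSI-X), or when vhost is
+ * forced on. */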
+bool vhost_dev_query(struct vhost_dev *hdev, VirtIODevice *vdev)
+{
+ return !vdev->binding->query_guest_notifiers ||
+ vdev->binding->query_guest_notifiers(vdev->binding_opaque) ||
+ hdev->force;
+}
+
+/* Stop processing guest IO notifications in qemu.
+ * Start processing them in vhost in the kernel.
+ */
+int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
+{
+ int i, r;
+ if (!vdev->binding->set_host_notifier) {
+ fprintf(stderr, "binding does not support host notifiers\n");
+ r = -ENOSYS;
+ goto fail;
+ }
+
+ for (i = 0; i < hdev->nvqs; ++i) {
+ r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, true);
+ if (r < 0) {
+ fprintf(stderr, "vhost VQ %d notifier binding failed: %d\n", i, -r);
+ goto fail_vq;
+ }
+ }
+
+ return 0;
+fail_vq:
+ while (--i >= 0) {
+ r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, false);
+ if (r < 0) {
+ fprintf(stderr, "vhost VQ %d notifier cleanup error: %d\n", i, -r);
+ fflush(stderr);
+ }
+ assert(r >= 0);
+ }
+fail:
+ return r;
+}
+
+/* Stop processing guest IO notifications in vhost.
+ * Start processing them in qemu.
+ * This might actually run the qemu handlers right away,
+ * so virtio in qemu must be completely set up when this is called.
+ */
+void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
+{
+ int i, r;
+
+ for (i = 0; i < hdev->nvqs; ++i) {
+ r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, false);
+ if (r < 0) {
+ fprintf(stderr, "vhost VQ %d notifier cleanup failed: %d\n", i, -r);
+ fflush(stderr);
+ }
+ assert(r >= 0);
+ }
+}
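/* Callers are expected to bracket start/stop with the notifier calls, in
 * the style of vhost_net (a sketch; error handling elided):
 *
 *   vhost_dev_enable_notifiers(&net->dev, dev);
 *   vhost_dev_start(&net->dev, dev);
 *   ...
 *   vhost_dev_stop(&net->dev, dev);
 *   vhost_dev_disable_notifiers(&net->dev, dev);
 */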
+
+/* Host notifiers must be enabled at this point. */
int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
{
int i, r;
+ if (!vdev->binding->set_guest_notifiers) {
+ fprintf(stderr, "binding does not support guest notifiers\n");
+ r = -ENOSYS;
+ goto fail;
+ }
+
+ r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, true);
+ if (r < 0) {
+ fprintf(stderr, "Error binding guest notifier: %d\n", -r);
+ goto fail_notifiers;
+ }
r = vhost_dev_set_features(hdev, hdev->log_enabled);
if (r < 0) {
- goto fail;
+ goto fail_features;
}
r = ioctl(hdev->control, VHOST_SET_MEM_TABLE, hdev->mem);
if (r < 0) {
r = -errno;
- goto fail;
+ goto fail_mem;
}
for (i = 0; i < hdev->nvqs; ++i) {
r = vhost_virtqueue_init(hdev,
if (hdev->log_enabled) {
hdev->log_size = vhost_get_log_size(hdev);
hdev->log = hdev->log_size ?
- qemu_mallocz(hdev->log_size * sizeof *hdev->log) : NULL;
+ g_malloc0(hdev->log_size * sizeof *hdev->log) : NULL;
r = ioctl(hdev->control, VHOST_SET_LOG_BASE,
(uint64_t)(unsigned long)hdev->log);
if (r < 0) {
r = -errno;
- goto fail_vq;
+ goto fail_log;
}
}
hdev->started = true;
return 0;
+fail_log:
fail_vq:
while (--i >= 0) {
vhost_virtqueue_cleanup(hdev,
hdev->vqs + i,
i);
}
+fail_mem:
+fail_features:
+ vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
+fail_notifiers:
fail:
return r;
}
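/* Note the error ladder above unwinds in strict reverse order of setup:
 * a failure at any step jumps to its label and falls through only the
 * cleanup below it, so just the steps that completed (virtqueues, then
 * guest notifiers) are rolled back. */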
+/* Host notifiers must be enabled at this point. */
void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
{
- int i;
+ int i, r;
+
for (i = 0; i < hdev->nvqs; ++i) {
vhost_virtqueue_cleanup(hdev,
vdev,
}
vhost_client_sync_dirty_bitmap(&hdev->client, 0,
(target_phys_addr_t)~0x0ull);
+ r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
+ if (r < 0) {
+ fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
+ fflush(stderr);
+ }
+ assert(r >= 0);
+
hdev->started = false;
- qemu_free(hdev->log);
+ g_free(hdev->log);
+ hdev->log = NULL;
hdev->log_size = 0;
}