#include "sysemu/balloon.h"
#include "qemu/error-report.h"
#include "trace.h"
+#include "hw/boards.h"
/* Arbitrary limit on size of each discard command,
 * keeps them around ~200 bytes
 */
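/*
 * Editor's sketch of the arithmetic behind the ~200 byte figure (assuming
 * the upstream value of MAX_DISCARDS_PER_COMMAND, 12): each discard entry
 * is a (start, length) pair of 64-bit values, so one command carries
 * 12 * 2 * 8 = 192 bytes of payload plus the command header.
 */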
static struct PostcopyBlocktimeContext *blocktime_context_new(void)
{
+ MachineState *ms = MACHINE(qdev_get_machine());
+ unsigned int smp_cpus = ms->smp.cpus;
PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1);
ctx->page_fault_vcpu_time = g_new0(uint32_t, smp_cpus);
ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus);
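/*
 * Editor's sketch (hypothetical helper, not part of the patch): the lookup
 * pattern this change uses everywhere now that the global smp_cpus is gone.
 * qdev_get_machine() returns the machine QOM object, MACHINE() downcasts it
 * to MachineState, and smp.cpus holds the vCPU count.
 */
static unsigned int postcopy_smp_cpus(void)
{
    MachineState *ms = MACHINE(qdev_get_machine());

    return ms->smp.cpus;
}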
static uint32List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx)
{
+ MachineState *ms = MACHINE(qdev_get_machine());
uint32List *list = NULL, *entry = NULL;
int i;
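    /* Walk the vCPUs in reverse and prepend each entry, so the returned
     * list ends up in ascending vCPU order. */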
- for (i = smp_cpus - 1; i >= 0; i--) {
+ for (i = ms->smp.cpus - 1; i >= 0; i--) {
entry = g_new0(uint32List, 1);
entry->value = ctx->vcpu_blocktime[i];
entry->next = list;
/* Callback from postcopy_ram_supported_by_host block iterator.
*/
-static int test_ramblock_postcopiable(const char *block_name, void *host_addr,
- ram_addr_t offset, ram_addr_t length, void *opaque)
+static int test_ramblock_postcopiable(RAMBlock *rb, void *opaque)
{
- RAMBlock *rb = qemu_ram_block_by_name(block_name);
+ const char *block_name = qemu_ram_get_idstr(rb);
+ ram_addr_t length = qemu_ram_get_used_length(rb);
size_t pagesize = qemu_ram_pagesize(rb);
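    /* userfaultfd registration and page placement operate on whole host
     * pages, so a block whose length is not a page-size multiple cannot
     * be postcopied. */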
    if (length % pagesize) {
        error_report("Postcopy requires RAM blocks to be a page size "
                     "multiple, block %s is 0x" RAM_ADDR_FMT " bytes "
                     "with a page size of 0x%zx",
                     block_name, length, pagesize);
        return 1;
    }
/* We don't support postcopy with shared RAM yet */
- if (qemu_ram_foreach_block(test_ramblock_postcopiable, NULL)) {
+ if (foreach_not_ignored_block(test_ramblock_postcopiable, NULL)) {
goto out;
}
* must be done right at the start prior to pre-copy.
* opaque should be the MIS.
*/
-static int init_range(const char *block_name, void *host_addr,
- ram_addr_t offset, ram_addr_t length, void *opaque)
+static int init_range(RAMBlock *rb, void *opaque)
{
+ const char *block_name = qemu_ram_get_idstr(rb);
+ void *host_addr = qemu_ram_get_host_addr(rb);
+ ram_addr_t offset = qemu_ram_get_offset(rb);
+ ram_addr_t length = qemu_ram_get_used_length(rb);
trace_postcopy_init_range(block_name, host_addr, offset, length);
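/*
 * Editor's sketch (assumption; the body is elided in this excerpt): upstream
 * init_range discards the whole block so that postcopy starts from truly
 * empty RAM, roughly:
 *
 *     if (ram_discard_range(block_name, 0, length)) {
 *         return -1;
 *     }
 */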
/*
* At the end of migration, undo the effects of init_range
* opaque should be the MIS.
*/
-static int cleanup_range(const char *block_name, void *host_addr,
- ram_addr_t offset, ram_addr_t length, void *opaque)
+static int cleanup_range(RAMBlock *rb, void *opaque)
{
+ const char *block_name = qemu_ram_get_idstr(rb);
+ void *host_addr = qemu_ram_get_host_addr(rb);
+ ram_addr_t offset = qemu_ram_get_offset(rb);
+ ram_addr_t length = qemu_ram_get_used_length(rb);
MigrationIncomingState *mis = opaque;
struct uffdio_range range_struct;
trace_postcopy_cleanup_range(block_name, host_addr, offset, length);
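/*
 * Editor's sketch (assumption; the body is elided in this excerpt): the
 * range_struct declared above is filled with the block's host address and
 * length and handed to UFFDIO_UNREGISTER so the kernel stops raising
 * faults for the area, roughly:
 *
 *     range_struct.start = (uintptr_t)host_addr;
 *     range_struct.len = length;
 *     if (ioctl(mis->userfault_fd, UFFDIO_UNREGISTER, &range_struct)) {
 *         return -1;
 *     }
 */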
* postcopy later; must be called prior to any precopy.
 * Called from arch_init's similarly named ram_postcopy_incoming_init.
*/
-int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
+int postcopy_ram_incoming_init(MigrationIncomingState *mis)
{
- if (qemu_ram_foreach_block(init_range, NULL)) {
+ if (foreach_not_ignored_block(init_range, NULL)) {
return -1;
}
return 0;
}
+/*
+ * Manage a single vote to the QEMU balloon inhibitor for all postcopy usage,
+ * last caller wins.
+ */
+static void postcopy_balloon_inhibit(bool state)
+{
+ static bool cur_state = false;
+
+ if (state != cur_state) {
+ qemu_balloon_inhibit(state);
+ cur_state = state;
+ }
+}
+
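/*
 * Editor's sketch (illustration, not part of the patch): why the latch
 * matters.  A paused-and-recovered postcopy runs the enable path again;
 * without the latch that would take two balloon-inhibit references but
 * release only one at cleanup.  With it, repeated same-state calls are
 * no-ops:
 *
 *     postcopy_balloon_inhibit(true);   // initial listen
 *     postcopy_balloon_inhibit(true);   // after recovery: no-op
 *     postcopy_balloon_inhibit(false);  // cleanup clears the single vote
 */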
/*
* At the end of a migration where postcopy_ram_incoming_init was called.
*/
if (mis->have_fault_thread) {
Error *local_err = NULL;
+ /* Let the fault thread quit */
+ atomic_set(&mis->fault_thread_quit, 1);
+ postcopy_fault_thread_notify(mis);
+ trace_postcopy_ram_incoming_cleanup_join();
+ qemu_thread_join(&mis->fault_thread);
+
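+    /*
+     * With the fault thread joined above, the INBOUND_END notifier and
+     * the range cleanup below cannot race with it over the userfault fd
+     * or shared postcopy state.
+     */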
if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_END, &local_err)) {
error_report_err(local_err);
return -1;
}
- if (qemu_ram_foreach_block(cleanup_range, mis)) {
+ if (foreach_not_ignored_block(cleanup_range, mis)) {
return -1;
}
- /* Let the fault thread quit */
- atomic_set(&mis->fault_thread_quit, 1);
- postcopy_fault_thread_notify(mis);
- trace_postcopy_ram_incoming_cleanup_join();
- qemu_thread_join(&mis->fault_thread);
trace_postcopy_ram_incoming_cleanup_closeuf();
close(mis->userfault_fd);
mis->have_fault_thread = false;
}
- qemu_balloon_inhibit(false);
+ postcopy_balloon_inhibit(false);
if (enable_mlock) {
if (os_mlock() < 0) {
/*
* Disable huge pages on an area
*/
-static int nhp_range(const char *block_name, void *host_addr,
- ram_addr_t offset, ram_addr_t length, void *opaque)
+static int nhp_range(RAMBlock *rb, void *opaque)
{
+ const char *block_name = qemu_ram_get_idstr(rb);
+ void *host_addr = qemu_ram_get_host_addr(rb);
+ ram_addr_t offset = qemu_ram_get_offset(rb);
+ ram_addr_t length = qemu_ram_get_used_length(rb);
trace_postcopy_nhp_range(block_name, host_addr, offset, length);
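/*
 * Editor's sketch (assumption; the body is elided in this excerpt): upstream
 * nhp_range forces transparent hugepages off for the range so that later
 * discards punch real holes instead of leaving THP-backed pages behind,
 * roughly:
 *
 *     qemu_madvise(host_addr, length, QEMU_MADV_NOHUGEPAGE);
 */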
/*
*/
int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
{
- if (qemu_ram_foreach_block(nhp_range, mis)) {
+ if (foreach_not_ignored_block(nhp_range, mis)) {
return -1;
}
/*
* Mark the given area of RAM as requiring notification to unwritten areas
- * Used as a callback on qemu_ram_foreach_block.
+ * Used as a callback on foreach_not_ignored_block.
- * host_addr: Base of area to mark
- * offset: Offset in the whole ram arena
- * length: Length of the section
+ * rb: RAMBlock to mark
* opaque: MigrationIncomingState pointer
* Returns 0 on success
*/
-static int ram_block_enable_notify(const char *block_name, void *host_addr,
- ram_addr_t offset, ram_addr_t length,
- void *opaque)
+static int ram_block_enable_notify(RAMBlock *rb, void *opaque)
{
MigrationIncomingState *mis = opaque;
struct uffdio_register reg_struct;
- reg_struct.range.start = (uintptr_t)host_addr;
- reg_struct.range.len = length;
+ reg_struct.range.start = (uintptr_t)qemu_ram_get_host_addr(rb);
+ reg_struct.range.len = qemu_ram_get_used_length(rb);
reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
    /* Now tell our userfault_fd that it's responsible for this area */
    if (ioctl(mis->userfault_fd, UFFDIO_REGISTER, &reg_struct)) {
        error_report("%s userfault register: %s", __func__, strerror(errno));
        return -1;
}
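    /*
     * The kernel reports which ioctls it supports on the registered range;
     * if UFFDIO_ZEROPAGE is among them, zero pages can later be placed
     * directly instead of copying in a zero-filled buffer.
     */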
if (reg_struct.ioctls & ((__u64)1 << _UFFDIO_ZEROPAGE)) {
- RAMBlock *rb = qemu_ram_block_by_name(block_name);
qemu_ram_set_uf_zeroable(rb);
}
{
MigrationIncomingState *mis = migration_incoming_get_current();
PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
+ MachineState *ms = MACHINE(qdev_get_machine());
+ unsigned int smp_cpus = ms->smp.cpus;
int i, affected_cpu = 0;
bool vcpu_total_blocktime = false;
uint32_t read_vcpu_time, low_time_offset;
affected_cpu);
}
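+/*
+ * Wait until the migration is resumed.  The pause semaphore is posted by
+ * the recovery path once the incoming channel has been re-established
+ * (an assumption based on the callers in this patch).
+ */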
+static bool postcopy_pause_fault_thread(MigrationIncomingState *mis)
+{
+ trace_postcopy_pause_fault_thread();
+
+ qemu_sem_wait(&mis->postcopy_pause_sem_fault);
+
+ trace_postcopy_pause_fault_thread_continued();
+
+ return true;
+}
+
/*
* Handle faults detected by the USERFAULT markings
*/
RAMBlock *rb = NULL;
trace_postcopy_ram_fault_thread_entry();
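+    /* The fault thread looks up RAMBlocks while resolving faults, so it
+     * must be registered as an RCU reader like any other ram-list user. */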
+ rcu_register_thread();
mis->last_rb = NULL; /* last RAMBlock we sent part of */
qemu_sem_post(&mis->fault_thread_sem);
break;
}
+ if (!mis->to_src_file) {
+ /*
+             * The return path may already have been reported broken via
+             * the event.  Hold here until the channel is rebuilt.
+ */
+ if (postcopy_pause_fault_thread(mis)) {
+ mis->last_rb = NULL;
+ /* Continue to read the userfaultfd */
+ } else {
+                error_report("%s: paused but not allowed to continue",
+                             __func__);
+ break;
+ }
+ }
+
if (pfd[1].revents) {
uint64_t tmp64 = 0;
(uintptr_t)(msg.arg.pagefault.address),
msg.arg.pagefault.feat.ptid, rb);
+retry:
/*
* Send the request to the source - we want to request one
* of our host page sizes (which is >= TPS)
*/
if (rb != mis->last_rb) {
mis->last_rb = rb;
- migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
- rb_offset, qemu_ram_pagesize(rb));
+ ret = migrate_send_rp_req_pages(mis,
+ qemu_ram_get_idstr(rb),
+ rb_offset,
+ qemu_ram_pagesize(rb));
} else {
                /* Save some space: same block as last time, omit the name */
- migrate_send_rp_req_pages(mis, NULL,
- rb_offset, qemu_ram_pagesize(rb));
+ ret = migrate_send_rp_req_pages(mis,
+ NULL,
+ rb_offset,
+ qemu_ram_pagesize(rb));
+ }
+
+ if (ret) {
+ /* May be network failure, try to wait for recovery */
+ if (ret == -EIO && postcopy_pause_fault_thread(mis)) {
+ /* We got reconnected somehow, try to continue */
+ mis->last_rb = NULL;
+ goto retry;
+ } else {
+                    /* This is an unavoidable fault */
+                    error_report("%s: migrate_send_rp_req_pages() returned %d",
+ __func__, ret);
+ break;
+ }
}
}
}
}
}
+ rcu_unregister_thread();
trace_postcopy_ram_fault_thread_exit();
g_free(pfd);
return NULL;
mis->have_fault_thread = true;
/* Mark so that we get notified of accesses to unwritten areas */
- if (qemu_ram_foreach_block(ram_block_enable_notify, mis)) {
+ if (foreach_not_ignored_block(ram_block_enable_notify, mis)) {
+ error_report("ram_block_enable_notify failed");
return -1;
}
 * Ballooning can mark pages as absent while we're postcopying,
 * which would cause false userfaults.
*/
- qemu_balloon_inhibit(true);
+ postcopy_balloon_inhibit(true);
trace_postcopy_ram_enable_notify();
return false;
}
-int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
+int postcopy_ram_incoming_init(MigrationIncomingState *mis)
{
error_report("postcopy_ram_incoming_init: No OS support");
return -1;