migration: fix migrate_cancel leading the live_migration thread into an endless loop
diff --git a/migration/ram.c b/migration/ram.c
index 4c608692262e0532bbb69fd628bfac556df1f0d9..52a2d498e4d40e7ae5c061dc37fb95e2cb141269 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -661,8 +661,6 @@ typedef struct {
     uint64_t num_packets;
     /* pages sent through this channel */
     uint64_t num_pages;
-    /* syncs main thread and channels */
-    QemuSemaphore sem_sync;
 }  MultiFDSendParams;
 
 typedef struct {
@@ -894,8 +892,6 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
 
 struct {
     MultiFDSendParams *params;
-    /* number of created threads */
-    int count;
     /* array of pages to be sent */
     MultiFDPages_t *pages;
     /* syncs main thread and channels */
@@ -924,7 +920,7 @@ struct {
  * false.
  */
 
-static void multifd_send_pages(void)
+static int multifd_send_pages(void)
 {
     int i;
     static int next_channel;
@@ -937,6 +933,11 @@ static void multifd_send_pages(void)
         p = &multifd_send_state->params[i];
 
         qemu_mutex_lock(&p->mutex);
+        if (p->quit) {
+            error_report("%s: channel %d has already quit!", __func__, i);
+            qemu_mutex_unlock(&p->mutex);
+            return -1;
+        }
         if (!p->pending_job) {
             p->pending_job++;
             next_channel = (i + 1) % migrate_multifd_channels();
@@ -955,9 +956,11 @@ static void multifd_send_pages(void)
     ram_counters.transferred += transferred;
     qemu_mutex_unlock(&p->mutex);
     qemu_sem_post(&p->sem);
+
+    return 1;
 }
 
-static void multifd_queue_page(RAMBlock *block, ram_addr_t offset)
+static int multifd_queue_page(RAMBlock *block, ram_addr_t offset)
 {
     MultiFDPages_t *pages = multifd_send_state->pages;
 
@@ -972,15 +975,19 @@ static void multifd_queue_page(RAMBlock *block, ram_addr_t offset)
         pages->used++;
 
         if (pages->used < pages->allocated) {
-            return;
+            return 1;
         }
     }
 
-    multifd_send_pages();
+    if (multifd_send_pages() < 0) {
+        return -1;
+    }
 
     if (pages->block != block) {
-        multifd_queue_page(block, offset);
+        return multifd_queue_page(block, offset);
     }
+
+    return 1;
 }
 
 static void multifd_send_terminate_threads(Error *err)
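
Editor's note: the hunks above change multifd_send_pages() and multifd_queue_page() from void to int, so a channel that has quit (e.g. after migrate_cancel) makes the send path fail fast instead of blocking the live_migration thread forever. The sketch below is a minimal self-contained illustration of that contract, not the QEMU code: Batch, flush_batch() and channel_quit are hypothetical stand-ins, and a `queued` flag replaces QEMU's trick of swapping in a fresh MultiFDPages_t while the local pointer keeps the old one.

    #include <stdbool.h>
    #include <stdint.h>

    /* Hypothetical stand-ins for MultiFDPages_t and the send machinery. */
    typedef struct {
        const void *block;       /* which RAMBlock the batch belongs to */
        uint64_t   *offset;      /* queued page offsets */
        int         used, allocated;
    } Batch;

    static bool channel_quit;    /* set once any channel has bailed out */

    /* Stands in for multifd_send_pages(): fails instead of blocking when
     * a channel has already quit. */
    static int flush_batch(Batch *b)
    {
        if (channel_quit) {
            return -1;
        }
        b->used = 0;
        b->block = NULL;
        return 1;
    }

    /* Mirrors the new multifd_queue_page() contract: batch while the block
     * matches and space remains; otherwise flush, then retry once on the
     * now-empty batch.  Any flush failure is propagated to the caller. */
    static int queue_page(Batch *b, const void *block, uint64_t offset)
    {
        bool queued = false;

        if (!b->block) {
            b->block = block;
        }
        if (b->block == block) {
            b->offset[b->used++] = offset;
            queued = true;
            if (b->used < b->allocated) {
                return 1;
            }
        }
        if (flush_batch(b) < 0) {
            return -1;
        }
        return queued ? 1 : queue_page(b, block, offset);
    }

    int main(void)
    {
        uint64_t offs[2];
        Batch b = { .offset = offs, .allocated = 2 };
        int blk_a, blk_b;        /* dummy block identities */

        queue_page(&b, &blk_a, 0);      /* batched under block A */
        queue_page(&b, &blk_b, 0);      /* flushes A's batch, then queues B */
        channel_quit = true;
        return queue_page(&b, &blk_b, 4096) == -1 ? 0 : 1;  /* propagates -1 */
    }

The same pattern repeats in the multifd_save_cleanup() and multifd_send_sync_main() hunks below: every place that hands work to a channel first checks p->quit under the channel mutex and aborts cleanly.
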
@@ -1027,7 +1034,6 @@ void multifd_save_cleanup(void)
         p->c = NULL;
         qemu_mutex_destroy(&p->mutex);
         qemu_sem_destroy(&p->sem);
-        qemu_sem_destroy(&p->sem_sync);
         g_free(p->name);
         p->name = NULL;
         multifd_pages_clear(p->pages);
@@ -1054,7 +1060,10 @@ static void multifd_send_sync_main(void)
         return;
     }
     if (multifd_send_state->pages->used) {
-        multifd_send_pages();
+        if (multifd_send_pages() < 0) {
+            error_report("%s: multifd_send_pages failed", __func__);
+            return;
+        }
     }
     for (i = 0; i < migrate_multifd_channels(); i++) {
         MultiFDSendParams *p = &multifd_send_state->params[i];
@@ -1063,6 +1072,12 @@ static void multifd_send_sync_main(void)
 
         qemu_mutex_lock(&p->mutex);
 
+        if (p->quit) {
+            error_report("%s: channel %d has already quit", __func__, i);
+            qemu_mutex_unlock(&p->mutex);
+            return;
+        }
+
         p->packet_num = multifd_send_state->packet_num++;
         p->flags |= MULTIFD_FLAG_SYNC;
         p->pending_job++;
@@ -1174,8 +1189,6 @@ static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
         p->running = true;
         qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
                            QEMU_THREAD_JOINABLE);
-
-        atomic_inc(&multifd_send_state->count);
     }
 }
 
@@ -1191,7 +1204,6 @@ int multifd_save_setup(void)
     thread_count = migrate_multifd_channels();
     multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
     multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
-    atomic_set(&multifd_send_state->count, 0);
     multifd_send_state->pages = multifd_pages_init(page_count);
     qemu_sem_init(&multifd_send_state->sem_sync, 0);
     qemu_sem_init(&multifd_send_state->channels_ready, 0);
@@ -1201,7 +1213,6 @@ int multifd_save_setup(void)
 
         qemu_mutex_init(&p->mutex);
         qemu_sem_init(&p->sem, 0);
-        qemu_sem_init(&p->sem_sync, 0);
         p->quit = false;
         p->pending_job = 0;
         p->id = i;
@@ -1300,15 +1311,15 @@ static void multifd_recv_sync_main(void)
 
         trace_multifd_recv_sync_main_wait(p->id);
         qemu_sem_wait(&multifd_recv_state->sem_sync);
+    }
+    for (i = 0; i < migrate_multifd_channels(); i++) {
+        MultiFDRecvParams *p = &multifd_recv_state->params[i];
+
         qemu_mutex_lock(&p->mutex);
         if (multifd_recv_state->packet_num < p->packet_num) {
             multifd_recv_state->packet_num = p->packet_num;
         }
         qemu_mutex_unlock(&p->mutex);
-    }
-    for (i = 0; i < migrate_multifd_channels(); i++) {
-        MultiFDRecvParams *p = &multifd_recv_state->params[i];
-
         trace_multifd_recv_sync_main_signal(p->id);
         qemu_sem_post(&p->sem_sync);
     }
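
Editor's note: splitting the receive sync into two loops turns it into a proper barrier: the main thread first waits for every channel to arrive, and only then inspects each channel's packet_num and releases it, so no channel can race ahead while a peer is still unsynced. A self-contained POSIX-threads sketch of that shape follows (illustrative only; QEMU uses qemu_sem_*/qemu_mutex_*, not these calls):

    #include <inttypes.h>
    #include <pthread.h>
    #include <semaphore.h>
    #include <stdint.h>
    #include <stdio.h>

    #define NCHANNELS 4

    typedef struct {
        pthread_mutex_t lock;
        sem_t           go;          /* per-channel release, like p->sem_sync */
        uint64_t        packet_num;
    } Chan;

    static Chan  chans[NCHANNELS];
    static sem_t arrived;            /* like multifd_recv_state->sem_sync */

    static void *channel_thread(void *arg)
    {
        Chan *c = arg;

        pthread_mutex_lock(&c->lock);
        c->packet_num = 100 + (uint64_t)(c - chans);   /* fake some progress */
        pthread_mutex_unlock(&c->lock);
        sem_post(&arrived);          /* phase 1: announce arrival at the sync */
        sem_wait(&c->go);            /* park until the main thread releases us */
        return NULL;
    }

    int main(void)
    {
        pthread_t tid[NCHANNELS];
        uint64_t latest = 0;
        int i;

        sem_init(&arrived, 0, 0);
        for (i = 0; i < NCHANNELS; i++) {
            pthread_mutex_init(&chans[i].lock, NULL);
            sem_init(&chans[i].go, 0, 0);
            pthread_create(&tid[i], NULL, channel_thread, &chans[i]);
        }
        for (i = 0; i < NCHANNELS; i++) {   /* loop 1: wait for all channels */
            sem_wait(&arrived);
        }
        for (i = 0; i < NCHANNELS; i++) {   /* loop 2: collect, then release */
            pthread_mutex_lock(&chans[i].lock);
            if (latest < chans[i].packet_num) {
                latest = chans[i].packet_num;
            }
            pthread_mutex_unlock(&chans[i].lock);
            sem_post(&chans[i].go);
        }
        for (i = 0; i < NCHANNELS; i++) {
            pthread_join(tid[i], NULL);
        }
        printf("latest packet_num = %" PRIu64 "\n", latest);
        return 0;
    }
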
@@ -1594,25 +1605,30 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
     encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
                                        TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
                                        TARGET_PAGE_SIZE);
+
+    /*
+     * Update the cache contents, so that it corresponds to the data
+     * sent, in all cases except where we skip the page.
+     */
+    if (!last_stage && encoded_len != 0) {
+        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
+        /*
+         * In the case where we couldn't compress, ensure that the caller
+         * sends the data from the cache, since the guest might have
+         * changed the RAM since we copied it.
+         */
+        *current_data = prev_cached_page;
+    }
+
     if (encoded_len == 0) {
         trace_save_xbzrle_page_skipping();
         return 0;
     } else if (encoded_len == -1) {
         trace_save_xbzrle_page_overflow();
         xbzrle_counters.overflow++;
-        /* update data in the cache */
-        if (!last_stage) {
-            memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
-            *current_data = prev_cached_page;
-        }
         return -1;
     }
 
-    /* we need to update the data in the cache, in order to get the same data */
-    if (!last_stage) {
-        memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
-    }
-
     /* Send XBZRLE based compressed page */
     bytes_xbzrle = save_page_header(rs, rs->f, block,
                                     offset | RAM_SAVE_FLAG_XBZRLE);
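
Editor's note: after this restructuring the invariant is simple: unless the page is skipped (encoded_len == 0) or we are in the last stage, the cache is refreshed to exactly what will be transmitted, and on overflow *current_data is pointed at the cache so the full page the caller falls back to matches what the destination's cache will hold. A small hedged sketch of that rule, with illustrative names only (the result conventions are assumed to match xbzrle_encode_buffer()):

    #include <stdbool.h>
    #include <stdio.h>

    /* Illustrative outcome model: 0 = page unchanged, -1 = delta overflow,
     * > 0 = size of the encoded delta. */
    typedef enum { SEND_NOTHING, SEND_CACHED_PAGE, SEND_DELTA } XbzrleSend;

    static XbzrleSend send_action(long encoded_len)
    {
        if (encoded_len == 0) {
            return SEND_NOTHING;      /* identical to the cache: skip */
        }
        if (encoded_len < 0) {
            return SEND_CACHED_PAGE;  /* overflow: caller falls back to a full
                                       * page, taken from the cache when not
                                       * in the last stage */
        }
        return SEND_DELTA;            /* normal XBZRLE-compressed delta */
    }

    /* The cache is refreshed exactly when something will be sent and a
     * next round exists to delta against. */
    static bool update_cache(long encoded_len, bool last_stage)
    {
        return !last_stage && encoded_len != 0;
    }

    int main(void)
    {
        printf("overflow: action=%d update=%d\n",
               send_action(-1), update_cache(-1, false));
        return 0;
    }
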
@@ -1668,6 +1684,33 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs,
     bool ret;
 
     qemu_mutex_lock(&rs->bitmap_mutex);
+
+    /*
+     * Clear the dirty bitmap if needed.  This _must_ be called before we
+     * send any page in the chunk, because we need to make sure we can
+     * capture further page content changes when we sync the dirty log
+     * next time.  So as long as we are going to send any page in the
+     * chunk, we clear the remote dirty bitmap for all of them.
+     * Clearing it earlier won't be a problem, but clearing it too late
+     * will.
+     */
+    if (rb->clear_bmap && clear_bmap_test_and_clear(rb, page)) {
+        uint8_t shift = rb->clear_bmap_shift;
+        hwaddr size = 1ULL << (TARGET_PAGE_BITS + shift);
+        hwaddr start = (page << TARGET_PAGE_BITS) & (-size);
+
+        /*
+         * CLEAR_BITMAP_SHIFT_MIN should always guarantee this.  It
+         * can also make things easier, since the start address of a
+         * small chunk will then always be aligned to 64 pages, so the
+         * bitmap will always be aligned to unsigned long.  We should
+         * even be able to remove this restriction, but I'm simply
+         * keeping it for now.
+         */
+        assert(shift >= 6);
+        trace_migration_bitmap_clear_dirty(rb->idstr, start, size, page);
+        memory_region_clear_dirty_bitmap(rb->mr, start, size);
+    }
+
     ret = test_and_clear_bit(page, rb->bmap);
 
     if (ret) {
@@ -1678,6 +1721,7 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs,
     return ret;
 }
 
+/* Called within an RCU critical section */
 static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb,
                                         ram_addr_t length)
 {
@@ -2009,7 +2053,9 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
 static int ram_save_multifd_page(RAMState *rs, RAMBlock *block,
                                  ram_addr_t offset)
 {
-    multifd_queue_page(block, offset);
+    if (multifd_queue_page(block, offset) < 0) {
+        return -1;
+    }
     ram_counters.normal++;
 
     return 1;
@@ -2236,7 +2282,7 @@ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
 }
 
 /**
- * get_queued_page: unqueue a page from the postocpy requests
+ * get_queued_page: unqueue a page from the postcopy requests
  *
  * Skips pages that are already sent (!dirty)
  *
@@ -2290,6 +2336,12 @@ static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
          */
         pss->block = block;
         pss->page = offset >> TARGET_PAGE_BITS;
+
+        /*
+         * This unqueued page would break the "one round" check, even
+         * though that is really rare.
+         */
+        pss->complete_round = false;
     }
 
     return !!block;
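
Editor's note: the complete_round reset above guards the scan logic: servicing a postcopy request warps the cursor backwards, and without the reset the cursor could re-reach its old position and be mistaken for having finished a whole pass. A minimal hedged sketch of the cursor rule (hypothetical Scan type, not QEMU's PageSearchStatus):

    #include <stdbool.h>
    #include <stdint.h>

    typedef struct {
        uint64_t page;            /* current scan position */
        bool     complete_round;  /* wrapped around the end once */
    } Scan;

    /* Jumping to a requested page invalidates any round already counted:
     * a full pass must be observed again from the new position. */
    static void warp_to_requested(Scan *pss, uint64_t page)
    {
        pss->page = page;
        pss->complete_round = false;
    }
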
@@ -2684,6 +2736,8 @@ static void ram_save_cleanup(void *opaque)
     memory_global_dirty_log_stop();
 
     RAMBLOCK_FOREACH_NOT_IGNORED(block) {
+        g_free(block->clear_bmap);
+        block->clear_bmap = NULL;
         g_free(block->bmap);
         block->bmap = NULL;
         g_free(block->unsentmap);
@@ -2772,8 +2826,7 @@ void ram_postcopy_migrated_memory_release(MigrationState *ms)
  *
  * @ms: current migration state
  * @pds: state for postcopy
- * @start: RAMBlock starting page
- * @length: RAMBlock size
+ * @block: RAMBlock to discard
  */
 static int postcopy_send_discard_bm_ram(MigrationState *ms,
                                         PostcopyDiscardState *pds,
@@ -2972,7 +3025,7 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
 }
 
 /**
- * postcopy_chuck_hostpages: discrad any partially sent host page
+ * postcopy_chunk_hostpages: discard any partially sent host page
  *
  * Utility for the outgoing postcopy code.
  *
@@ -3184,9 +3237,9 @@ static int ram_state_init(RAMState **rsp)
     /*
      * Count the total number of pages used by ram blocks not including any
      * gaps due to alignment or unplugs.
+     * This must match the initial values of the dirty bitmaps.
      */
     (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
-
     ram_state_reset(*rsp);
 
     return 0;
@@ -3194,15 +3247,39 @@ static int ram_state_init(RAMState **rsp)
 
 static void ram_list_init_bitmaps(void)
 {
+    MigrationState *ms = migrate_get_current();
     RAMBlock *block;
     unsigned long pages;
+    uint8_t shift;
 
     /* Skip setting bitmap if there is no RAM */
     if (ram_bytes_total()) {
+        shift = ms->clear_bitmap_shift;
+        if (shift > CLEAR_BITMAP_SHIFT_MAX) {
+            error_report("clear_bitmap_shift (%u) too big, using "
+                         "max value (%u)", shift, CLEAR_BITMAP_SHIFT_MAX);
+            shift = CLEAR_BITMAP_SHIFT_MAX;
+        } else if (shift < CLEAR_BITMAP_SHIFT_MIN) {
+            error_report("clear_bitmap_shift (%u) too small, using "
+                         "min value (%u)", shift, CLEAR_BITMAP_SHIFT_MIN);
+            shift = CLEAR_BITMAP_SHIFT_MIN;
+        }
+
         RAMBLOCK_FOREACH_NOT_IGNORED(block) {
             pages = block->max_length >> TARGET_PAGE_BITS;
+            /*
+             * The initial dirty bitmap for migration must be set with all
+             * ones to make sure we'll migrate every guest RAM page to the
+             * destination.
+             * Here we set RAMBlock.bmap all to 1 because when restarting
+             * migration after a failed attempt, ram_list.
+             * dirty_memory[DIRTY_MEMORY_MIGRATION] may not cover the whole
+             * guest memory.
+             */
             block->bmap = bitmap_new(pages);
             bitmap_set(block->bmap, 0, pages);
+            block->clear_bmap_shift = shift;
+            block->clear_bmap = bitmap_new(clear_bmap_size(pages, shift));
             if (migrate_postcopy_ram()) {
                 block->unsentmap = bitmap_new(pages);
                 bitmap_set(block->unsentmap, 0, pages);
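
Editor's note: the clear_bmap allocated in this hunk needs only one bit per 2^shift pages; clear_bmap_size() (defined outside this file) is assumed to round up so a partial trailing chunk still gets a bit. A hedged sketch of that arithmetic, again assuming 4 KiB target pages:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* One clear_bmap bit tracks 2^shift pages; round up so a partial
     * trailing chunk still gets its own bit. */
    static uint64_t clear_bmap_bits(uint64_t pages, uint8_t shift)
    {
        return (pages + (1ULL << shift) - 1) >> shift;
    }

    int main(void)
    {
        /* e.g. a 16 GiB block of 4 KiB pages with shift 18 (1 GiB chunks)
         * needs only 16 bits of clear_bmap. */
        printf("%" PRIu64 " bits\n", clear_bmap_bits(16ULL << 18, 18));
        return 0;
    }

This is why clamping the shift into [CLEAR_BITMAP_SHIFT_MIN, CLEAR_BITMAP_SHIFT_MAX] above only affects granularity, not correctness: a larger shift just trades a smaller bitmap for coarser (and so more expensive) clear operations.
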
@@ -3371,7 +3448,6 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
         }
         if (migrate_ignore_shared()) {
             qemu_put_be64(f, block->mr->addr);
-            qemu_put_byte(f, ramblock_is_ignored(block) ? 1 : 0);
         }
     }
 
@@ -3467,8 +3543,8 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
      */
     ram_control_after_iterate(f, RAM_CONTROL_ROUND);
 
-    multifd_send_sync_main();
 out:
+    multifd_send_sync_main();
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
     qemu_fflush(f);
     ram_counters.transferred += 8;
@@ -4338,12 +4414,6 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
                     }
                     if (migrate_ignore_shared()) {
                         hwaddr addr = qemu_get_be64(f);
-                        bool ignored = qemu_get_byte(f);
-                        if (ignored != ramblock_is_ignored(block)) {
-                            error_report("RAM block %s should %s be migrated",
-                                         id, ignored ? "" : "not");
-                            ret = -EINVAL;
-                        }
                         if (ramblock_is_ignored(block) &&
                             block->mr->addr != addr) {
                             error_report("Mismatched GPAs for block %s "