migration: multifd_send_thread always post p->sem_sync when error happen

[qemu.git] / migration / colo.c
diff --git a/migration/colo.c b/migration/colo.c

index 4381067ed4c54dd8b93ae625ec29ed2a706bded6..2c88aa57a29307963a15fc017b1d8979e464f4c0 100644 (file)
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -23,10 +23,26 @@
  #include "io/channel-buffer.h"
  #include "trace.h"
  #include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+#include "qemu/rcu.h"
  #include "migration/failover.h"
+#ifdef CONFIG_REPLICATION
  #include "replication.h"
+#endif
+#include "net/colo-compare.h"
+#include "net/colo.h"
+#include "block/block.h"
+#include "qapi/qapi-events-migration.h"
+#include "qapi/qmp/qerror.h"
+#include "sysemu/cpus.h"
+#include "sysemu/runstate.h"
+#include "net/filter.h"
  
  static bool vmstate_loading;
+static Notifier packets_compare_notifier;
+
+/* Users need to know the COLO mode after a COLO failover */
+static COLOMode last_colo_mode;
  
  #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
  
@@ -51,8 +67,11 @@ static bool colo_runstate_is_stopped(void)
  
  static void secondary_vm_do_failover(void)
  {
+/* COLO requires block replication to be enabled */
+#ifdef CONFIG_REPLICATION
      int old_state;
      MigrationIncomingState *mis = migration_incoming_get_current();
+    Error *local_err = NULL;
  
      /* Can not do failover during the process of VM's loading VMstate, Or
       * it will break the secondary VM.
@@ -70,6 +89,17 @@ static void secondary_vm_do_failover(void)
      migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
                        MIGRATION_STATUS_COMPLETED);
  
+    replication_stop_all(true, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+    }
+
+    /* Notify all filters of all NICs about the failover */
+    colo_notify_filters_event(COLO_EVENT_FAILOVER, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+    }
+
      if (!autostart) {
          error_report("\"-S\" qemu option will be ignored in secondary side");
          /* recover runstate to normal migration finish state */
@@ -97,19 +127,30 @@ static void secondary_vm_do_failover(void)
      }
      /* Notify COLO incoming thread that failover work is finished */
      qemu_sem_post(&mis->colo_incoming_sem);
+
      /* For Secondary VM, jump to incoming co */
      if (mis->migration_incoming_co) {
          qemu_coroutine_enter(mis->migration_incoming_co);
      }
+#else
+    abort();
+#endif
  }
  
  static void primary_vm_do_failover(void)
  {
+#ifdef CONFIG_REPLICATION
      MigrationState *s = migrate_get_current();
      int old_state;
+    Error *local_err = NULL;
  
      migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
                        MIGRATION_STATUS_COMPLETED);
+    /*
+     * Kick the COLO thread, which might be waiting in
+     * qemu_sem_wait(&s->colo_checkpoint_sem).
+     */
+    colo_checkpoint_notify(migrate_get_current());
  
      /*
       * Wake up COLO thread which may blocked in recv() or send(),
@@ -130,29 +171,56 @@ static void primary_vm_do_failover(void)
                       FailoverStatus_str(old_state));
          return;
      }
+
+    replication_stop_all(true, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        local_err = NULL;
+    }
+
      /* Notify COLO thread that failover work is finished */
      qemu_sem_post(&s->colo_exit_sem);
+#else
+    abort();
+#endif
  }
  
-void colo_do_failover(MigrationState *s)
+COLOMode get_colo_mode(void)
+{
+    if (migration_in_colo_state()) {
+        return COLO_MODE_PRIMARY;
+    } else if (migration_incoming_in_colo_state()) {
+        return COLO_MODE_SECONDARY;
+    } else {
+        return COLO_MODE_NONE;
+    }
+}
+
+void colo_do_failover(void)
  {
      /* Make sure VM stopped while failover happened. */
      if (!colo_runstate_is_stopped()) {
          vm_stop_force_state(RUN_STATE_COLO);
      }
  
-    if (get_colo_mode() == COLO_MODE_PRIMARY) {
+    switch (get_colo_mode()) {
+    case COLO_MODE_PRIMARY:
          primary_vm_do_failover();
-    } else {
+        break;
+    case COLO_MODE_SECONDARY:
          secondary_vm_do_failover();
+        break;
+    default:
+        error_report("colo_do_failover failed because the colo mode"
+                     " could not be obtained");
      }
  }
  
+#ifdef CONFIG_REPLICATION
  void qmp_xen_set_replication(bool enable, bool primary,
                               bool has_failover, bool failover,
                               Error **errp)
  {
-#ifdef CONFIG_REPLICATION
      ReplicationMode mode = primary ?
                             REPLICATION_MODE_PRIMARY :
                             REPLICATION_MODE_SECONDARY;
@@ -171,14 +239,10 @@ void qmp_xen_set_replication(bool enable, bool primary,
          }
          replication_stop_all(failover, failover ? NULL : errp);
      }
-#else
-    abort();
-#endif
  }
  
  ReplicationStatus *qmp_query_xen_replication_status(Error **errp)
  {
-#ifdef CONFIG_REPLICATION
      Error *err = NULL;
      ReplicationStatus *s = g_new0(ReplicationStatus, 1);
  
@@ -193,18 +257,39 @@ ReplicationStatus *qmp_query_xen_replication_status(Error **errp)
  
      error_free(err);
      return s;
-#else
-    abort();
-#endif
  }
  
  void qmp_xen_colo_do_checkpoint(Error **errp)
  {
-#ifdef CONFIG_REPLICATION
      replication_do_checkpoint_all(errp);
-#else
-    abort();
+    /* Notify all filters of all NICs to do a checkpoint */
+    colo_notify_filters_event(COLO_EVENT_CHECKPOINT, errp);
+}
  #endif
+
+COLOStatus *qmp_query_colo_status(Error **errp)
+{
+    COLOStatus *s = g_new0(COLOStatus, 1);
+
+    s->mode = get_colo_mode();
+    s->last_mode = last_colo_mode;
+
+    switch (failover_get_state()) {
+    case FAILOVER_STATUS_NONE:
+        s->reason = COLO_EXIT_REASON_NONE;
+        break;
+    case FAILOVER_STATUS_COMPLETED:
+        s->reason = COLO_EXIT_REASON_REQUEST;
+        break;
+    default:
+        if (migration_in_colo_state()) {
+            s->reason = COLO_EXIT_REASON_PROCESSING;
+        } else {
+            s->reason = COLO_EXIT_REASON_ERROR;
+        }
+    }
+
+    return s;
  }
  
  static void colo_send_message(QEMUFile *f, COLOMessage msg,
@@ -343,20 +428,46 @@ static int colo_do_checkpoint_transaction(MigrationState *s,
          goto out;
      }
  
+    colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, &local_err);
+    if (local_err) {
+        goto out;
+    }
+
      /* Disable block migration */
      migrate_set_block_enabled(false, &local_err);
-    qemu_savevm_state_header(fb);
-    qemu_savevm_state_setup(fb);
      qemu_mutex_lock_iothread();
-    qemu_savevm_state_complete_precopy(fb, false, false);
-    qemu_mutex_unlock_iothread();
  
-    qemu_fflush(fb);
+#ifdef CONFIG_REPLICATION
+    replication_do_checkpoint_all(&local_err);
+    if (local_err) {
+        qemu_mutex_unlock_iothread();
+        goto out;
+    }
+#else
+        abort();
+#endif
  
      colo_send_message(s->to_dst_file, COLO_MESSAGE_VMSTATE_SEND, &local_err);
      if (local_err) {
+        qemu_mutex_unlock_iothread();
+        goto out;
+    }
+    /* Note: device state is saved into buffer */
+    ret = qemu_save_device_state(fb);
+
+    qemu_mutex_unlock_iothread();
+    if (ret < 0) {
          goto out;
      }
+    /*
+     * Only save the VM's live state, which does not include device state.
+     * TODO: We may need a timeout mechanism to prevent the COLO process
+     * from being blocked here.
+     */
+    qemu_savevm_live_state(s->to_dst_file);
+
+    qemu_fflush(fb);
+
      /*
       * We need the size of the VMstate data in Secondary side,
       * With which we can decide how much data should be read.
@@ -400,6 +511,11 @@ out:
      return ret;
  }
  
+static void colo_compare_notify_checkpoint(Notifier *notifier, void *data)
+{
+    colo_checkpoint_notify(data);
+}
+
  static void colo_process_checkpoint(MigrationState *s)
  {
      QIOChannelBuffer *bioc;
@@ -408,6 +524,12 @@ static void colo_process_checkpoint(MigrationState *s)
      Error *local_err = NULL;
      int ret;
  
+    last_colo_mode = get_colo_mode();
+    if (last_colo_mode != COLO_MODE_PRIMARY) {
+        error_report("COLO mode must be COLO_MODE_PRIMARY");
+        return;
+    }
+
      failover_init_state();
  
      s->rp_state.from_dst_file = qemu_file_get_return_path(s->to_dst_file);
@@ -416,6 +538,9 @@ static void colo_process_checkpoint(MigrationState *s)
          goto out;
      }
  
+    packets_compare_notifier.notify = colo_compare_notify_checkpoint;
+    colo_compare_register_notifier(&packets_compare_notifier);
+
      /*
       * Wait for Secondary finish loading VM states and enter COLO
       * restore.
@@ -430,6 +555,16 @@ static void colo_process_checkpoint(MigrationState *s)
      object_unref(OBJECT(bioc));
  
      qemu_mutex_lock_iothread();
+#ifdef CONFIG_REPLICATION
+    replication_start_all(REPLICATION_MODE_PRIMARY, &local_err);
+    if (local_err) {
+        qemu_mutex_unlock_iothread();
+        goto out;
+    }
+#else
+        abort();
+#endif
+
      vm_start();
      qemu_mutex_unlock_iothread();
      trace_colo_vm_state_change("stop", "run");
@@ -445,6 +580,9 @@ static void colo_process_checkpoint(MigrationState *s)
  
          qemu_sem_wait(&s->colo_checkpoint_sem);
  
+        if (s->state != MIGRATION_STATUS_COLO) {
+            goto out;
+        }
          ret = colo_do_checkpoint_transaction(s, bioc, fb);
          if (ret < 0) {
              goto out;
@@ -461,11 +599,35 @@ out:
          qemu_fclose(fb);
      }
  
-    timer_del(s->colo_delay_timer);
+    /*
+     * There are only two reasons we can get here: some error happened
+     * or the user triggered failover.
+     */
+    switch (failover_get_state()) {
+    case FAILOVER_STATUS_COMPLETED:
+        qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
+                                  COLO_EXIT_REASON_REQUEST);
+        break;
+    default:
+        qapi_event_send_colo_exit(COLO_MODE_PRIMARY,
+                                  COLO_EXIT_REASON_ERROR);
+    }
  
      /* Hope this not to be too long to wait here */
      qemu_sem_wait(&s->colo_exit_sem);
      qemu_sem_destroy(&s->colo_exit_sem);
+
+    /*
+     * It is safe to unregister the notifier after failover has finished.
+     * Besides, colo_delay_timer and colo_checkpoint_sem must not be
+     * released before the notifier is unregistered, or there will be a
+     * use-after-free error.
+     */
+    colo_compare_unregister_notifier(&packets_compare_notifier);
+    timer_del(s->colo_delay_timer);
+    timer_free(s->colo_delay_timer);
+    qemu_sem_destroy(&s->colo_checkpoint_sem);
+
      /*
       * Must be called after failover BH is completed,
       * Or the failover BH may shutdown the wrong fd that
@@ -533,12 +695,20 @@ void *colo_process_incoming_thread(void *opaque)
      uint64_t total_size;
      uint64_t value;
      Error *local_err = NULL;
+    int ret;
  
+    rcu_register_thread();
      qemu_sem_init(&mis->colo_incoming_sem, 0);
  
      migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                        MIGRATION_STATUS_COLO);
  
+    last_colo_mode = get_colo_mode();
+    if (last_colo_mode != COLO_MODE_SECONDARY) {
+        error_report("COLO mode must be COLO_MODE_SECONDARY");
+        return NULL;
+    }
+
      failover_init_state();
  
      mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);
@@ -558,6 +728,20 @@ void *colo_process_incoming_thread(void *opaque)
      fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
      object_unref(OBJECT(bioc));
  
+    qemu_mutex_lock_iothread();
+#ifdef CONFIG_REPLICATION
+    replication_start_all(REPLICATION_MODE_SECONDARY, &local_err);
+    if (local_err) {
+        qemu_mutex_unlock_iothread();
+        goto out;
+    }
+#else
+        abort();
+#endif
+    vm_start();
+    trace_colo_vm_state_change("stop", "run");
+    qemu_mutex_unlock_iothread();
+
      colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY,
                        &local_err);
      if (local_err) {
@@ -577,6 +761,11 @@ void *colo_process_incoming_thread(void *opaque)
              goto out;
          }
  
+        qemu_mutex_lock_iothread();
+        vm_stop_force_state(RUN_STATE_COLO);
+        trace_colo_vm_state_change("run", "stop");
+        qemu_mutex_unlock_iothread();
+
          /* FIXME: This is unnecessary for periodic checkpoint mode */
          colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY,
                       &local_err);
@@ -590,6 +779,16 @@ void *colo_process_incoming_thread(void *opaque)
              goto out;
          }
  
+        qemu_mutex_lock_iothread();
+        cpu_synchronize_all_pre_loadvm();
+        ret = qemu_loadvm_state_main(mis->from_src_file, mis);
+        qemu_mutex_unlock_iothread();
+
+        if (ret < 0) {
+            error_report("Load VM's live state (ram) error");
+            goto out;
+        }
+
          value = colo_receive_message_value(mis->from_src_file,
                                   COLO_MESSAGE_VMSTATE_SIZE, &local_err);
          if (local_err) {
@@ -621,15 +820,41 @@ void *colo_process_incoming_thread(void *opaque)
          }
  
          qemu_mutex_lock_iothread();
-        qemu_system_reset(SHUTDOWN_CAUSE_NONE);
          vmstate_loading = true;
-        if (qemu_loadvm_state(fb) < 0) {
-            error_report("COLO: loadvm failed");
+        ret = qemu_load_device_state(fb);
+        if (ret < 0) {
+            error_report("COLO: load device state failed");
+            qemu_mutex_unlock_iothread();
+            goto out;
+        }
+
+#ifdef CONFIG_REPLICATION
+        replication_get_error_all(&local_err);
+        if (local_err) {
+            qemu_mutex_unlock_iothread();
+            goto out;
+        }
+
+        /* discard colo disk buffer */
+        replication_do_checkpoint_all(&local_err);
+        if (local_err) {
+            qemu_mutex_unlock_iothread();
+            goto out;
+        }
+#else
+        abort();
+#endif
+        /* Notify all filters of all NICs to do a checkpoint */
+        colo_notify_filters_event(COLO_EVENT_CHECKPOINT, &local_err);
+
+        if (local_err) {
              qemu_mutex_unlock_iothread();
              goto out;
          }
  
          vmstate_loading = false;
+        vm_start();
+        trace_colo_vm_state_change("stop", "run");
          qemu_mutex_unlock_iothread();
  
          if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
@@ -653,6 +878,20 @@ out:
          error_report_err(local_err);
      }
  
+    /*
+     * There are only two reasons we can get here: some error happened
+     * or the user triggered failover.
+     */
+    switch (failover_get_state()) {
+    case FAILOVER_STATUS_COMPLETED:
+        qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
+                                  COLO_EXIT_REASON_REQUEST);
+        break;
+    default:
+        qapi_event_send_colo_exit(COLO_MODE_SECONDARY,
+                                  COLO_EXIT_REASON_ERROR);
+    }
+
      if (fb) {
          qemu_fclose(fb);
      }
@@ -663,8 +902,9 @@ out:
      /* Must be called after failover BH is completed */
      if (mis->to_src_file) {
          qemu_fclose(mis->to_src_file);
+        mis->to_src_file = NULL;
      }
-    migration_incoming_exit_colo();
  
+    rcu_unregister_thread();
      return NULL;
  }