qcow2: No persistent dirty bitmaps for compat=0.10

[qemu.git] / cpus.c
diff --git a/cpus.c b/cpus.c

index c857ad29572fe718e6847b82228501bfb07b57c2..2cb0af9b2249f77998cf205108bff42e2f51c2fd 100644 (file)
--- a/cpus.c
+++ b/cpus.c
@@ -37,6 +37,7 @@
  #include "sysemu/hw_accel.h"
  #include "sysemu/kvm.h"
  #include "sysemu/hax.h"
+#include "sysemu/hvf.h"
  #include "qmp-commands.h"
  #include "exec/exec-all.h"
  
@@ -50,6 +51,7 @@
  #include "qapi-event.h"
  #include "hw/nmi.h"
  #include "sysemu/replay.h"
+#include "hw/boards.h"
  
  #ifdef CONFIG_LINUX
  
@@ -118,16 +120,11 @@ static bool all_cpu_threads_idle(void)
  /* Protected by TimersState seqlock */
  
  static bool icount_sleep = true;
-static int64_t vm_clock_warp_start = -1;
  /* Conversion factor from emulated instructions to virtual clock ticks.  */
  static int icount_time_shift;
  /* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
  #define MAX_ICOUNT_SHIFT 10
  
-static QEMUTimer *icount_rt_timer;
-static QEMUTimer *icount_vm_timer;
-static QEMUTimer *icount_warp_timer;
-
  typedef struct TimersState {
      /* Protected by BQL.  */
      int64_t cpu_ticks_prev;
@@ -145,6 +142,11 @@ typedef struct TimersState {
      int64_t qemu_icount_bias;
      /* Only written by TCG thread */
      int64_t qemu_icount;
+    /* for adjusting icount */
+    int64_t vm_clock_warp_start;
+    QEMUTimer *icount_rt_timer;
+    QEMUTimer *icount_vm_timer;
+    QEMUTimer *icount_warp_timer;
  } TimersState;
  
  static TimersState timers_state;
@@ -181,10 +183,7 @@ static bool check_tcg_memory_orders_compatible(void)
  
  static bool default_mttcg_enabled(void)
  {
-    QemuOpts *icount_opts = qemu_find_opts_singleton("icount");
-    const char *rr = qemu_opt_get(icount_opts, "rr");
-
-    if (rr || TCG_OVERSIZED_GUEST) {
+    if (use_icount || TCG_OVERSIZED_GUEST) {
          return false;
      } else {
  #ifdef TARGET_SUPPORTS_MTTCG
@@ -202,11 +201,17 @@ void qemu_tcg_configure(QemuOpts *opts, Error **errp)
          if (strcmp(t, "multi") == 0) {
              if (TCG_OVERSIZED_GUEST) {
                  error_setg(errp, "No MTTCG when guest word size > hosts");
+            } else if (use_icount) {
+                error_setg(errp, "No MTTCG when icount is enabled");
              } else {
+#ifndef TARGET_SUPPORTS_MTTCG
+                error_report("Guest not yet converted to MTTCG - "
+                             "you may get unexpected results");
+#endif
                  if (!check_tcg_memory_orders_compatible()) {
                      error_report("Guest expects a stronger memory ordering "
                                   "than the host provides");
-                    error_printf("This may cause strange/hard to debug errors");
+                    error_printf("This may cause strange/hard to debug errors\n");
                  }
                  mttcg_enabled = true;
              }
@@ -220,20 +225,51 @@ void qemu_tcg_configure(QemuOpts *opts, Error **errp)
      }
  }
  
+/* The current number of executed instructions is based on what we
+ * originally budgeted minus the current state of the decrementing
+ * icount counters in icount_extra/icount_decr.u16.low.
+ */
+static int64_t cpu_get_icount_executed(CPUState *cpu)
+{
+    return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
+}
+
+/*
+ * Update the global shared timers_state.qemu_icount to take into
+ * account executed instructions. This is done by the TCG vCPU
+ * thread so the main-loop can see time has moved forward.
+ */
+void cpu_update_icount(CPUState *cpu)
+{
+    int64_t executed = cpu_get_icount_executed(cpu);
+    cpu->icount_budget -= executed;
+
+#ifdef CONFIG_ATOMIC64
+    atomic_set__nocheck(&timers_state.qemu_icount,
+                        atomic_read__nocheck(&timers_state.qemu_icount) +
+                        executed);
+#else /* FIXME: we need 64bit atomics to do this safely */
+    timers_state.qemu_icount += executed;
+#endif
+}
+
  int64_t cpu_get_icount_raw(void)
  {
-    int64_t icount;
      CPUState *cpu = current_cpu;
  
-    icount = timers_state.qemu_icount;
-    if (cpu) {
+    if (cpu && cpu->running) {
          if (!cpu->can_do_io) {
              fprintf(stderr, "Bad icount read\n");
              exit(1);
          }
-        icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
+        /* Take into account what has run */
+        cpu_update_icount(cpu);
      }
-    return icount;
+#ifdef CONFIG_ATOMIC64
+    return atomic_read__nocheck(&timers_state.qemu_icount);
+#else /* FIXME: we need 64bit atomics to do this safely */
+    return timers_state.qemu_icount;
+#endif
  }
  
  /* Return the virtual CPU time, based on the instruction counter.  */
@@ -396,14 +432,14 @@ static void icount_adjust(void)
  
  static void icount_adjust_rt(void *opaque)
  {
-    timer_mod(icount_rt_timer,
+    timer_mod(timers_state.icount_rt_timer,
                qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
      icount_adjust();
  }
  
  static void icount_adjust_vm(void *opaque)
  {
-    timer_mod(icount_vm_timer,
+    timer_mod(timers_state.icount_vm_timer,
                     qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                     NANOSECONDS_PER_SECOND / 10);
      icount_adjust();
@@ -424,7 +460,7 @@ static void icount_warp_rt(void)
       */
      do {
          seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
-        warp_start = vm_clock_warp_start;
+        warp_start = timers_state.vm_clock_warp_start;
      } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
  
      if (warp_start == -1) {
@@ -437,7 +473,7 @@ static void icount_warp_rt(void)
                                       cpu_get_clock_locked());
          int64_t warp_delta;
  
-        warp_delta = clock - vm_clock_warp_start;
+        warp_delta = clock - timers_state.vm_clock_warp_start;
          if (use_icount == 2) {
              /*
               * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
@@ -449,7 +485,7 @@ static void icount_warp_rt(void)
          }
          timers_state.qemu_icount_bias += warp_delta;
      }
-    vm_clock_warp_start = -1;
+    timers_state.vm_clock_warp_start = -1;
      seqlock_write_end(&timers_state.vm_clock_seqlock);
  
      if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
@@ -522,7 +558,7 @@ void qemu_start_warp_timer(void)
      if (deadline < 0) {
          static bool notified;
          if (!icount_sleep && !notified) {
-            error_report("WARNING: icount sleep disabled and no active timers");
+            warn_report("icount sleep disabled and no active timers");
              notified = true;
          }
          return;
@@ -558,11 +594,13 @@ void qemu_start_warp_timer(void)
               * every 100ms.
               */
              seqlock_write_begin(&timers_state.vm_clock_seqlock);
-            if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
-                vm_clock_warp_start = clock;
+            if (timers_state.vm_clock_warp_start == -1
+                || timers_state.vm_clock_warp_start > clock) {
+                timers_state.vm_clock_warp_start = clock;
              }
              seqlock_write_end(&timers_state.vm_clock_seqlock);
-            timer_mod_anticipate(icount_warp_timer, clock + deadline);
+            timer_mod_anticipate(timers_state.icount_warp_timer,
+                                 clock + deadline);
          }
      } else if (deadline == 0) {
          qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
@@ -587,7 +625,7 @@ static void qemu_account_warp_timer(void)
          return;
      }
  
-    timer_del(icount_warp_timer);
+    timer_del(timers_state.icount_warp_timer);
      icount_warp_rt();
  }
  
@@ -596,6 +634,45 @@ static bool icount_state_needed(void *opaque)
      return use_icount;
  }
  
+static bool warp_timer_state_needed(void *opaque)
+{
+    TimersState *s = opaque;
+    return s->icount_warp_timer != NULL;
+}
+
+static bool adjust_timers_state_needed(void *opaque)
+{
+    TimersState *s = opaque;
+    return s->icount_rt_timer != NULL;
+}
+
+/*
+ * Optional subsection for warp timer migration: the timer may not be created
+ */
+static const VMStateDescription icount_vmstate_warp_timer = {
+    .name = "timer/icount/warp_timer",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = warp_timer_state_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_INT64(vm_clock_warp_start, TimersState),
+        VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static const VMStateDescription icount_vmstate_adjust_timers = {
+    .name = "timer/icount/timers",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = adjust_timers_state_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
+        VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
  /*
   * This is a subsection for icount migration.
   */
@@ -608,6 +685,11 @@ static const VMStateDescription icount_vmstate_timers = {
          VMSTATE_INT64(qemu_icount_bias, TimersState),
          VMSTATE_INT64(qemu_icount, TimersState),
          VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription*[]) {
+        &icount_vmstate_warp_timer,
+        &icount_vmstate_adjust_timers,
+        NULL
      }
  };
  
@@ -642,9 +724,9 @@ static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
      sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
  
      qemu_mutex_unlock_iothread();
-    atomic_set(&cpu->throttle_thread_scheduled, 0);
      g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
      qemu_mutex_lock_iothread();
+    atomic_set(&cpu->throttle_thread_scheduled, 0);
  }
  
  static void cpu_throttle_timer_tick(void *opaque)
@@ -718,7 +800,7 @@ void configure_icount(QemuOpts *opts, Error **errp)
  
      icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
      if (icount_sleep) {
-        icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
+        timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                           icount_timer_cb, NULL);
      }
  
@@ -752,13 +834,14 @@ void configure_icount(QemuOpts *opts, Error **errp)
         the virtual time trigger catches emulated time passing too fast.
         Realtime triggers occur even when idle, so use them less frequently
         than VM triggers.  */
-    icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
+    timers_state.vm_clock_warp_start = -1;
+    timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                     icount_adjust_rt, NULL);
-    timer_mod(icount_rt_timer,
+    timer_mod(timers_state.icount_rt_timer,
                     qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
-    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+    timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                          icount_adjust_vm, NULL);
-    timer_mod(icount_vm_timer,
+    timer_mod(timers_state.icount_vm_timer,
                     qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                     NANOSECONDS_PER_SECOND / 10);
  }
@@ -797,6 +880,27 @@ static void qemu_cpu_kick_rr_cpu(void)
      } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
  }
  
+static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
+{
+}
+
+void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
+{
+    if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
+        qemu_notify_event();
+        return;
+    }
+
+    if (!qemu_in_vcpu_thread() && first_cpu) {
+        /* qemu_cpu_kick is not enough to kick a halted CPU out of
+         * qemu_tcg_wait_io_event.  async_run_on_cpu, instead,
+         * causes cpu_thread_is_idle to return false.  This way,
+         * handle_icount_deadline can run.
+         */
+        async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
+    }
+}
+
  static void kick_tcg_thread(void *opaque)
  {
      timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
@@ -805,7 +909,8 @@ static void kick_tcg_thread(void *opaque)
  
  static void start_tcg_kick_timer(void)
  {
-    if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
+    assert(!mttcg_enabled);
+    if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
          tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                             kick_tcg_thread, NULL);
          timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
@@ -814,6 +919,7 @@ static void start_tcg_kick_timer(void)
  
  static void stop_tcg_kick_timer(void)
  {
+    assert(!mttcg_enabled);
      if (tcg_kick_vcpu_timer) {
          timer_del(tcg_kick_vcpu_timer);
          tcg_kick_vcpu_timer = NULL;
@@ -844,6 +950,10 @@ void cpu_synchronize_all_states(void)
  
      CPU_FOREACH(cpu) {
          cpu_synchronize_state(cpu);
+        /* TODO: move to cpu_synchronize_state() */
+        if (hvf_enabled()) {
+            hvf_cpu_synchronize_state(cpu);
+        }
      }
  }
  
@@ -853,6 +963,10 @@ void cpu_synchronize_all_post_reset(void)
  
      CPU_FOREACH(cpu) {
          cpu_synchronize_post_reset(cpu);
+        /* TODO: move to cpu_synchronize_post_reset() */
+        if (hvf_enabled()) {
+            hvf_cpu_synchronize_post_reset(cpu);
+        }
      }
  }
  
@@ -862,6 +976,19 @@ void cpu_synchronize_all_post_init(void)
  
      CPU_FOREACH(cpu) {
          cpu_synchronize_post_init(cpu);
+        /* TODO: move to cpu_synchronize_post_init() */
+        if (hvf_enabled()) {
+            hvf_cpu_synchronize_post_init(cpu);
+        }
+    }
+}
+
+void cpu_synchronize_all_pre_loadvm(void)
+{
+    CPUState *cpu;
+
+    CPU_FOREACH(cpu) {
+        cpu_synchronize_pre_loadvm(cpu);
      }
  }
  
@@ -992,29 +1119,29 @@ static void qemu_tcg_destroy_vcpu(CPUState *cpu)
  {
  }
  
-static void qemu_wait_io_event_common(CPUState *cpu)
+static void qemu_cpu_stop(CPUState *cpu, bool exit)
  {
-    atomic_mb_set(&cpu->thread_kicked, false);
-    if (cpu->stop) {
-        cpu->stop = false;
-        cpu->stopped = true;
-        qemu_cond_broadcast(&qemu_pause_cond);
+    g_assert(qemu_cpu_is_self(cpu));
+    cpu->stop = false;
+    cpu->stopped = true;
+    if (exit) {
+        cpu_exit(cpu);
      }
-    process_queued_cpu_work(cpu);
+    qemu_cond_broadcast(&qemu_pause_cond);
  }
  
-static bool qemu_tcg_should_sleep(CPUState *cpu)
+static void qemu_wait_io_event_common(CPUState *cpu)
  {
-    if (mttcg_enabled) {
-        return cpu_thread_is_idle(cpu);
-    } else {
-        return all_cpu_threads_idle();
+    atomic_mb_set(&cpu->thread_kicked, false);
+    if (cpu->stop) {
+        qemu_cpu_stop(cpu, false);
      }
+    process_queued_cpu_work(cpu);
  }
  
-static void qemu_tcg_wait_io_event(CPUState *cpu)
+static void qemu_tcg_rr_wait_io_event(CPUState *cpu)
  {
-    while (qemu_tcg_should_sleep(cpu)) {
+    while (all_cpu_threads_idle()) {
          stop_tcg_kick_timer();
          qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
      }
@@ -1024,12 +1151,18 @@ static void qemu_tcg_wait_io_event(CPUState *cpu)
      qemu_wait_io_event_common(cpu);
  }
  
-static void qemu_kvm_wait_io_event(CPUState *cpu)
+static void qemu_wait_io_event(CPUState *cpu)
  {
      while (cpu_thread_is_idle(cpu)) {
          qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
      }
  
+#ifdef _WIN32
+    /* Eat dummy APC queued by qemu_cpu_kick_thread.  */
+    if (!tcg_enabled()) {
+        SleepEx(0, TRUE);
+    }
+#endif
      qemu_wait_io_event_common(cpu);
  }
  
@@ -1065,7 +1198,7 @@ static void *qemu_kvm_cpu_thread_fn(void *arg)
                  cpu_handle_guest_debug(cpu);
              }
          }
-        qemu_kvm_wait_io_event(cpu);
+        qemu_wait_io_event(cpu);
      } while (!cpu->unplug || cpu_can_run(cpu));
  
      qemu_kvm_destroy_vcpu(cpu);
@@ -1111,7 +1244,7 @@ static void *qemu_dummy_cpu_thread_fn(void *arg)
              exit(1);
          }
          qemu_mutex_lock_iothread();
-        qemu_wait_io_event_common(cpu);
+        qemu_wait_io_event(cpu);
      }
  
      return NULL;
@@ -1142,16 +1275,54 @@ static int64_t tcg_get_icount_limit(void)
  
  static void handle_icount_deadline(void)
  {
+    assert(qemu_in_vcpu_thread());
      if (use_icount) {
          int64_t deadline =
              qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
  
          if (deadline == 0) {
+            /* Wake up other AioContexts.  */
              qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
+            qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
          }
      }
  }
  
+static void prepare_icount_for_run(CPUState *cpu)
+{
+    if (use_icount) {
+        int insns_left;
+
+        /* These should always be cleared by process_icount_data after
+         * each vCPU execution. However u16.high can be raised
+         * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
+         */
+        g_assert(cpu->icount_decr.u16.low == 0);
+        g_assert(cpu->icount_extra == 0);
+
+        cpu->icount_budget = tcg_get_icount_limit();
+        insns_left = MIN(0xffff, cpu->icount_budget);
+        cpu->icount_decr.u16.low = insns_left;
+        cpu->icount_extra = cpu->icount_budget - insns_left;
+    }
+}
+
+static void process_icount_data(CPUState *cpu)
+{
+    if (use_icount) {
+        /* Account for executed instructions */
+        cpu_update_icount(cpu);
+
+        /* Reset the counters */
+        cpu->icount_decr.u16.low = 0;
+        cpu->icount_extra = 0;
+        cpu->icount_budget = 0;
+
+        replay_account_executed_instructions();
+    }
+}
+
+
  static int tcg_cpu_exec(CPUState *cpu)
  {
      int ret;
@@ -1162,20 +1333,6 @@ static int tcg_cpu_exec(CPUState *cpu)
  #ifdef CONFIG_PROFILER
      ti = profile_getclock();
  #endif
-    if (use_icount) {
-        int64_t count;
-        int decr;
-        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
-                                    + cpu->icount_extra);
-        cpu->icount_decr.u16.low = 0;
-        cpu->icount_extra = 0;
-        count = tcg_get_icount_limit();
-        timers_state.qemu_icount += count;
-        decr = (count > 0xffff) ? 0xffff : count;
-        count -= decr;
-        cpu->icount_decr.u16.low = decr;
-        cpu->icount_extra = count;
-    }
      qemu_mutex_unlock_iothread();
      cpu_exec_start(cpu);
      ret = cpu_exec(cpu);
@@ -1184,15 +1341,6 @@ static int tcg_cpu_exec(CPUState *cpu)
  #ifdef CONFIG_PROFILER
      tcg_time += profile_getclock() - ti;
  #endif
-    if (use_icount) {
-        /* Fold pending instructions back into the
-           instruction counter, and clear the interrupt flag.  */
-        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
-                        + cpu->icount_extra);
-        cpu->icount_decr.u32 = 0;
-        cpu->icount_extra = 0;
-        replay_account_executed_instructions();
-    }
      return ret;
  }
  
@@ -1227,6 +1375,7 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
      CPUState *cpu = arg;
  
      rcu_register_thread();
+    tcg_register_thread();
  
      qemu_mutex_lock_iothread();
      qemu_thread_get_self(cpu->thread);
@@ -1260,6 +1409,11 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
          /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
          qemu_account_warp_timer();
  
+        /* Run the timers here.  This is much more efficient than
+         * waking up the I/O thread and waiting for completion.
+         */
+        handle_icount_deadline();
+
          if (!cpu) {
              cpu = first_cpu;
          }
@@ -1274,7 +1428,13 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
  
              if (cpu_can_run(cpu)) {
                  int r;
+
+                prepare_icount_for_run(cpu);
+
                  r = tcg_cpu_exec(cpu);
+
+                process_icount_data(cpu);
+
                  if (r == EXCP_DEBUG) {
                      cpu_handle_guest_debug(cpu);
                      break;
@@ -1301,9 +1461,7 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
              atomic_mb_set(&cpu->exit_request, 0);
          }
  
-        handle_icount_deadline();
-
-        qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
+        qemu_tcg_rr_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
          deal_with_unplugged_cpus();
      }
  
@@ -1314,8 +1472,9 @@ static void *qemu_hax_cpu_thread_fn(void *arg)
  {
      CPUState *cpu = arg;
      int r;
+
+    qemu_mutex_lock_iothread();
      qemu_thread_get_self(cpu->thread);
-    qemu_mutex_lock(&qemu_global_mutex);
  
      cpu->thread_id = qemu_get_thread_id();
      cpu->created = true;
@@ -1333,17 +1492,53 @@ static void *qemu_hax_cpu_thread_fn(void *arg)
              }
          }
  
-        while (cpu_thread_is_idle(cpu)) {
-            qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
-        }
-#ifdef _WIN32
-        SleepEx(0, TRUE);
-#endif
-        qemu_wait_io_event_common(cpu);
+        qemu_wait_io_event(cpu);
      }
      return NULL;
  }
  
+/* The HVF-specific vCPU thread function. This one should only run when the host
+ * CPU supports the VMX "unrestricted guest" feature. */
+static void *qemu_hvf_cpu_thread_fn(void *arg)
+{
+    CPUState *cpu = arg;
+
+    int r;
+
+    assert(hvf_enabled());
+
+    rcu_register_thread();
+
+    qemu_mutex_lock_iothread();
+    qemu_thread_get_self(cpu->thread);
+
+    cpu->thread_id = qemu_get_thread_id();
+    cpu->can_do_io = 1;
+    current_cpu = cpu;
+
+    hvf_init_vcpu(cpu);
+
+    /* signal CPU creation */
+    cpu->created = true;
+    qemu_cond_signal(&qemu_cpu_cond);
+
+    do {
+        if (cpu_can_run(cpu)) {
+            r = hvf_vcpu_exec(cpu);
+            if (r == EXCP_DEBUG) {
+                cpu_handle_guest_debug(cpu);
+            }
+        }
+        qemu_wait_io_event(cpu);
+    } while (!cpu->unplug || cpu_can_run(cpu));
+
+    hvf_vcpu_destroy(cpu);
+    cpu->created = false;
+    qemu_cond_signal(&qemu_cpu_cond);
+    qemu_mutex_unlock_iothread();
+    return NULL;
+}
+
  #ifdef _WIN32
  static void CALLBACK dummy_apc_func(ULONG_PTR unused)
  {
@@ -1361,7 +1556,10 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
  {
      CPUState *cpu = arg;
  
+    g_assert(!use_icount);
+
      rcu_register_thread();
+    tcg_register_thread();
  
      qemu_mutex_lock_iothread();
      qemu_thread_get_self(cpu->thread);
@@ -1401,12 +1599,16 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
                  /* Ignore everything else? */
                  break;
              }
+        } else if (cpu->unplug) {
+            qemu_tcg_destroy_vcpu(cpu);
+            cpu->created = false;
+            qemu_cond_signal(&qemu_cpu_cond);
+            qemu_mutex_unlock_iothread();
+            return NULL;
          }
  
-        handle_icount_deadline();
-
          atomic_mb_set(&cpu->exit_request, 0);
-        qemu_tcg_wait_io_event(cpu);
+        qemu_wait_io_event(cpu);
      }
  
      return NULL;
@@ -1512,12 +1714,12 @@ void pause_all_vcpus(void)
  
      qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
      CPU_FOREACH(cpu) {
-        cpu->stop = true;
-        qemu_cpu_kick(cpu);
-    }
-
-    if (qemu_in_vcpu_thread()) {
-        cpu_stop_current();
+        if (qemu_cpu_is_self(cpu)) {
+            qemu_cpu_stop(cpu, true);
+        } else {
+            cpu->stop = true;
+            qemu_cpu_kick(cpu);
+        }
      }
  
      while (!all_vcpus_paused()) {
@@ -1568,6 +1770,18 @@ static void qemu_tcg_init_vcpu(CPUState *cpu)
      char thread_name[VCPU_THREAD_NAME_SIZE];
      static QemuCond *single_tcg_halt_cond;
      static QemuThread *single_tcg_cpu_thread;
+    static int tcg_region_inited;
+
+    /*
+     * Initialize TCG regions--once. Now is a good time, because:
+     * (1) TCG's init context, prologue and target globals have been set up.
+     * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
+     *     -accel flag is processed, so the check doesn't work then).
+     */
+    if (!tcg_region_inited) {
+        tcg_region_inited = 1;
+        tcg_region_init();
+    }
  
      if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
          cpu->thread = g_malloc0(sizeof(QemuThread));
@@ -1642,6 +1856,27 @@ static void qemu_kvm_start_vcpu(CPUState *cpu)
      }
  }
  
+static void qemu_hvf_start_vcpu(CPUState *cpu)
+{
+    char thread_name[VCPU_THREAD_NAME_SIZE];
+
+    /* HVF currently does not support TCG, and only runs in
+     * unrestricted-guest mode. */
+    assert(hvf_enabled());
+
+    cpu->thread = g_malloc0(sizeof(QemuThread));
+    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
+    qemu_cond_init(cpu->halt_cond);
+
+    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
+             cpu->cpu_index);
+    qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
+                       cpu, QEMU_THREAD_JOINABLE);
+    while (!cpu->created) {
+        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
+    }
+}
+
  static void qemu_dummy_start_vcpu(CPUState *cpu)
  {
      char thread_name[VCPU_THREAD_NAME_SIZE];
@@ -1668,16 +1903,16 @@ void qemu_init_vcpu(CPUState *cpu)
          /* If the target cpu hasn't set up any address spaces itself,
           * give it the default one.
           */
-        AddressSpace *as = address_space_init_shareable(cpu->memory,
-                                                        "cpu-memory");
          cpu->num_ases = 1;
-        cpu_address_space_init(cpu, as, 0);
+        cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
      }
  
      if (kvm_enabled()) {
          qemu_kvm_start_vcpu(cpu);
      } else if (hax_enabled()) {
          qemu_hax_start_vcpu(cpu);
+    } else if (hvf_enabled()) {
+        qemu_hvf_start_vcpu(cpu);
      } else if (tcg_enabled()) {
          qemu_tcg_init_vcpu(cpu);
      } else {
@@ -1688,10 +1923,7 @@ void qemu_init_vcpu(CPUState *cpu)
  void cpu_stop_current(void)
  {
      if (current_cpu) {
-        current_cpu->stop = false;
-        current_cpu->stopped = true;
-        cpu_exit(current_cpu);
-        qemu_cond_broadcast(&qemu_pause_cond);
+        qemu_cpu_stop(current_cpu, true);
      }
  }
  
@@ -1779,6 +2011,8 @@ void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
  
  CpuInfoList *qmp_query_cpus(Error **errp)
  {
+    MachineState *ms = MACHINE(qdev_get_machine());
+    MachineClass *mc = MACHINE_GET_CLASS(ms);
      CpuInfoList *head = NULL, *cur_item = NULL;
      CPUState *cpu;
  
@@ -1829,6 +2063,13 @@ CpuInfoList *qmp_query_cpus(Error **errp)
  #else
          info->value->arch = CPU_INFO_ARCH_OTHER;
  #endif
+        info->value->has_props = !!mc->cpu_index_to_instance_props;
+        if (info->value->has_props) {
+            CpuInstanceProperties *props;
+            props = g_malloc0(sizeof(*props));
+            *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
+            info->value->props = props;
+        }
  
          /* XXX: waiting for the qapi to support GSList */
          if (!cur_item) {