/* Needed early for CONFIG_BSD etc. */
#include "qemu/osdep.h"
#include "qemu-common.h"
+#include "qemu/config-file.h"
#include "cpu.h"
#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "sysemu/block-backend.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
+#include "sysemu/hw_accel.h"
#include "sysemu/kvm.h"
+#include "sysemu/hax.h"
#include "qmp-commands.h"
#include "exec/exec-all.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
+#include "tcg.h"
#include "qapi-event.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"
-#ifndef _WIN32
-#include "qemu/compatfd.h"
-#endif
-
#ifdef CONFIG_LINUX
#include <sys/prctl.h>
#endif /* CONFIG_LINUX */
-static CPUState *next_cpu;
int64_t max_delay;
int64_t max_advance;
} TimersState;
static TimersState timers_state;
+bool mttcg_enabled;
+
+/*
+ * We default to false if we know other options have been enabled
+ * which are currently incompatible with MTTCG. Otherwise when each
+ * guest (target) has been updated to support:
+ * - atomic instructions
+ * - memory ordering primitives (barriers)
+ * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
+ *
+ * Once a guest architecture has been converted to the new primitives
+ * there are two remaining limitations to check.
+ *
+ * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
+ * - The host must have a stronger memory order than the guest
+ *
+ * It may be possible in future to support strong guests on weak hosts
+ * but that will require tagging all load/stores in a guest with their
+ * implicit memory order requirements which would likely slow things
+ * down a lot.
+ */
+
+static bool check_tcg_memory_orders_compatible(void)
+{
+#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
+ return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
+#else
+ return false;
+#endif
+}
+
+static bool default_mttcg_enabled(void)
+{
+ QemuOpts *icount_opts = qemu_find_opts_singleton("icount");
+ const char *rr = qemu_opt_get(icount_opts, "rr");
+
+ if (rr || TCG_OVERSIZED_GUEST) {
+ return false;
+ } else {
+#ifdef TARGET_SUPPORTS_MTTCG
+ return check_tcg_memory_orders_compatible();
+#else
+ return false;
+#endif
+ }
+}
+
+void qemu_tcg_configure(QemuOpts *opts, Error **errp)
+{
+ const char *t = qemu_opt_get(opts, "thread");
+ if (t) {
+ if (strcmp(t, "multi") == 0) {
+ if (TCG_OVERSIZED_GUEST) {
+ error_setg(errp, "No MTTCG when guest word size > hosts");
+ } else {
+ if (!check_tcg_memory_orders_compatible()) {
+ error_report("Guest expects a stronger memory ordering "
+ "than the host provides");
+ error_printf("This may cause strange/hard to debug errors");
+ }
+ mttcg_enabled = true;
+ }
+ } else if (strcmp(t, "single") == 0) {
+ mttcg_enabled = false;
+ } else {
+ error_setg(errp, "Invalid 'thread' setting %s", t);
+ }
+ } else {
+ mttcg_enabled = default_mttcg_enabled();
+ }
+}
int64_t cpu_get_icount_raw(void)
{
return icount << icount_time_shift;
}
-/* return the host CPU cycle counter and handle stop/restart */
-/* Caller must hold the BQL */
+/* return the time elapsed in VM between vm_start and vm_stop. Unless
+ * icount is active, cpu_get_ticks() uses units of the host CPU cycle
+ * counter.
+ *
+ * Caller must hold the BQL
+ */
int64_t cpu_get_ticks(void)
{
int64_t ticks;
static int64_t cpu_get_clock_locked(void)
{
- int64_t ticks;
+ int64_t time;
- ticks = timers_state.cpu_clock_offset;
+ time = timers_state.cpu_clock_offset;
if (timers_state.cpu_ticks_enabled) {
- ticks += get_clock();
+ time += get_clock();
}
- return ticks;
+ return time;
}
-/* return the host CPU monotonic time */
+/* Return the monotonic time elapsed in VM, i.e.,
+ * the time between vm_start and vm_stop
+ */
int64_t cpu_get_clock(void)
{
int64_t ti;
}
};
-static void cpu_throttle_thread(void *opaque)
+static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
{
- CPUState *cpu = opaque;
double pct;
double throttle_ratio;
long sleeptime_ns;
}
CPU_FOREACH(cpu) {
if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
- async_run_on_cpu(cpu, cpu_throttle_thread, cpu);
+ async_run_on_cpu(cpu, cpu_throttle_thread,
+ RUN_ON_CPU_NULL);
}
}
NANOSECONDS_PER_SECOND / 10);
}
+/***********************************************************/
+/* TCG vCPU kick timer
+ *
+ * The kick timer is responsible for moving single threaded vCPU
+ * emulation on to the next vCPU. If more than one vCPU is running a
+ * timer event with force a cpu->exit so the next vCPU can get
+ * scheduled.
+ *
+ * The timer is removed if all vCPUs are idle and restarted again once
+ * idleness is complete.
+ */
+
+static QEMUTimer *tcg_kick_vcpu_timer;
+static CPUState *tcg_current_rr_cpu;
+
+#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
+
+static inline int64_t qemu_tcg_next_kick(void)
+{
+ return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
+}
+
+/* Kick the currently round-robin scheduled vCPU */
+static void qemu_cpu_kick_rr_cpu(void)
+{
+ CPUState *cpu;
+ do {
+ cpu = atomic_mb_read(&tcg_current_rr_cpu);
+ if (cpu) {
+ cpu_exit(cpu);
+ }
+ } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
+}
+
+static void kick_tcg_thread(void *opaque)
+{
+ timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
+ qemu_cpu_kick_rr_cpu();
+}
+
+static void start_tcg_kick_timer(void)
+{
+ if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
+ tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+ kick_tcg_thread, NULL);
+ timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
+ }
+}
+
+static void stop_tcg_kick_timer(void)
+{
+ if (tcg_kick_vcpu_timer) {
+ timer_del(tcg_kick_vcpu_timer);
+ tcg_kick_vcpu_timer = NULL;
+ }
+}
+
/***********************************************************/
void hw_error(const char *fmt, ...)
{
}
bdrv_drain_all();
- ret = blk_flush_all();
+ replay_disable_events();
+ ret = bdrv_flush_all();
return ret;
}
abort();
}
-static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
- void *ctx)
+static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
{
- if (kvm_on_sigbus(siginfo->ssi_code,
- (void *)(intptr_t)siginfo->ssi_addr)) {
+ if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
sigbus_reraise();
}
+
+ if (current_cpu) {
+ /* Called asynchronously in VCPU thread. */
+ if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
+ sigbus_reraise();
+ }
+ } else {
+ /* Called synchronously (via signalfd) in main thread. */
+ if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
+ sigbus_reraise();
+ }
+ }
}
static void qemu_init_sigbus(void)
memset(&action, 0, sizeof(action));
action.sa_flags = SA_SIGINFO;
- action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
+ action.sa_sigaction = sigbus_handler;
sigaction(SIGBUS, &action, NULL);
prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}
-
-static void qemu_kvm_eat_signals(CPUState *cpu)
-{
- struct timespec ts = { 0, 0 };
- siginfo_t siginfo;
- sigset_t waitset;
- sigset_t chkset;
- int r;
-
- sigemptyset(&waitset);
- sigaddset(&waitset, SIG_IPI);
- sigaddset(&waitset, SIGBUS);
-
- do {
- r = sigtimedwait(&waitset, &siginfo, &ts);
- if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
- perror("sigtimedwait");
- exit(1);
- }
-
- switch (r) {
- case SIGBUS:
- if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
- sigbus_reraise();
- }
- break;
- default:
- break;
- }
-
- r = sigpending(&chkset);
- if (r == -1) {
- perror("sigpending");
- exit(1);
- }
- } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
-}
-
#else /* !CONFIG_LINUX */
-
static void qemu_init_sigbus(void)
{
}
-
-static void qemu_kvm_eat_signals(CPUState *cpu)
-{
-}
#endif /* !CONFIG_LINUX */
-#ifndef _WIN32
-static void dummy_signal(int sig)
-{
-}
-
-static void qemu_kvm_init_cpu_signals(CPUState *cpu)
-{
- int r;
- sigset_t set;
- struct sigaction sigact;
-
- memset(&sigact, 0, sizeof(sigact));
- sigact.sa_handler = dummy_signal;
- sigaction(SIG_IPI, &sigact, NULL);
-
- pthread_sigmask(SIG_BLOCK, NULL, &set);
- sigdelset(&set, SIG_IPI);
- sigdelset(&set, SIGBUS);
- r = kvm_set_signal_mask(cpu, &set);
- if (r) {
- fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
- exit(1);
- }
-}
-
-#else /* _WIN32 */
-static void qemu_kvm_init_cpu_signals(CPUState *cpu)
-{
- abort();
-}
-#endif /* _WIN32 */
-
static QemuMutex qemu_global_mutex;
-static QemuCond qemu_io_proceeded_cond;
-static unsigned iothread_requesting_mutex;
static QemuThread io_thread;
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;
-static QemuCond qemu_work_cond;
void qemu_init_cpu_loop(void)
{
qemu_init_sigbus();
qemu_cond_init(&qemu_cpu_cond);
qemu_cond_init(&qemu_pause_cond);
- qemu_cond_init(&qemu_work_cond);
- qemu_cond_init(&qemu_io_proceeded_cond);
qemu_mutex_init(&qemu_global_mutex);
qemu_thread_get_self(&io_thread);
}
-void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
+void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
{
- struct qemu_work_item wi;
-
- if (qemu_cpu_is_self(cpu)) {
- func(data);
- return;
- }
-
- wi.func = func;
- wi.data = data;
- wi.free = false;
-
- qemu_mutex_lock(&cpu->work_mutex);
- if (cpu->queued_work_first == NULL) {
- cpu->queued_work_first = &wi;
- } else {
- cpu->queued_work_last->next = &wi;
- }
- cpu->queued_work_last = &wi;
- wi.next = NULL;
- wi.done = false;
- qemu_mutex_unlock(&cpu->work_mutex);
-
- qemu_cpu_kick(cpu);
- while (!atomic_mb_read(&wi.done)) {
- CPUState *self_cpu = current_cpu;
-
- qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
- current_cpu = self_cpu;
- }
-}
-
-void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
-{
- struct qemu_work_item *wi;
-
- if (qemu_cpu_is_self(cpu)) {
- func(data);
- return;
- }
-
- wi = g_malloc0(sizeof(struct qemu_work_item));
- wi->func = func;
- wi->data = data;
- wi->free = true;
-
- qemu_mutex_lock(&cpu->work_mutex);
- if (cpu->queued_work_first == NULL) {
- cpu->queued_work_first = wi;
- } else {
- cpu->queued_work_last->next = wi;
- }
- cpu->queued_work_last = wi;
- wi->next = NULL;
- wi->done = false;
- qemu_mutex_unlock(&cpu->work_mutex);
-
- qemu_cpu_kick(cpu);
+ do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
}
static void qemu_kvm_destroy_vcpu(CPUState *cpu)
{
}
-static void flush_queued_work(CPUState *cpu)
-{
- struct qemu_work_item *wi;
-
- if (cpu->queued_work_first == NULL) {
- return;
- }
-
- qemu_mutex_lock(&cpu->work_mutex);
- while (cpu->queued_work_first != NULL) {
- wi = cpu->queued_work_first;
- cpu->queued_work_first = wi->next;
- if (!cpu->queued_work_first) {
- cpu->queued_work_last = NULL;
- }
- qemu_mutex_unlock(&cpu->work_mutex);
- wi->func(wi->data);
- qemu_mutex_lock(&cpu->work_mutex);
- if (wi->free) {
- g_free(wi);
- } else {
- atomic_mb_set(&wi->done, true);
- }
- }
- qemu_mutex_unlock(&cpu->work_mutex);
- qemu_cond_broadcast(&qemu_work_cond);
-}
-
static void qemu_wait_io_event_common(CPUState *cpu)
{
+ atomic_mb_set(&cpu->thread_kicked, false);
if (cpu->stop) {
cpu->stop = false;
cpu->stopped = true;
qemu_cond_broadcast(&qemu_pause_cond);
}
- flush_queued_work(cpu);
- cpu->thread_kicked = false;
+ process_queued_cpu_work(cpu);
+}
+
+static bool qemu_tcg_should_sleep(CPUState *cpu)
+{
+ if (mttcg_enabled) {
+ return cpu_thread_is_idle(cpu);
+ } else {
+ return all_cpu_threads_idle();
+ }
}
static void qemu_tcg_wait_io_event(CPUState *cpu)
{
- while (all_cpu_threads_idle()) {
+ while (qemu_tcg_should_sleep(cpu)) {
+ stop_tcg_kick_timer();
qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
}
- while (iothread_requesting_mutex) {
- qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
- }
+ start_tcg_kick_timer();
- CPU_FOREACH(cpu) {
- qemu_wait_io_event_common(cpu);
- }
+ qemu_wait_io_event_common(cpu);
}
static void qemu_kvm_wait_io_event(CPUState *cpu)
qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
}
- qemu_kvm_eat_signals(cpu);
qemu_wait_io_event_common(cpu);
}
exit(1);
}
- qemu_kvm_init_cpu_signals(cpu);
+ kvm_init_cpu_signals(cpu);
/* signal CPU creation */
cpu->created = true;
qemu_thread_get_self(cpu->thread);
cpu->thread_id = qemu_get_thread_id();
cpu->can_do_io = 1;
+ current_cpu = cpu;
sigemptyset(&waitset);
sigaddset(&waitset, SIG_IPI);
cpu->created = true;
qemu_cond_signal(&qemu_cpu_cond);
- current_cpu = cpu;
while (1) {
- current_cpu = NULL;
qemu_mutex_unlock_iothread();
do {
int sig;
exit(1);
}
qemu_mutex_lock_iothread();
- current_cpu = cpu;
qemu_wait_io_event_common(cpu);
}
#endif
}
-static void tcg_exec_all(void);
+static int64_t tcg_get_icount_limit(void)
+{
+ int64_t deadline;
-static void *qemu_tcg_cpu_thread_fn(void *arg)
+ if (replay_mode != REPLAY_MODE_PLAY) {
+ deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
+
+ /* Maintain prior (possibly buggy) behaviour where if no deadline
+ * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
+ * INT32_MAX nanoseconds ahead, we still use INT32_MAX
+ * nanoseconds.
+ */
+ if ((deadline < 0) || (deadline > INT32_MAX)) {
+ deadline = INT32_MAX;
+ }
+
+ return qemu_icount_round(deadline);
+ } else {
+ return replay_get_instructions();
+ }
+}
+
+static void handle_icount_deadline(void)
+{
+ if (use_icount) {
+ int64_t deadline =
+ qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
+
+ if (deadline == 0) {
+ qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
+ }
+ }
+}
+
+static int tcg_cpu_exec(CPUState *cpu)
+{
+ int ret;
+#ifdef CONFIG_PROFILER
+ int64_t ti;
+#endif
+
+#ifdef CONFIG_PROFILER
+ ti = profile_getclock();
+#endif
+ if (use_icount) {
+ int64_t count;
+ int decr;
+ timers_state.qemu_icount -= (cpu->icount_decr.u16.low
+ + cpu->icount_extra);
+ cpu->icount_decr.u16.low = 0;
+ cpu->icount_extra = 0;
+ count = tcg_get_icount_limit();
+ timers_state.qemu_icount += count;
+ decr = (count > 0xffff) ? 0xffff : count;
+ count -= decr;
+ cpu->icount_decr.u16.low = decr;
+ cpu->icount_extra = count;
+ }
+ qemu_mutex_unlock_iothread();
+ cpu_exec_start(cpu);
+ ret = cpu_exec(cpu);
+ cpu_exec_end(cpu);
+ qemu_mutex_lock_iothread();
+#ifdef CONFIG_PROFILER
+ tcg_time += profile_getclock() - ti;
+#endif
+ if (use_icount) {
+ /* Fold pending instructions back into the
+ instruction counter, and clear the interrupt flag. */
+ timers_state.qemu_icount -= (cpu->icount_decr.u16.low
+ + cpu->icount_extra);
+ cpu->icount_decr.u32 = 0;
+ cpu->icount_extra = 0;
+ replay_account_executed_instructions();
+ }
+ return ret;
+}
+
+/* Destroy any remaining vCPUs which have been unplugged and have
+ * finished running
+ */
+static void deal_with_unplugged_cpus(void)
+{
+ CPUState *cpu;
+
+ CPU_FOREACH(cpu) {
+ if (cpu->unplug && !cpu_can_run(cpu)) {
+ qemu_tcg_destroy_vcpu(cpu);
+ cpu->created = false;
+ qemu_cond_signal(&qemu_cpu_cond);
+ break;
+ }
+ }
+}
+
+/* Single-threaded TCG
+ *
+ * In the single-threaded case each vCPU is simulated in turn. If
+ * there is more than a single vCPU we create a simple timer to kick
+ * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
+ * This is done explicitly rather than relying on side-effects
+ * elsewhere.
+ */
+
+static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
{
CPUState *cpu = arg;
- CPUState *remove_cpu = NULL;
rcu_register_thread();
/* process any pending work */
CPU_FOREACH(cpu) {
+ current_cpu = cpu;
qemu_wait_io_event_common(cpu);
}
}
+ start_tcg_kick_timer();
+
+ cpu = first_cpu;
+
/* process any pending work */
- atomic_mb_set(&exit_request, 1);
+ cpu->exit_request = 1;
while (1) {
- tcg_exec_all();
+ /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
+ qemu_account_warp_timer();
- if (use_icount) {
- int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
+ if (!cpu) {
+ cpu = first_cpu;
+ }
- if (deadline == 0) {
- qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
+ while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
+
+ atomic_mb_set(&tcg_current_rr_cpu, cpu);
+ current_cpu = cpu;
+
+ qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
+ (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
+
+ if (cpu_can_run(cpu)) {
+ int r;
+ r = tcg_cpu_exec(cpu);
+ if (r == EXCP_DEBUG) {
+ cpu_handle_guest_debug(cpu);
+ break;
+ } else if (r == EXCP_ATOMIC) {
+ qemu_mutex_unlock_iothread();
+ cpu_exec_step_atomic(cpu);
+ qemu_mutex_lock_iothread();
+ break;
+ }
+ } else if (cpu->stop) {
+ if (cpu->unplug) {
+ cpu = CPU_NEXT(cpu);
+ }
+ break;
}
+
+ cpu = CPU_NEXT(cpu);
+ } /* while (cpu && !cpu->exit_request).. */
+
+ /* Does not need atomic_mb_set because a spurious wakeup is okay. */
+ atomic_set(&tcg_current_rr_cpu, NULL);
+
+ if (cpu && cpu->exit_request) {
+ atomic_mb_set(&cpu->exit_request, 0);
}
- qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
- CPU_FOREACH(cpu) {
- if (cpu->unplug && !cpu_can_run(cpu)) {
- remove_cpu = cpu;
- break;
+
+ handle_icount_deadline();
+
+ qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
+ deal_with_unplugged_cpus();
+ }
+
+ return NULL;
+}
+
+static void *qemu_hax_cpu_thread_fn(void *arg)
+{
+ CPUState *cpu = arg;
+ int r;
+ qemu_thread_get_self(cpu->thread);
+ qemu_mutex_lock(&qemu_global_mutex);
+
+ cpu->thread_id = qemu_get_thread_id();
+ cpu->created = true;
+ cpu->halted = 0;
+ current_cpu = cpu;
+
+ hax_init_vcpu(cpu);
+ qemu_cond_signal(&qemu_cpu_cond);
+
+ while (1) {
+ if (cpu_can_run(cpu)) {
+ r = hax_smp_cpu_exec(cpu);
+ if (r == EXCP_DEBUG) {
+ cpu_handle_guest_debug(cpu);
}
}
- if (remove_cpu) {
- qemu_tcg_destroy_vcpu(remove_cpu);
- cpu->created = false;
- qemu_cond_signal(&qemu_cpu_cond);
- remove_cpu = NULL;
+
+ while (cpu_thread_is_idle(cpu)) {
+ qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
+ }
+#ifdef _WIN32
+ SleepEx(0, TRUE);
+#endif
+ qemu_wait_io_event_common(cpu);
+ }
+ return NULL;
+}
+
+#ifdef _WIN32
+static void CALLBACK dummy_apc_func(ULONG_PTR unused)
+{
+}
+#endif
+
+/* Multi-threaded TCG
+ *
+ * In the multi-threaded case each vCPU has its own thread. The TLS
+ * variable current_cpu can be used deep in the code to find the
+ * current CPUState for a given thread.
+ */
+
+static void *qemu_tcg_cpu_thread_fn(void *arg)
+{
+ CPUState *cpu = arg;
+
+ rcu_register_thread();
+
+ qemu_mutex_lock_iothread();
+ qemu_thread_get_self(cpu->thread);
+
+ cpu->thread_id = qemu_get_thread_id();
+ cpu->created = true;
+ cpu->can_do_io = 1;
+ current_cpu = cpu;
+ qemu_cond_signal(&qemu_cpu_cond);
+
+ /* process any pending work */
+ cpu->exit_request = 1;
+
+ while (1) {
+ if (cpu_can_run(cpu)) {
+ int r;
+ r = tcg_cpu_exec(cpu);
+ switch (r) {
+ case EXCP_DEBUG:
+ cpu_handle_guest_debug(cpu);
+ break;
+ case EXCP_HALTED:
+ /* during start-up the vCPU is reset and the thread is
+ * kicked several times. If we don't ensure we go back
+ * to sleep in the halted state we won't cleanly
+ * start-up when the vCPU is enabled.
+ *
+ * cpu->halted should ensure we sleep in wait_io_event
+ */
+ g_assert(cpu->halted);
+ break;
+ case EXCP_ATOMIC:
+ qemu_mutex_unlock_iothread();
+ cpu_exec_step_atomic(cpu);
+ qemu_mutex_lock_iothread();
+ default:
+ /* Ignore everything else? */
+ break;
+ }
}
+
+ handle_icount_deadline();
+
+ atomic_mb_set(&cpu->exit_request, 0);
+ qemu_tcg_wait_io_event(cpu);
}
return NULL;
exit(1);
}
#else /* _WIN32 */
- abort();
-#endif
-}
-
-static void qemu_cpu_kick_no_halt(void)
-{
- CPUState *cpu;
- /* Ensure whatever caused the exit has reached the CPU threads before
- * writing exit_request.
- */
- atomic_mb_set(&exit_request, 1);
- cpu = atomic_mb_read(&tcg_current_cpu);
- if (cpu) {
- cpu_exit(cpu);
+ if (!qemu_cpu_is_self(cpu)) {
+ if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
+ fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
+ __func__, GetLastError());
+ exit(1);
+ }
}
+#endif
}
void qemu_cpu_kick(CPUState *cpu)
{
qemu_cond_broadcast(cpu->halt_cond);
if (tcg_enabled()) {
- qemu_cpu_kick_no_halt();
+ cpu_exit(cpu);
+ /* NOP unless doing single-thread RR */
+ qemu_cpu_kick_rr_cpu();
} else {
+ if (hax_enabled()) {
+ /*
+ * FIXME: race condition with the exit_request check in
+ * hax_vcpu_hax_exec
+ */
+ cpu->exit_request = 1;
+ }
qemu_cpu_kick_thread(cpu);
}
}
void qemu_mutex_lock_iothread(void)
{
- atomic_inc(&iothread_requesting_mutex);
- /* In the simple case there is no need to bump the VCPU thread out of
- * TCG code execution.
- */
- if (!tcg_enabled() || qemu_in_vcpu_thread() ||
- !first_cpu || !first_cpu->created) {
- qemu_mutex_lock(&qemu_global_mutex);
- atomic_dec(&iothread_requesting_mutex);
- } else {
- if (qemu_mutex_trylock(&qemu_global_mutex)) {
- qemu_cpu_kick_no_halt();
- qemu_mutex_lock(&qemu_global_mutex);
- }
- atomic_dec(&iothread_requesting_mutex);
- qemu_cond_broadcast(&qemu_io_proceeded_cond);
- }
+ g_assert(!qemu_mutex_iothread_locked());
+ qemu_mutex_lock(&qemu_global_mutex);
iothread_locked = true;
}
void qemu_mutex_unlock_iothread(void)
{
+ g_assert(qemu_mutex_iothread_locked());
iothread_locked = false;
qemu_mutex_unlock(&qemu_global_mutex);
}
-static int all_vcpus_paused(void)
+static bool all_vcpus_paused(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
if (!cpu->stopped) {
- return 0;
+ return false;
}
}
- return 1;
+ return true;
}
void pause_all_vcpus(void)
if (qemu_in_vcpu_thread()) {
cpu_stop_current();
- if (!kvm_enabled()) {
- CPU_FOREACH(cpu) {
- cpu->stop = false;
- cpu->stopped = true;
- }
- return;
- }
}
while (!all_vcpus_paused()) {
static void qemu_tcg_init_vcpu(CPUState *cpu)
{
char thread_name[VCPU_THREAD_NAME_SIZE];
- static QemuCond *tcg_halt_cond;
- static QemuThread *tcg_cpu_thread;
+ static QemuCond *single_tcg_halt_cond;
+ static QemuThread *single_tcg_cpu_thread;
- /* share a single thread for all cpus with TCG */
- if (!tcg_cpu_thread) {
+ if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
cpu->thread = g_malloc0(sizeof(QemuThread));
cpu->halt_cond = g_malloc0(sizeof(QemuCond));
qemu_cond_init(cpu->halt_cond);
- tcg_halt_cond = cpu->halt_cond;
- snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
+
+ if (qemu_tcg_mttcg_enabled()) {
+ /* create a thread per vCPU with TCG (MTTCG) */
+ parallel_cpus = true;
+ snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
cpu->cpu_index);
- qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
- cpu, QEMU_THREAD_JOINABLE);
+
+ qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
+ cpu, QEMU_THREAD_JOINABLE);
+
+ } else {
+ /* share a single thread for all cpus with TCG */
+ snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
+ qemu_thread_create(cpu->thread, thread_name,
+ qemu_tcg_rr_cpu_thread_fn,
+ cpu, QEMU_THREAD_JOINABLE);
+
+ single_tcg_halt_cond = cpu->halt_cond;
+ single_tcg_cpu_thread = cpu->thread;
+ }
#ifdef _WIN32
cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
while (!cpu->created) {
qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
}
- tcg_cpu_thread = cpu->thread;
} else {
- cpu->thread = tcg_cpu_thread;
- cpu->halt_cond = tcg_halt_cond;
+ /* For non-MTTCG cases we share the thread */
+ cpu->thread = single_tcg_cpu_thread;
+ cpu->halt_cond = single_tcg_halt_cond;
+ }
+}
+
+static void qemu_hax_start_vcpu(CPUState *cpu)
+{
+ char thread_name[VCPU_THREAD_NAME_SIZE];
+
+ cpu->thread = g_malloc0(sizeof(QemuThread));
+ cpu->halt_cond = g_malloc0(sizeof(QemuCond));
+ qemu_cond_init(cpu->halt_cond);
+
+ snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
+ cpu->cpu_index);
+ qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
+ cpu, QEMU_THREAD_JOINABLE);
+#ifdef _WIN32
+ cpu->hThread = qemu_thread_get_handle(cpu->thread);
+#endif
+ while (!cpu->created) {
+ qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
}
}
if (kvm_enabled()) {
qemu_kvm_start_vcpu(cpu);
+ } else if (hax_enabled()) {
+ qemu_hax_start_vcpu(cpu);
} else if (tcg_enabled()) {
qemu_tcg_init_vcpu(cpu);
} else {
return do_vm_stop(state);
}
-/* does a state transition even if the VM is already stopped,
- current state is forgotten forever */
-int vm_stop_force_state(RunState state)
+/**
+ * Prepare for (re)starting the VM.
+ * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
+ * running or in case of an error condition), 0 otherwise.
+ */
+int vm_prepare_start(void)
{
- if (runstate_is_running()) {
- return vm_stop(state);
- } else {
- runstate_set(state);
+ RunState requested;
+ int res = 0;
- bdrv_drain_all();
- /* Make sure to return an error if the flush in a previous vm_stop()
- * failed. */
- return blk_flush_all();
+ qemu_vmstop_requested(&requested);
+ if (runstate_is_running() && requested == RUN_STATE__MAX) {
+ return -1;
}
-}
-
-static int64_t tcg_get_icount_limit(void)
-{
- int64_t deadline;
- if (replay_mode != REPLAY_MODE_PLAY) {
- deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
-
- /* Maintain prior (possibly buggy) behaviour where if no deadline
- * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
- * INT32_MAX nanoseconds ahead, we still use INT32_MAX
- * nanoseconds.
- */
- if ((deadline < 0) || (deadline > INT32_MAX)) {
- deadline = INT32_MAX;
- }
-
- return qemu_icount_round(deadline);
+ /* Ensure that a STOP/RESUME pair of events is emitted if a
+ * vmstop request was pending. The BLOCK_IO_ERROR event, for
+ * example, according to documentation is always followed by
+ * the STOP event.
+ */
+ if (runstate_is_running()) {
+ qapi_event_send_stop(&error_abort);
+ res = -1;
} else {
- return replay_get_instructions();
+ replay_enable_events();
+ cpu_enable_ticks();
+ runstate_set(RUN_STATE_RUNNING);
+ vm_state_notify(1, RUN_STATE_RUNNING);
}
+
+ /* We are sending this now, but the CPUs will be resumed shortly later */
+ qapi_event_send_resume(&error_abort);
+ return res;
}
-static int tcg_cpu_exec(CPUState *cpu)
+void vm_start(void)
{
- int ret;
-#ifdef CONFIG_PROFILER
- int64_t ti;
-#endif
-
-#ifdef CONFIG_PROFILER
- ti = profile_getclock();
-#endif
- if (use_icount) {
- int64_t count;
- int decr;
- timers_state.qemu_icount -= (cpu->icount_decr.u16.low
- + cpu->icount_extra);
- cpu->icount_decr.u16.low = 0;
- cpu->icount_extra = 0;
- count = tcg_get_icount_limit();
- timers_state.qemu_icount += count;
- decr = (count > 0xffff) ? 0xffff : count;
- count -= decr;
- cpu->icount_decr.u16.low = decr;
- cpu->icount_extra = count;
+ if (!vm_prepare_start()) {
+ resume_all_vcpus();
}
- ret = cpu_exec(cpu);
-#ifdef CONFIG_PROFILER
- tcg_time += profile_getclock() - ti;
-#endif
- if (use_icount) {
- /* Fold pending instructions back into the
- instruction counter, and clear the interrupt flag. */
- timers_state.qemu_icount -= (cpu->icount_decr.u16.low
- + cpu->icount_extra);
- cpu->icount_decr.u32 = 0;
- cpu->icount_extra = 0;
- replay_account_executed_instructions();
- }
- return ret;
}
-static void tcg_exec_all(void)
+/* does a state transition even if the VM is already stopped,
+ current state is forgotten forever */
+int vm_stop_force_state(RunState state)
{
- int r;
-
- /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
- qemu_account_warp_timer();
-
- if (next_cpu == NULL) {
- next_cpu = first_cpu;
- }
- for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
- CPUState *cpu = next_cpu;
-
- qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
- (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
+ if (runstate_is_running()) {
+ return vm_stop(state);
+ } else {
+ runstate_set(state);
- if (cpu_can_run(cpu)) {
- r = tcg_cpu_exec(cpu);
- if (r == EXCP_DEBUG) {
- cpu_handle_guest_debug(cpu);
- break;
- }
- } else if (cpu->stop || cpu->stopped) {
- if (cpu->unplug) {
- next_cpu = CPU_NEXT(cpu);
- }
- break;
- }
+ bdrv_drain_all();
+ /* Make sure to return an error if the flush in a previous vm_stop()
+ * failed. */
+ return bdrv_flush_all();
}
-
- /* Pairs with smp_wmb in qemu_cpu_kick. */
- atomic_mb_set(&exit_request, 0);
}
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)