/* Needed early for CONFIG_BSD etc. */
#include "qemu/osdep.h"
-
+#include "qemu-common.h"
+#include "cpu.h"
#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "sysemu/dma.h"
#include "sysemu/kvm.h"
#include "qmp-commands.h"
+#include "exec/exec-all.h"
#include "qemu/thread.h"
#include "sysemu/cpus.h"
#endif /* CONFIG_LINUX */
-static CPUState *next_cpu;
int64_t max_delay;
int64_t max_advance;
return icount << icount_time_shift;
}
-/* return the host CPU cycle counter and handle stop/restart */
-/* Caller must hold the BQL */
+/* return the time elapsed in VM between vm_start and vm_stop. Unless
+ * icount is active, cpu_get_ticks() uses units of the host CPU cycle
+ * counter.
+ *
+ * Caller must hold the BQL
+ */
int64_t cpu_get_ticks(void)
{
int64_t ticks;
static int64_t cpu_get_clock_locked(void)
{
- int64_t ticks;
+ int64_t time;
- ticks = timers_state.cpu_clock_offset;
+ time = timers_state.cpu_clock_offset;
if (timers_state.cpu_ticks_enabled) {
- ticks += get_clock();
+ time += get_clock();
}
- return ticks;
+ return time;
}
-/* return the host CPU monotonic timer and handle stop/restart */
+/* Return the monotonic time elapsed in VM, i.e.,
+ * the time between vm_start and vm_stop
+ */
int64_t cpu_get_clock(void)
{
int64_t ti;
}
/* enable cpu_get_ticks()
- * Caller must hold BQL which server as mutex for vm_clock_seqlock.
+ * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
*/
void cpu_enable_ticks(void)
{
/* Here, the really thing protected by seqlock is cpu_clock_offset. */
- seqlock_write_lock(&timers_state.vm_clock_seqlock);
+ seqlock_write_begin(&timers_state.vm_clock_seqlock);
if (!timers_state.cpu_ticks_enabled) {
timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
timers_state.cpu_clock_offset -= get_clock();
timers_state.cpu_ticks_enabled = 1;
}
- seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+ seqlock_write_end(&timers_state.vm_clock_seqlock);
}
/* disable cpu_get_ticks() : the clock is stopped. You must not call
* cpu_get_ticks() after that.
- * Caller must hold BQL which server as mutex for vm_clock_seqlock.
+ * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
*/
void cpu_disable_ticks(void)
{
/* Here, the really thing protected by seqlock is cpu_clock_offset. */
- seqlock_write_lock(&timers_state.vm_clock_seqlock);
+ seqlock_write_begin(&timers_state.vm_clock_seqlock);
if (timers_state.cpu_ticks_enabled) {
timers_state.cpu_ticks_offset += cpu_get_host_ticks();
timers_state.cpu_clock_offset = cpu_get_clock_locked();
timers_state.cpu_ticks_enabled = 0;
}
- seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+ seqlock_write_end(&timers_state.vm_clock_seqlock);
}
/* Correlation between real and virtual time is always going to be
return;
}
- seqlock_write_lock(&timers_state.vm_clock_seqlock);
+ seqlock_write_begin(&timers_state.vm_clock_seqlock);
cur_time = cpu_get_clock_locked();
cur_icount = cpu_get_icount_locked();
last_delta = delta;
timers_state.qemu_icount_bias = cur_icount
- (timers_state.qemu_icount << icount_time_shift);
- seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+ seqlock_write_end(&timers_state.vm_clock_seqlock);
}
static void icount_adjust_rt(void *opaque)
return;
}
- seqlock_write_lock(&timers_state.vm_clock_seqlock);
+ seqlock_write_begin(&timers_state.vm_clock_seqlock);
if (runstate_is_running()) {
int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
cpu_get_clock_locked());
timers_state.qemu_icount_bias += warp_delta;
}
vm_clock_warp_start = -1;
- seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+ seqlock_write_end(&timers_state.vm_clock_seqlock);
if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
- seqlock_write_lock(&timers_state.vm_clock_seqlock);
+ seqlock_write_begin(&timers_state.vm_clock_seqlock);
timers_state.qemu_icount_bias += warp;
- seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+ seqlock_write_end(&timers_state.vm_clock_seqlock);
qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
* It is useful when we want a deterministic execution time,
* isolated from host latencies.
*/
- seqlock_write_lock(&timers_state.vm_clock_seqlock);
+ seqlock_write_begin(&timers_state.vm_clock_seqlock);
timers_state.qemu_icount_bias += deadline;
- seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+ seqlock_write_end(&timers_state.vm_clock_seqlock);
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
} else {
/*
* you will not be sending network packets continuously instead of
* every 100ms.
*/
- seqlock_write_lock(&timers_state.vm_clock_seqlock);
+ seqlock_write_begin(&timers_state.vm_clock_seqlock);
if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
vm_clock_warp_start = clock;
}
- seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+ seqlock_write_end(&timers_state.vm_clock_seqlock);
timer_mod_anticipate(icount_warp_timer, clock + deadline);
}
} else if (deadline == 0) {
}
};
-static void cpu_throttle_thread(void *opaque)
+static void cpu_throttle_thread(CPUState *cpu, void *opaque)
{
- CPUState *cpu = opaque;
double pct;
double throttle_ratio;
long sleeptime_ns;
}
CPU_FOREACH(cpu) {
if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
- async_run_on_cpu(cpu, cpu_throttle_thread, cpu);
+ async_run_on_cpu(cpu, cpu_throttle_thread, NULL);
}
}
void cpu_ticks_init(void)
{
- seqlock_init(&timers_state.vm_clock_seqlock, NULL);
+ seqlock_init(&timers_state.vm_clock_seqlock);
vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
cpu_throttle_timer_tick, NULL);
}
bdrv_drain_all();
- ret = blk_flush_all();
+ replay_disable_events();
+ ret = bdrv_flush_all();
return ret;
}
raise(SIGBUS);
sigemptyset(&set);
sigaddset(&set, SIGBUS);
- sigprocmask(SIG_UNBLOCK, &set, NULL);
+ pthread_sigmask(SIG_UNBLOCK, &set, NULL);
}
perror("Failed to re-raise SIGBUS!\n");
abort();
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;
-static QemuCond qemu_work_cond;
void qemu_init_cpu_loop(void)
{
qemu_init_sigbus();
qemu_cond_init(&qemu_cpu_cond);
qemu_cond_init(&qemu_pause_cond);
- qemu_cond_init(&qemu_work_cond);
qemu_cond_init(&qemu_io_proceeded_cond);
qemu_mutex_init(&qemu_global_mutex);
qemu_thread_get_self(&io_thread);
}
-void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
+void run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
{
- struct qemu_work_item wi;
-
- if (qemu_cpu_is_self(cpu)) {
- func(data);
- return;
- }
-
- wi.func = func;
- wi.data = data;
- wi.free = false;
-
- qemu_mutex_lock(&cpu->work_mutex);
- if (cpu->queued_work_first == NULL) {
- cpu->queued_work_first = &wi;
- } else {
- cpu->queued_work_last->next = &wi;
- }
- cpu->queued_work_last = &wi;
- wi.next = NULL;
- wi.done = false;
- qemu_mutex_unlock(&cpu->work_mutex);
-
- qemu_cpu_kick(cpu);
- while (!atomic_mb_read(&wi.done)) {
- CPUState *self_cpu = current_cpu;
-
- qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
- current_cpu = self_cpu;
- }
+ do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
}
-void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
+static void qemu_kvm_destroy_vcpu(CPUState *cpu)
{
- struct qemu_work_item *wi;
-
- if (qemu_cpu_is_self(cpu)) {
- func(data);
- return;
- }
-
- wi = g_malloc0(sizeof(struct qemu_work_item));
- wi->func = func;
- wi->data = data;
- wi->free = true;
-
- qemu_mutex_lock(&cpu->work_mutex);
- if (cpu->queued_work_first == NULL) {
- cpu->queued_work_first = wi;
- } else {
- cpu->queued_work_last->next = wi;
+ if (kvm_destroy_vcpu(cpu) < 0) {
+ error_report("kvm_destroy_vcpu failed");
+ exit(EXIT_FAILURE);
}
- cpu->queued_work_last = wi;
- wi->next = NULL;
- wi->done = false;
- qemu_mutex_unlock(&cpu->work_mutex);
-
- qemu_cpu_kick(cpu);
}
-static void flush_queued_work(CPUState *cpu)
+static void qemu_tcg_destroy_vcpu(CPUState *cpu)
{
- struct qemu_work_item *wi;
-
- if (cpu->queued_work_first == NULL) {
- return;
- }
-
- qemu_mutex_lock(&cpu->work_mutex);
- while (cpu->queued_work_first != NULL) {
- wi = cpu->queued_work_first;
- cpu->queued_work_first = wi->next;
- if (!cpu->queued_work_first) {
- cpu->queued_work_last = NULL;
- }
- qemu_mutex_unlock(&cpu->work_mutex);
- wi->func(wi->data);
- qemu_mutex_lock(&cpu->work_mutex);
- if (wi->free) {
- g_free(wi);
- } else {
- atomic_mb_set(&wi->done, true);
- }
- }
- qemu_mutex_unlock(&cpu->work_mutex);
- qemu_cond_broadcast(&qemu_work_cond);
}
static void qemu_wait_io_event_common(CPUState *cpu)
cpu->stopped = true;
qemu_cond_broadcast(&qemu_pause_cond);
}
- flush_queued_work(cpu);
+ process_queued_cpu_work(cpu);
cpu->thread_kicked = false;
}
cpu->created = true;
qemu_cond_signal(&qemu_cpu_cond);
- while (1) {
+ do {
if (cpu_can_run(cpu)) {
r = kvm_cpu_exec(cpu);
if (r == EXCP_DEBUG) {
}
}
qemu_kvm_wait_io_event(cpu);
- }
+ } while (!cpu->unplug || cpu_can_run(cpu));
+ qemu_kvm_destroy_vcpu(cpu);
+ cpu->created = false;
+ qemu_cond_signal(&qemu_cpu_cond);
+ qemu_mutex_unlock_iothread();
return NULL;
}
#endif
}
-static void tcg_exec_all(void);
+static int64_t tcg_get_icount_limit(void)
+{
+ int64_t deadline;
+
+ if (replay_mode != REPLAY_MODE_PLAY) {
+ deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
+
+ /* Maintain prior (possibly buggy) behaviour where if no deadline
+ * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
+ * INT32_MAX nanoseconds ahead, we still use INT32_MAX
+ * nanoseconds.
+ */
+ if ((deadline < 0) || (deadline > INT32_MAX)) {
+ deadline = INT32_MAX;
+ }
+
+ return qemu_icount_round(deadline);
+ } else {
+ return replay_get_instructions();
+ }
+}
+
+static void handle_icount_deadline(void)
+{
+ if (use_icount) {
+ int64_t deadline =
+ qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
+
+ if (deadline == 0) {
+ qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
+ }
+ }
+}
+
+static int tcg_cpu_exec(CPUState *cpu)
+{
+ int ret;
+#ifdef CONFIG_PROFILER
+ int64_t ti;
+#endif
+
+#ifdef CONFIG_PROFILER
+ ti = profile_getclock();
+#endif
+ if (use_icount) {
+ int64_t count;
+ int decr;
+ timers_state.qemu_icount -= (cpu->icount_decr.u16.low
+ + cpu->icount_extra);
+ cpu->icount_decr.u16.low = 0;
+ cpu->icount_extra = 0;
+ count = tcg_get_icount_limit();
+ timers_state.qemu_icount += count;
+ decr = (count > 0xffff) ? 0xffff : count;
+ count -= decr;
+ cpu->icount_decr.u16.low = decr;
+ cpu->icount_extra = count;
+ }
+ cpu_exec_start(cpu);
+ ret = cpu_exec(cpu);
+ cpu_exec_end(cpu);
+#ifdef CONFIG_PROFILER
+ tcg_time += profile_getclock() - ti;
+#endif
+ if (use_icount) {
+ /* Fold pending instructions back into the
+ instruction counter, and clear the interrupt flag. */
+ timers_state.qemu_icount -= (cpu->icount_decr.u16.low
+ + cpu->icount_extra);
+ cpu->icount_decr.u32 = 0;
+ cpu->icount_extra = 0;
+ replay_account_executed_instructions();
+ }
+ return ret;
+}
+
+/* Destroy any remaining vCPUs which have been unplugged and have
+ * finished running
+ */
+static void deal_with_unplugged_cpus(void)
+{
+ CPUState *cpu;
+
+ CPU_FOREACH(cpu) {
+ if (cpu->unplug && !cpu_can_run(cpu)) {
+ qemu_tcg_destroy_vcpu(cpu);
+ cpu->created = false;
+ qemu_cond_signal(&qemu_cpu_cond);
+ break;
+ }
+ }
+}
static void *qemu_tcg_cpu_thread_fn(void *arg)
{
/* process any pending work */
atomic_mb_set(&exit_request, 1);
+ cpu = first_cpu;
+
while (1) {
- tcg_exec_all();
+ /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
+ qemu_account_warp_timer();
- if (use_icount) {
- int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
+ if (!cpu) {
+ cpu = first_cpu;
+ }
- if (deadline == 0) {
- qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
+ for (; cpu != NULL && !exit_request; cpu = CPU_NEXT(cpu)) {
+
+ qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
+ (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
+
+ if (cpu_can_run(cpu)) {
+ int r;
+ r = tcg_cpu_exec(cpu);
+ if (r == EXCP_DEBUG) {
+ cpu_handle_guest_debug(cpu);
+ break;
+ }
+ } else if (cpu->stop || cpu->stopped) {
+ if (cpu->unplug) {
+ cpu = CPU_NEXT(cpu);
+ }
+ break;
}
- }
+
+ } /* for cpu.. */
+
+ /* Pairs with smp_wmb in qemu_cpu_kick. */
+ atomic_mb_set(&exit_request, 0);
+
+ handle_icount_deadline();
+
qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
+ deal_with_unplugged_cpus();
}
return NULL;
qemu_mutex_unlock(&qemu_global_mutex);
}
-static int all_vcpus_paused(void)
+static bool all_vcpus_paused(void)
{
CPUState *cpu;
CPU_FOREACH(cpu) {
if (!cpu->stopped) {
- return 0;
+ return false;
}
}
- return 1;
+ return true;
}
void pause_all_vcpus(void)
}
}
+void cpu_remove(CPUState *cpu)
+{
+ cpu->stop = true;
+ cpu->unplug = true;
+ qemu_cpu_kick(cpu);
+}
+
+void cpu_remove_sync(CPUState *cpu)
+{
+ cpu_remove(cpu);
+ while (cpu->created) {
+ qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
+ }
+}
+
/* For temporary buffers for forming a name */
#define VCPU_THREAD_NAME_SIZE 16
bdrv_drain_all();
/* Make sure to return an error if the flush in a previous vm_stop()
* failed. */
- return blk_flush_all();
- }
-}
-
-static int64_t tcg_get_icount_limit(void)
-{
- int64_t deadline;
-
- if (replay_mode != REPLAY_MODE_PLAY) {
- deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
-
- /* Maintain prior (possibly buggy) behaviour where if no deadline
- * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
- * INT32_MAX nanoseconds ahead, we still use INT32_MAX
- * nanoseconds.
- */
- if ((deadline < 0) || (deadline > INT32_MAX)) {
- deadline = INT32_MAX;
- }
-
- return qemu_icount_round(deadline);
- } else {
- return replay_get_instructions();
- }
-}
-
-static int tcg_cpu_exec(CPUState *cpu)
-{
- int ret;
-#ifdef CONFIG_PROFILER
- int64_t ti;
-#endif
-
-#ifdef CONFIG_PROFILER
- ti = profile_getclock();
-#endif
- if (use_icount) {
- int64_t count;
- int decr;
- timers_state.qemu_icount -= (cpu->icount_decr.u16.low
- + cpu->icount_extra);
- cpu->icount_decr.u16.low = 0;
- cpu->icount_extra = 0;
- count = tcg_get_icount_limit();
- timers_state.qemu_icount += count;
- decr = (count > 0xffff) ? 0xffff : count;
- count -= decr;
- cpu->icount_decr.u16.low = decr;
- cpu->icount_extra = count;
- }
- ret = cpu_exec(cpu);
-#ifdef CONFIG_PROFILER
- tcg_time += profile_getclock() - ti;
-#endif
- if (use_icount) {
- /* Fold pending instructions back into the
- instruction counter, and clear the interrupt flag. */
- timers_state.qemu_icount -= (cpu->icount_decr.u16.low
- + cpu->icount_extra);
- cpu->icount_decr.u32 = 0;
- cpu->icount_extra = 0;
- replay_account_executed_instructions();
- }
- return ret;
-}
-
-static void tcg_exec_all(void)
-{
- int r;
-
- /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
- qemu_account_warp_timer();
-
- if (next_cpu == NULL) {
- next_cpu = first_cpu;
- }
- for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
- CPUState *cpu = next_cpu;
-
- qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
- (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
-
- if (cpu_can_run(cpu)) {
- r = tcg_cpu_exec(cpu);
- if (r == EXCP_DEBUG) {
- cpu_handle_guest_debug(cpu);
- break;
- }
- } else if (cpu->stop || cpu->stopped) {
- break;
- }
+ return bdrv_flush_all();
}
-
- /* Pairs with smp_wmb in qemu_cpu_kick. */
- atomic_mb_set(&exit_request, 0);
}
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
void qmp_inject_nmi(Error **errp)
{
-#if defined(TARGET_I386)
- CPUState *cs;
-
- CPU_FOREACH(cs) {
- X86CPU *cpu = X86_CPU(cs);
-
- if (!cpu->apic_state) {
- cpu_interrupt(cs, CPU_INTERRUPT_NMI);
- } else {
- apic_deliver_nmi(cpu->apic_state);
- }
- }
-#else
nmi_monitor_handle(monitor_get_cpu_index(), errp);
-#endif
}
void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)