#include "qapi/qapi-events-run-state.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
+#include "qemu/qemu-print.h"
#include "sysemu/sysemu.h"
#include "sysemu/block-backend.h"
#include "exec/gdbstub.h"
/* Protected by TimersState seqlock */
static bool icount_sleep = true;
-/* Conversion factor from emulated instructions to virtual clock ticks. */
-static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10
int64_t cpu_ticks_prev;
int64_t cpu_ticks_offset;
- /* cpu_clock_offset can be read out of BQL, so protect it with
- * this lock.
+ /* Protect fields that can be respectively read outside the
+ * BQL, and written from multiple threads.
*/
QemuSeqLock vm_clock_seqlock;
- int64_t cpu_clock_offset;
- int32_t cpu_ticks_enabled;
- int64_t dummy;
+ QemuSpin vm_clock_lock;
+
+ int16_t cpu_ticks_enabled;
+
+ /* Conversion factor from emulated instructions to virtual clock ticks. */
+ int16_t icount_time_shift;
/* Compensate for varying guest execution speed. */
int64_t qemu_icount_bias;
+
+ int64_t vm_clock_warp_start;
+ int64_t cpu_clock_offset;
+
/* Only written by TCG thread */
int64_t qemu_icount;
+
/* for adjusting icount */
- int64_t vm_clock_warp_start;
QEMUTimer *icount_rt_timer;
QEMUTimer *icount_vm_timer;
QEMUTimer *icount_warp_timer;
error_setg(errp, "No MTTCG when icount is enabled");
} else {
#ifndef TARGET_SUPPORTS_MTTCG
- error_report("Guest not yet converted to MTTCG - "
- "you may get unexpected results");
+ warn_report("Guest not yet converted to MTTCG - "
+ "you may get unexpected results");
#endif
if (!check_tcg_memory_orders_compatible()) {
- error_report("Guest expects a stronger memory ordering "
- "than the host provides");
+ warn_report("Guest expects a stronger memory ordering "
+ "than the host provides");
error_printf("This may cause strange/hard to debug errors\n");
}
mttcg_enabled = true;
* account executed instructions. This is done by the TCG vCPU
* thread so the main-loop can see time has moved forward.
*/
-void cpu_update_icount(CPUState *cpu)
+static void cpu_update_icount_locked(CPUState *cpu)
{
int64_t executed = cpu_get_icount_executed(cpu);
cpu->icount_budget -= executed;
-#ifdef CONFIG_ATOMIC64
- atomic_set__nocheck(&timers_state.qemu_icount,
- atomic_read__nocheck(&timers_state.qemu_icount) +
- executed);
-#else /* FIXME: we need 64bit atomics to do this safely */
- timers_state.qemu_icount += executed;
-#endif
+ atomic_set_i64(&timers_state.qemu_icount,
+ timers_state.qemu_icount + executed);
}
-int64_t cpu_get_icount_raw(void)
+/*
+ * Update the global shared timer_state.qemu_icount to take into
+ * account executed instructions. This is done by the TCG vCPU
+ * thread so the main-loop can see time has moved forward.
+ */
+void cpu_update_icount(CPUState *cpu)
+{
+ seqlock_write_lock(&timers_state.vm_clock_seqlock,
+ &timers_state.vm_clock_lock);
+ cpu_update_icount_locked(cpu);
+ seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+ &timers_state.vm_clock_lock);
+}
+
+static int64_t cpu_get_icount_raw_locked(void)
{
CPUState *cpu = current_cpu;
exit(1);
}
/* Take into account what has run */
- cpu_update_icount(cpu);
+ cpu_update_icount_locked(cpu);
}
-#ifdef CONFIG_ATOMIC64
- return atomic_read__nocheck(&timers_state.qemu_icount);
-#else /* FIXME: we need 64bit atomics to do this safely */
- return timers_state.qemu_icount;
-#endif
+ /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
+ return atomic_read_i64(&timers_state.qemu_icount);
}
-/* Return the virtual CPU time, based on the instruction counter. */
static int64_t cpu_get_icount_locked(void)
{
- int64_t icount = cpu_get_icount_raw();
- return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
+ int64_t icount = cpu_get_icount_raw_locked();
+ return atomic_read_i64(&timers_state.qemu_icount_bias) +
+ cpu_icount_to_ns(icount);
}
+int64_t cpu_get_icount_raw(void)
+{
+ int64_t icount;
+ unsigned start;
+
+ do {
+ start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
+ icount = cpu_get_icount_raw_locked();
+ } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
+
+ return icount;
+}
+
+/* Return the virtual CPU time, based on the instruction counter. */
int64_t cpu_get_icount(void)
{
int64_t icount;
int64_t cpu_icount_to_ns(int64_t icount)
{
- return icount << icount_time_shift;
+ return icount << atomic_read(&timers_state.icount_time_shift);
+}
+
+static int64_t cpu_get_ticks_locked(void)
+{
+ int64_t ticks = timers_state.cpu_ticks_offset;
+ if (timers_state.cpu_ticks_enabled) {
+ ticks += cpu_get_host_ticks();
+ }
+
+ if (timers_state.cpu_ticks_prev > ticks) {
+ /* Non increasing ticks may happen if the host uses software suspend. */
+ timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
+ ticks = timers_state.cpu_ticks_prev;
+ }
+
+ timers_state.cpu_ticks_prev = ticks;
+ return ticks;
}
/* return the time elapsed in VM between vm_start and vm_stop. Unless
* icount is active, cpu_get_ticks() uses units of the host CPU cycle
* counter.
- *
- * Caller must hold the BQL
*/
int64_t cpu_get_ticks(void)
{
return cpu_get_icount();
}
- ticks = timers_state.cpu_ticks_offset;
- if (timers_state.cpu_ticks_enabled) {
- ticks += cpu_get_host_ticks();
- }
-
- if (timers_state.cpu_ticks_prev > ticks) {
- /* Note: non increasing ticks may happen if the host uses
- software suspend */
- timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
- ticks = timers_state.cpu_ticks_prev;
- }
-
- timers_state.cpu_ticks_prev = ticks;
+ qemu_spin_lock(&timers_state.vm_clock_lock);
+ ticks = cpu_get_ticks_locked();
+ qemu_spin_unlock(&timers_state.vm_clock_lock);
return ticks;
}
*/
void cpu_enable_ticks(void)
{
- /* Here, the really thing protected by seqlock is cpu_clock_offset. */
- seqlock_write_begin(&timers_state.vm_clock_seqlock);
+ seqlock_write_lock(&timers_state.vm_clock_seqlock,
+ &timers_state.vm_clock_lock);
if (!timers_state.cpu_ticks_enabled) {
timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
timers_state.cpu_clock_offset -= get_clock();
timers_state.cpu_ticks_enabled = 1;
}
- seqlock_write_end(&timers_state.vm_clock_seqlock);
+ seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+ &timers_state.vm_clock_lock);
}
/* disable cpu_get_ticks() : the clock is stopped. You must not call
*/
void cpu_disable_ticks(void)
{
- /* Here, the really thing protected by seqlock is cpu_clock_offset. */
- seqlock_write_begin(&timers_state.vm_clock_seqlock);
+ seqlock_write_lock(&timers_state.vm_clock_seqlock,
+ &timers_state.vm_clock_lock);
if (timers_state.cpu_ticks_enabled) {
timers_state.cpu_ticks_offset += cpu_get_host_ticks();
timers_state.cpu_clock_offset = cpu_get_clock_locked();
timers_state.cpu_ticks_enabled = 0;
}
- seqlock_write_end(&timers_state.vm_clock_seqlock);
+ seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+ &timers_state.vm_clock_lock);
}
/* Correlation between real and virtual time is always going to be
return;
}
- seqlock_write_begin(&timers_state.vm_clock_seqlock);
+ seqlock_write_lock(&timers_state.vm_clock_seqlock,
+ &timers_state.vm_clock_lock);
cur_time = cpu_get_clock_locked();
cur_icount = cpu_get_icount_locked();
/* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
if (delta > 0
&& last_delta + ICOUNT_WOBBLE < delta * 2
- && icount_time_shift > 0) {
+ && timers_state.icount_time_shift > 0) {
/* The guest is getting too far ahead. Slow time down. */
- icount_time_shift--;
+ atomic_set(&timers_state.icount_time_shift,
+ timers_state.icount_time_shift - 1);
}
if (delta < 0
&& last_delta - ICOUNT_WOBBLE > delta * 2
- && icount_time_shift < MAX_ICOUNT_SHIFT) {
+ && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
/* The guest is getting too far behind. Speed time up. */
- icount_time_shift++;
+ atomic_set(&timers_state.icount_time_shift,
+ timers_state.icount_time_shift + 1);
}
last_delta = delta;
- timers_state.qemu_icount_bias = cur_icount
- - (timers_state.qemu_icount << icount_time_shift);
- seqlock_write_end(&timers_state.vm_clock_seqlock);
+ atomic_set_i64(&timers_state.qemu_icount_bias,
+ cur_icount - (timers_state.qemu_icount
+ << timers_state.icount_time_shift));
+ seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+ &timers_state.vm_clock_lock);
}
static void icount_adjust_rt(void *opaque)
static int64_t qemu_icount_round(int64_t count)
{
- return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
+ int shift = atomic_read(&timers_state.icount_time_shift);
+ return (count + (1 << shift) - 1) >> shift;
}
static void icount_warp_rt(void)
return;
}
- seqlock_write_begin(&timers_state.vm_clock_seqlock);
+ seqlock_write_lock(&timers_state.vm_clock_seqlock,
+ &timers_state.vm_clock_lock);
if (runstate_is_running()) {
- int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
- cpu_get_clock_locked());
+ int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
+ cpu_get_clock_locked());
int64_t warp_delta;
warp_delta = clock - timers_state.vm_clock_warp_start;
int64_t delta = clock - cur_icount;
warp_delta = MIN(warp_delta, delta);
}
- timers_state.qemu_icount_bias += warp_delta;
+ atomic_set_i64(&timers_state.qemu_icount_bias,
+ timers_state.qemu_icount_bias + warp_delta);
}
timers_state.vm_clock_warp_start = -1;
- seqlock_write_end(&timers_state.vm_clock_seqlock);
+ seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+ &timers_state.vm_clock_lock);
if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
- seqlock_write_begin(&timers_state.vm_clock_seqlock);
- timers_state.qemu_icount_bias += warp;
- seqlock_write_end(&timers_state.vm_clock_seqlock);
+ seqlock_write_lock(&timers_state.vm_clock_seqlock,
+ &timers_state.vm_clock_lock);
+ atomic_set_i64(&timers_state.qemu_icount_bias,
+ timers_state.qemu_icount_bias + warp);
+ seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+ &timers_state.vm_clock_lock);
qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
return;
}
- /* warp clock deterministically in record/replay mode */
- if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
- return;
- }
+ if (replay_mode != REPLAY_MODE_PLAY) {
+ if (!all_cpu_threads_idle()) {
+ return;
+ }
- if (!all_cpu_threads_idle()) {
- return;
- }
+ if (qtest_enabled()) {
+ /* When testing, qtest commands advance icount. */
+ return;
+ }
- if (qtest_enabled()) {
- /* When testing, qtest commands advance icount. */
- return;
+ replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
+ } else {
+ /* warp clock deterministically in record/replay mode */
+ if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
+ /* vCPU is sleeping and warp can't be started.
+ It is probably a race condition: notification sent
+ to vCPU was processed in advance and vCPU went to sleep.
+ Therefore we have to wake it up for doing someting. */
+ if (replay_has_checkpoint()) {
+ qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
+ }
+ return;
+ }
}
/* We want to use the earliest deadline from ALL vm_clocks */
* It is useful when we want a deterministic execution time,
* isolated from host latencies.
*/
- seqlock_write_begin(&timers_state.vm_clock_seqlock);
- timers_state.qemu_icount_bias += deadline;
- seqlock_write_end(&timers_state.vm_clock_seqlock);
+ seqlock_write_lock(&timers_state.vm_clock_seqlock,
+ &timers_state.vm_clock_lock);
+ atomic_set_i64(&timers_state.qemu_icount_bias,
+ timers_state.qemu_icount_bias + deadline);
+ seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+ &timers_state.vm_clock_lock);
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
} else {
/*
* you will not be sending network packets continuously instead of
* every 100ms.
*/
- seqlock_write_begin(&timers_state.vm_clock_seqlock);
+ seqlock_write_lock(&timers_state.vm_clock_seqlock,
+ &timers_state.vm_clock_lock);
if (timers_state.vm_clock_warp_start == -1
|| timers_state.vm_clock_warp_start > clock) {
timers_state.vm_clock_warp_start = clock;
}
- seqlock_write_end(&timers_state.vm_clock_seqlock);
+ seqlock_write_unlock(&timers_state.vm_clock_seqlock,
+ &timers_state.vm_clock_lock);
timer_mod_anticipate(timers_state.icount_warp_timer,
clock + deadline);
}
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_INT64(cpu_ticks_offset, TimersState),
- VMSTATE_INT64(dummy, TimersState),
+ VMSTATE_UNUSED(8),
VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
VMSTATE_END_OF_LIST()
},
void cpu_ticks_init(void)
{
seqlock_init(&timers_state.vm_clock_seqlock);
+ qemu_spin_init(&timers_state.vm_clock_lock);
vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
cpu_throttle_timer_tick, NULL);
}
if (strcmp(option, "auto") != 0) {
errno = 0;
- icount_time_shift = strtol(option, &rem_str, 0);
+ timers_state.icount_time_shift = strtol(option, &rem_str, 0);
if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
error_setg(errp, "icount: Invalid shift value");
}
/* 125MIPS seems a reasonable initial guess at the guest speed.
It will be corrected fairly quickly anyway. */
- icount_time_shift = 3;
+ timers_state.icount_time_shift = 3;
/* Have both realtime and virtual time triggers for speed adjustment.
The realtime trigger catches emulated time passing too slowly,
if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
kick_tcg_thread, NULL);
+ }
+ if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
}
}
static void stop_tcg_kick_timer(void)
{
assert(!mttcg_enabled);
- if (tcg_kick_vcpu_timer) {
+ if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
timer_del(tcg_kick_vcpu_timer);
- tcg_kick_vcpu_timer = NULL;
}
}
fprintf(stderr, "\n");
CPU_FOREACH(cpu) {
fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
- cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
+ cpu_dump_state(cpu, stderr, CPU_DUMP_FPU);
}
va_end(ap);
abort();
runstate_set(state);
vm_state_notify(0, state);
if (send_stop) {
- qapi_event_send_stop(&error_abort);
+ qapi_event_send_stop();
}
}
process_queued_cpu_work(cpu);
}
-static void qemu_tcg_rr_wait_io_event(CPUState *cpu)
+static void qemu_tcg_rr_wait_io_event(void)
{
+ CPUState *cpu;
+
while (all_cpu_threads_idle()) {
stop_tcg_kick_timer();
- qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
+ qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
}
start_tcg_kick_timer();
- qemu_wait_io_event_common(cpu);
+ CPU_FOREACH(cpu) {
+ qemu_wait_io_event_common(cpu);
+ }
}
static void qemu_wait_io_event(CPUState *cpu)
qemu_wait_io_event(cpu);
} while (!cpu->unplug);
+ qemu_mutex_unlock_iothread();
rcu_unregister_thread();
return NULL;
#endif
ret = cpu_exec(cpu);
cpu_exec_end(cpu);
#ifdef CONFIG_PROFILER
- tcg_time += profile_getclock() - ti;
+ atomic_set(&tcg_ctx->prof.cpu_exec_time,
+ tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
#endif
return ret;
}
atomic_mb_set(&cpu->exit_request, 0);
}
- qemu_tcg_rr_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
+ if (use_icount && all_cpu_threads_idle()) {
+ /*
+ * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
+ * in the main_loop, wake it up in order to start the warp timer.
+ */
+ qemu_notify_event();
+ }
+
+ qemu_tcg_rr_wait_io_event();
deal_with_unplugged_cpus();
}
}
cpu->thread_kicked = true;
err = pthread_kill(cpu->thread->thread, SIG_IPI);
- if (err) {
+ if (err && err != ESRCH) {
fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
exit(1);
}
return iothread_locked;
}
-void qemu_mutex_lock_iothread(void)
+/*
+ * The BQL is taken from so many places that it is worth profiling the
+ * callers directly, instead of funneling them all through a single function.
+ */
+void qemu_mutex_lock_iothread_impl(const char *file, int line)
{
+ QemuMutexLockFunc bql_lock = atomic_read(&qemu_bql_mutex_lock_func);
+
g_assert(!qemu_mutex_iothread_locked());
- qemu_mutex_lock(&qemu_global_mutex);
+ bql_lock(&qemu_global_mutex, file, line);
iothread_locked = true;
}
void cpu_stop_current(void)
{
if (current_cpu) {
- qemu_cpu_stop(current_cpu, true);
+ current_cpu->stop = true;
+ cpu_exit(current_cpu);
}
}
* the STOP event.
*/
if (runstate_is_running()) {
- qapi_event_send_stop(&error_abort);
- qapi_event_send_resume(&error_abort);
+ qapi_event_send_stop();
+ qapi_event_send_resume();
return -1;
}
/* We are sending this now, but the CPUs will be resumed shortly later */
- qapi_event_send_resume(&error_abort);
+ qapi_event_send_resume();
replay_enable_events();
cpu_enable_ticks();
}
}
-void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
+void list_cpus(const char *optarg)
{
/* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
- cpu_list(f, cpu_fprintf);
+ cpu_list();
#endif
}
return CPU_INFO_ARCH_X86;
case SYS_EMU_TARGET_PPC:
- case SYS_EMU_TARGET_PPCEMB:
case SYS_EMU_TARGET_PPC64:
return CPU_INFO_ARCH_PPC;
nmi_monitor_handle(monitor_get_cpu_index(), errp);
}
-void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
+void dump_drift_info(void)
{
if (!use_icount) {
return;
}
- cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
+ qemu_printf("Host - Guest clock %"PRIi64" ms\n",
(cpu_get_clock() - cpu_get_icount())/SCALE_MS);
if (icount_align_option) {
- cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
- cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
+ qemu_printf("Max guest delay %"PRIi64" ms\n",
+ -max_delay / SCALE_MS);
+ qemu_printf("Max guest advance %"PRIi64" ms\n",
+ max_advance / SCALE_MS);
} else {
- cpu_fprintf(f, "Max guest delay NA\n");
- cpu_fprintf(f, "Max guest advance NA\n");
+ qemu_printf("Max guest delay NA\n");
+ qemu_printf("Max guest advance NA\n");
}
}