Git Repo - qemu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* QEMU System Emulator
	3	*
	4	* Copyright (c) 2003-2008 Fabrice Bellard
	5	*
	6	* Permission is hereby granted, free of charge, to any person obtaining a copy
	7	* of this software and associated documentation files (the "Software"), to deal
	8	* in the Software without restriction, including without limitation the rights
	9	* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	10	* copies of the Software, and to permit persons to whom the Software is
	11	* furnished to do so, subject to the following conditions:
	12	*
	13	* The above copyright notice and this permission notice shall be included in
	14	* all copies or substantial portions of the Software.
	15	*
	16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
	19	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
	22	* THE SOFTWARE.
	23	*/
	24
	25	/* Needed early for CONFIG_BSD etc. */
	26	#include "qemu/osdep.h"
	27	#include "qemu-common.h"
	28	#include "qemu/config-file.h"
	29	#include "cpu.h"
	30	#include "monitor/monitor.h"
	31	#include "qapi/qmp/qerror.h"
	32	#include "qemu/error-report.h"
	33	#include "sysemu/sysemu.h"
	34	#include "sysemu/block-backend.h"
	35	#include "exec/gdbstub.h"
	36	#include "sysemu/dma.h"
	37	#include "sysemu/hw_accel.h"
	38	#include "sysemu/kvm.h"
	39	#include "sysemu/hax.h"
	40	#include "sysemu/hvf.h"
	41	#include "sysemu/whpx.h"
	42	#include "qmp-commands.h"
	43	#include "exec/exec-all.h"
	44
	45	#include "qemu/thread.h"
	46	#include "sysemu/cpus.h"
	47	#include "sysemu/qtest.h"
	48	#include "qemu/main-loop.h"
	49	#include "qemu/bitmap.h"
	50	#include "qemu/seqlock.h"
	51	#include "tcg.h"
	52	#include "qapi-event.h"
	53	#include "hw/nmi.h"
	54	#include "sysemu/replay.h"
	55	#include "hw/boards.h"
	56
	57	#ifdef CONFIG_LINUX
	58
	59	#include <sys/prctl.h>
	60
	61	#ifndef PR_MCE_KILL
	62	#define PR_MCE_KILL 33
	63	#endif
	64
	65	#ifndef PR_MCE_KILL_SET
	66	#define PR_MCE_KILL_SET 1
	67	#endif
	68
	69	#ifndef PR_MCE_KILL_EARLY
	70	#define PR_MCE_KILL_EARLY 1
	71	#endif
	72
	73	#endif /* CONFIG_LINUX */
	74
	75	int64_t max_delay;
	76	int64_t max_advance;
	77
	78	/* vcpu throttling controls */
	79	static QEMUTimer *throttle_timer;
	80	static unsigned int throttle_percentage;
	81
	82	#define CPU_THROTTLE_PCT_MIN 1
	83	#define CPU_THROTTLE_PCT_MAX 99
	84	#define CPU_THROTTLE_TIMESLICE_NS 10000000
	85
	86	bool cpu_is_stopped(CPUState *cpu)
	87	{
	88	return cpu->stopped \|\| !runstate_is_running();
	89	}
	90
	91	static bool cpu_thread_is_idle(CPUState *cpu)
	92	{
	93	if (cpu->stop \|\| cpu->queued_work_first) {
	94	return false;
	95	}
	96	if (cpu_is_stopped(cpu)) {
	97	return true;
	98	}
	99	if (!cpu->halted \|\| cpu_has_work(cpu) \|\|
	100	kvm_halt_in_kernel()) {
	101	return false;
	102	}
	103	return true;
	104	}
	105
	106	static bool all_cpu_threads_idle(void)
	107	{
	108	CPUState *cpu;
	109
	110	CPU_FOREACH(cpu) {
	111	if (!cpu_thread_is_idle(cpu)) {
	112	return false;
	113	}
	114	}
	115	return true;
	116	}
	117
	118	/***********************************************************/
	119	/* guest cycle counter */
	120
	121	/* Protected by TimersState seqlock */
	122
	123	static bool icount_sleep = true;
	124	/* Conversion factor from emulated instructions to virtual clock ticks. */
	125	static int icount_time_shift;
	126	/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
	127	#define MAX_ICOUNT_SHIFT 10
	128
	129	typedef struct TimersState {
	130	/* Protected by BQL. */
	131	int64_t cpu_ticks_prev;
	132	int64_t cpu_ticks_offset;
	133
	134	/* cpu_clock_offset can be read out of BQL, so protect it with
	135	* this lock.
	136	*/
	137	QemuSeqLock vm_clock_seqlock;
	138	int64_t cpu_clock_offset;
	139	int32_t cpu_ticks_enabled;
	140	int64_t dummy;
	141
	142	/* Compensate for varying guest execution speed. */
	143	int64_t qemu_icount_bias;
	144	/* Only written by TCG thread */
	145	int64_t qemu_icount;
	146	/* for adjusting icount */
	147	int64_t vm_clock_warp_start;
	148	QEMUTimer *icount_rt_timer;
	149	QEMUTimer *icount_vm_timer;
	150	QEMUTimer *icount_warp_timer;
	151	} TimersState;
	152
	153	static TimersState timers_state;
	154	bool mttcg_enabled;
	155
	156	/*
	157	* We default to false if we know other options have been enabled
	158	* which are currently incompatible with MTTCG. Otherwise when each
	159	* guest (target) has been updated to support:
	160	* - atomic instructions
	161	* - memory ordering primitives (barriers)
	162	* they can set the appropriate CONFIG flags in ${target}-softmmu.mak
	163	*
	164	* Once a guest architecture has been converted to the new primitives
	165	* there are two remaining limitations to check.
	166	*
	167	* - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
	168	* - The host must have a stronger memory order than the guest
	169	*
	170	* It may be possible in future to support strong guests on weak hosts
	171	* but that will require tagging all load/stores in a guest with their
	172	* implicit memory order requirements which would likely slow things
	173	* down a lot.
	174	*/
	175
	176	static bool check_tcg_memory_orders_compatible(void)
	177	{
	178	#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
	179	return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
	180	#else
	181	return false;
	182	#endif
	183	}
	184
	185	static bool default_mttcg_enabled(void)
	186	{
	187	if (use_icount \|\| TCG_OVERSIZED_GUEST) {
	188	return false;
	189	} else {
	190	#ifdef TARGET_SUPPORTS_MTTCG
	191	return check_tcg_memory_orders_compatible();
	192	#else
	193	return false;
	194	#endif
	195	}
	196	}
	197
	198	void qemu_tcg_configure(QemuOpts opts, Error *errp)
	199	{
	200	const char *t = qemu_opt_get(opts, "thread");
	201	if (t) {
	202	if (strcmp(t, "multi") == 0) {
	203	if (TCG_OVERSIZED_GUEST) {
	204	error_setg(errp, "No MTTCG when guest word size > hosts");
	205	} else if (use_icount) {
	206	error_setg(errp, "No MTTCG when icount is enabled");
	207	} else {
	208	#ifndef TARGET_SUPPORTS_MTTCG
	209	error_report("Guest not yet converted to MTTCG - "
	210	"you may get unexpected results");
	211	#endif
	212	if (!check_tcg_memory_orders_compatible()) {
	213	error_report("Guest expects a stronger memory ordering "
	214	"than the host provides");
	215	error_printf("This may cause strange/hard to debug errors\n");
	216	}
	217	mttcg_enabled = true;
	218	}
	219	} else if (strcmp(t, "single") == 0) {
	220	mttcg_enabled = false;
	221	} else {
	222	error_setg(errp, "Invalid 'thread' setting %s", t);
	223	}
	224	} else {
	225	mttcg_enabled = default_mttcg_enabled();
	226	}
	227	}
	228
	229	/* The current number of executed instructions is based on what we
	230	* originally budgeted minus the current state of the decrementing
	231	* icount counters in extra/u16.low.
	232	*/
	233	static int64_t cpu_get_icount_executed(CPUState *cpu)
	234	{
	235	return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
	236	}
	237
	238	/*
	239	* Update the global shared timer_state.qemu_icount to take into
	240	* account executed instructions. This is done by the TCG vCPU
	241	* thread so the main-loop can see time has moved forward.
	242	*/
	243	void cpu_update_icount(CPUState *cpu)
	244	{
	245	int64_t executed = cpu_get_icount_executed(cpu);
	246	cpu->icount_budget -= executed;
	247
	248	#ifdef CONFIG_ATOMIC64
	249	atomic_set__nocheck(&timers_state.qemu_icount,
	250	atomic_read__nocheck(&timers_state.qemu_icount) +
	251	executed);
	252	#else /* FIXME: we need 64bit atomics to do this safely */
	253	timers_state.qemu_icount += executed;
	254	#endif
	255	}
	256
	257	int64_t cpu_get_icount_raw(void)
	258	{
	259	CPUState *cpu = current_cpu;
	260
	261	if (cpu && cpu->running) {
	262	if (!cpu->can_do_io) {
	263	error_report("Bad icount read");
	264	exit(1);
	265	}
	266	/* Take into account what has run */
	267	cpu_update_icount(cpu);
	268	}
	269	#ifdef CONFIG_ATOMIC64
	270	return atomic_read__nocheck(&timers_state.qemu_icount);
	271	#else /* FIXME: we need 64bit atomics to do this safely */
	272	return timers_state.qemu_icount;
	273	#endif
	274	}
	275
	276	/* Return the virtual CPU time, based on the instruction counter. */
	277	static int64_t cpu_get_icount_locked(void)
	278	{
	279	int64_t icount = cpu_get_icount_raw();
	280	return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
	281	}
	282
	283	int64_t cpu_get_icount(void)
	284	{
	285	int64_t icount;
	286	unsigned start;
	287
	288	do {
	289	start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
	290	icount = cpu_get_icount_locked();
	291	} while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
	292
	293	return icount;
	294	}
	295
	296	int64_t cpu_icount_to_ns(int64_t icount)
	297	{
	298	return icount << icount_time_shift;
	299	}
	300
	301	/* return the time elapsed in VM between vm_start and vm_stop. Unless
	302	* icount is active, cpu_get_ticks() uses units of the host CPU cycle
	303	* counter.
	304	*
	305	* Caller must hold the BQL
	306	*/
	307	int64_t cpu_get_ticks(void)
	308	{
	309	int64_t ticks;
	310
	311	if (use_icount) {
	312	return cpu_get_icount();
	313	}
	314
	315	ticks = timers_state.cpu_ticks_offset;
	316	if (timers_state.cpu_ticks_enabled) {
	317	ticks += cpu_get_host_ticks();
	318	}
	319
	320	if (timers_state.cpu_ticks_prev > ticks) {
	321	/* Note: non increasing ticks may happen if the host uses
	322	software suspend */
	323	timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
	324	ticks = timers_state.cpu_ticks_prev;
	325	}
	326
	327	timers_state.cpu_ticks_prev = ticks;
	328	return ticks;
	329	}
	330
	331	static int64_t cpu_get_clock_locked(void)
	332	{
	333	int64_t time;
	334
	335	time = timers_state.cpu_clock_offset;
	336	if (timers_state.cpu_ticks_enabled) {
	337	time += get_clock();
	338	}
	339
	340	return time;
	341	}
	342
	343	/* Return the monotonic time elapsed in VM, i.e.,
	344	* the time between vm_start and vm_stop
	345	*/
	346	int64_t cpu_get_clock(void)
	347	{
	348	int64_t ti;
	349	unsigned start;
	350
	351	do {
	352	start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
	353	ti = cpu_get_clock_locked();
	354	} while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
	355
	356	return ti;
	357	}
	358
	359	/* enable cpu_get_ticks()
	360	* Caller must hold BQL which serves as mutex for vm_clock_seqlock.
	361	*/
	362	void cpu_enable_ticks(void)
	363	{
	364	/* Here, the really thing protected by seqlock is cpu_clock_offset. */
	365	seqlock_write_begin(&timers_state.vm_clock_seqlock);
	366	if (!timers_state.cpu_ticks_enabled) {
	367	timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
	368	timers_state.cpu_clock_offset -= get_clock();
	369	timers_state.cpu_ticks_enabled = 1;
	370	}
	371	seqlock_write_end(&timers_state.vm_clock_seqlock);
	372	}
	373
	374	/* disable cpu_get_ticks() : the clock is stopped. You must not call
	375	* cpu_get_ticks() after that.
	376	* Caller must hold BQL which serves as mutex for vm_clock_seqlock.
	377	*/
	378	void cpu_disable_ticks(void)
	379	{
	380	/* Here, the really thing protected by seqlock is cpu_clock_offset. */
	381	seqlock_write_begin(&timers_state.vm_clock_seqlock);
	382	if (timers_state.cpu_ticks_enabled) {
	383	timers_state.cpu_ticks_offset += cpu_get_host_ticks();
	384	timers_state.cpu_clock_offset = cpu_get_clock_locked();
	385	timers_state.cpu_ticks_enabled = 0;
	386	}
	387	seqlock_write_end(&timers_state.vm_clock_seqlock);
	388	}
	389
	390	/* Correlation between real and virtual time is always going to be
	391	fairly approximate, so ignore small variation.
	392	When the guest is idle real and virtual time will be aligned in
	393	the IO wait loop. */
	394	#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
	395
	396	static void icount_adjust(void)
	397	{
	398	int64_t cur_time;
	399	int64_t cur_icount;
	400	int64_t delta;
	401
	402	/* Protected by TimersState mutex. */
	403	static int64_t last_delta;
	404
	405	/* If the VM is not running, then do nothing. */
	406	if (!runstate_is_running()) {
	407	return;
	408	}
	409
	410	seqlock_write_begin(&timers_state.vm_clock_seqlock);
	411	cur_time = cpu_get_clock_locked();
	412	cur_icount = cpu_get_icount_locked();
	413
	414	delta = cur_icount - cur_time;
	415	/* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
	416	if (delta > 0
	417	&& last_delta + ICOUNT_WOBBLE < delta * 2
	418	&& icount_time_shift > 0) {
	419	/* The guest is getting too far ahead. Slow time down. */
	420	icount_time_shift--;
	421	}
	422	if (delta < 0
	423	&& last_delta - ICOUNT_WOBBLE > delta * 2
	424	&& icount_time_shift < MAX_ICOUNT_SHIFT) {
	425	/* The guest is getting too far behind. Speed time up. */
	426	icount_time_shift++;
	427	}
	428	last_delta = delta;
	429	timers_state.qemu_icount_bias = cur_icount
	430	- (timers_state.qemu_icount << icount_time_shift);
	431	seqlock_write_end(&timers_state.vm_clock_seqlock);
	432	}
	433
	434	static void icount_adjust_rt(void *opaque)
	435	{
	436	timer_mod(timers_state.icount_rt_timer,
	437	qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
	438	icount_adjust();
	439	}
	440
	441	static void icount_adjust_vm(void *opaque)
	442	{
	443	timer_mod(timers_state.icount_vm_timer,
	444	qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
	445	NANOSECONDS_PER_SECOND / 10);
	446	icount_adjust();
	447	}
	448
	449	static int64_t qemu_icount_round(int64_t count)
	450	{
	451	return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
	452	}
	453
	454	static void icount_warp_rt(void)
	455	{
	456	unsigned seq;
	457	int64_t warp_start;
	458
	459	/* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
	460	* changes from -1 to another value, so the race here is okay.
	461	*/
	462	do {
	463	seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
	464	warp_start = timers_state.vm_clock_warp_start;
	465	} while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
	466
	467	if (warp_start == -1) {
	468	return;
	469	}
	470
	471	seqlock_write_begin(&timers_state.vm_clock_seqlock);
	472	if (runstate_is_running()) {
	473	int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
	474	cpu_get_clock_locked());
	475	int64_t warp_delta;
	476
	477	warp_delta = clock - timers_state.vm_clock_warp_start;
	478	if (use_icount == 2) {
	479	/*
	480	* In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
	481	* far ahead of real time.
	482	*/
	483	int64_t cur_icount = cpu_get_icount_locked();
	484	int64_t delta = clock - cur_icount;
	485	warp_delta = MIN(warp_delta, delta);
	486	}
	487	timers_state.qemu_icount_bias += warp_delta;
	488	}
	489	timers_state.vm_clock_warp_start = -1;
	490	seqlock_write_end(&timers_state.vm_clock_seqlock);
	491
	492	if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
	493	qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
	494	}
	495	}
	496
	497	static void icount_timer_cb(void *opaque)
	498	{
	499	/* No need for a checkpoint because the timer already synchronizes
	500	* with CHECKPOINT_CLOCK_VIRTUAL_RT.
	501	*/
	502	icount_warp_rt();
	503	}
	504
	505	void qtest_clock_warp(int64_t dest)
	506	{
	507	int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
	508	AioContext *aio_context;
	509	assert(qtest_enabled());
	510	aio_context = qemu_get_aio_context();
	511	while (clock < dest) {
	512	int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
	513	int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
	514
	515	seqlock_write_begin(&timers_state.vm_clock_seqlock);
	516	timers_state.qemu_icount_bias += warp;
	517	seqlock_write_end(&timers_state.vm_clock_seqlock);
	518
	519	qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
	520	timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
	521	clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
	522	}
	523	qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
	524	}
	525
	526	void qemu_start_warp_timer(void)
	527	{
	528	int64_t clock;
	529	int64_t deadline;
	530
	531	if (!use_icount) {
	532	return;
	533	}
	534
	535	/* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
	536	* do not fire, so computing the deadline does not make sense.
	537	*/
	538	if (!runstate_is_running()) {
	539	return;
	540	}
	541
	542	/* warp clock deterministically in record/replay mode */
	543	if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
	544	return;
	545	}
	546
	547	if (!all_cpu_threads_idle()) {
	548	return;
	549	}
	550
	551	if (qtest_enabled()) {
	552	/* When testing, qtest commands advance icount. */
	553	return;
	554	}
	555
	556	/* We want to use the earliest deadline from ALL vm_clocks */
	557	clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
	558	deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
	559	if (deadline < 0) {
	560	static bool notified;
	561	if (!icount_sleep && !notified) {
	562	warn_report("icount sleep disabled and no active timers");
	563	notified = true;
	564	}
	565	return;
	566	}
	567
	568	if (deadline > 0) {
	569	/*
	570	* Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
	571	* sleep. Otherwise, the CPU might be waiting for a future timer
	572	* interrupt to wake it up, but the interrupt never comes because
	573	* the vCPU isn't running any insns and thus doesn't advance the
	574	* QEMU_CLOCK_VIRTUAL.
	575	*/
	576	if (!icount_sleep) {
	577	/*
	578	* We never let VCPUs sleep in no sleep icount mode.
	579	* If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
	580	* to the next QEMU_CLOCK_VIRTUAL event and notify it.
	581	* It is useful when we want a deterministic execution time,
	582	* isolated from host latencies.
	583	*/
	584	seqlock_write_begin(&timers_state.vm_clock_seqlock);
	585	timers_state.qemu_icount_bias += deadline;
	586	seqlock_write_end(&timers_state.vm_clock_seqlock);
	587	qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
	588	} else {
	589	/*
	590	* We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
	591	* "real" time, (related to the time left until the next event) has
	592	* passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
	593	* This avoids that the warps are visible externally; for example,
	594	* you will not be sending network packets continuously instead of
	595	* every 100ms.
	596	*/
	597	seqlock_write_begin(&timers_state.vm_clock_seqlock);
	598	if (timers_state.vm_clock_warp_start == -1
	599	\|\| timers_state.vm_clock_warp_start > clock) {
	600	timers_state.vm_clock_warp_start = clock;
	601	}
	602	seqlock_write_end(&timers_state.vm_clock_seqlock);
	603	timer_mod_anticipate(timers_state.icount_warp_timer,
	604	clock + deadline);
	605	}
	606	} else if (deadline == 0) {
	607	qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
	608	}
	609	}
	610
	611	static void qemu_account_warp_timer(void)
	612	{
	613	if (!use_icount \|\| !icount_sleep) {
	614	return;
	615	}
	616
	617	/* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
	618	* do not fire, so computing the deadline does not make sense.
	619	*/
	620	if (!runstate_is_running()) {
	621	return;
	622	}
	623
	624	/* warp clock deterministically in record/replay mode */
	625	if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
	626	return;
	627	}
	628
	629	timer_del(timers_state.icount_warp_timer);
	630	icount_warp_rt();
	631	}
	632
	633	static bool icount_state_needed(void *opaque)
	634	{
	635	return use_icount;
	636	}
	637
	638	static bool warp_timer_state_needed(void *opaque)
	639	{
	640	TimersState *s = opaque;
	641	return s->icount_warp_timer != NULL;
	642	}
	643
	644	static bool adjust_timers_state_needed(void *opaque)
	645	{
	646	TimersState *s = opaque;
	647	return s->icount_rt_timer != NULL;
	648	}
	649
	650	/*
	651	* Subsection for warp timer migration is optional, because may not be created
	652	*/
	653	static const VMStateDescription icount_vmstate_warp_timer = {
	654	.name = "timer/icount/warp_timer",
	655	.version_id = 1,
	656	.minimum_version_id = 1,
	657	.needed = warp_timer_state_needed,
	658	.fields = (VMStateField[]) {
	659	VMSTATE_INT64(vm_clock_warp_start, TimersState),
	660	VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
	661	VMSTATE_END_OF_LIST()
	662	}
	663	};
	664
	665	static const VMStateDescription icount_vmstate_adjust_timers = {
	666	.name = "timer/icount/timers",
	667	.version_id = 1,
	668	.minimum_version_id = 1,
	669	.needed = adjust_timers_state_needed,
	670	.fields = (VMStateField[]) {
	671	VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
	672	VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
	673	VMSTATE_END_OF_LIST()
	674	}
	675	};
	676
	677	/*
	678	* This is a subsection for icount migration.
	679	*/
	680	static const VMStateDescription icount_vmstate_timers = {
	681	.name = "timer/icount",
	682	.version_id = 1,
	683	.minimum_version_id = 1,
	684	.needed = icount_state_needed,
	685	.fields = (VMStateField[]) {
	686	VMSTATE_INT64(qemu_icount_bias, TimersState),
	687	VMSTATE_INT64(qemu_icount, TimersState),
	688	VMSTATE_END_OF_LIST()
	689	},
	690	.subsections = (const VMStateDescription*[]) {
	691	&icount_vmstate_warp_timer,
	692	&icount_vmstate_adjust_timers,
	693	NULL
	694	}
	695	};
	696
	697	static const VMStateDescription vmstate_timers = {
	698	.name = "timer",
	699	.version_id = 2,
	700	.minimum_version_id = 1,
	701	.fields = (VMStateField[]) {
	702	VMSTATE_INT64(cpu_ticks_offset, TimersState),
	703	VMSTATE_INT64(dummy, TimersState),
	704	VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
	705	VMSTATE_END_OF_LIST()
	706	},
	707	.subsections = (const VMStateDescription*[]) {
	708	&icount_vmstate_timers,
	709	NULL
	710	}
	711	};
	712
	713	static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
	714	{
	715	double pct;
	716	double throttle_ratio;
	717	long sleeptime_ns;
	718
	719	if (!cpu_throttle_get_percentage()) {
	720	return;
	721	}
	722
	723	pct = (double)cpu_throttle_get_percentage()/100;
	724	throttle_ratio = pct / (1 - pct);
	725	sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
	726
	727	qemu_mutex_unlock_iothread();
	728	g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
	729	qemu_mutex_lock_iothread();
	730	atomic_set(&cpu->throttle_thread_scheduled, 0);
	731	}
	732
	733	static void cpu_throttle_timer_tick(void *opaque)
	734	{
	735	CPUState *cpu;
	736	double pct;
	737
	738	/* Stop the timer if needed */
	739	if (!cpu_throttle_get_percentage()) {
	740	return;
	741	}
	742	CPU_FOREACH(cpu) {
	743	if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
	744	async_run_on_cpu(cpu, cpu_throttle_thread,
	745	RUN_ON_CPU_NULL);
	746	}
	747	}
	748
	749	pct = (double)cpu_throttle_get_percentage()/100;
	750	timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
	751	CPU_THROTTLE_TIMESLICE_NS / (1-pct));
	752	}
	753
	754	void cpu_throttle_set(int new_throttle_pct)
	755	{
	756	/* Ensure throttle percentage is within valid range */
	757	new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
	758	new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
	759
	760	atomic_set(&throttle_percentage, new_throttle_pct);
	761
	762	timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
	763	CPU_THROTTLE_TIMESLICE_NS);
	764	}
	765
	766	void cpu_throttle_stop(void)
	767	{
	768	atomic_set(&throttle_percentage, 0);
	769	}
	770
	771	bool cpu_throttle_active(void)
	772	{
	773	return (cpu_throttle_get_percentage() != 0);
	774	}
	775
	776	int cpu_throttle_get_percentage(void)
	777	{
	778	return atomic_read(&throttle_percentage);
	779	}
	780
	781	void cpu_ticks_init(void)
	782	{
	783	seqlock_init(&timers_state.vm_clock_seqlock);
	784	vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
	785	throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
	786	cpu_throttle_timer_tick, NULL);
	787	}
	788
	789	void configure_icount(QemuOpts opts, Error *errp)
	790	{
	791	const char *option;
	792	char *rem_str = NULL;
	793
	794	option = qemu_opt_get(opts, "shift");
	795	if (!option) {
	796	if (qemu_opt_get(opts, "align") != NULL) {
	797	error_setg(errp, "Please specify shift option when using align");
	798	}
	799	return;
	800	}
	801
	802	icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
	803	if (icount_sleep) {
	804	timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
	805	icount_timer_cb, NULL);
	806	}
	807
	808	icount_align_option = qemu_opt_get_bool(opts, "align", false);
	809
	810	if (icount_align_option && !icount_sleep) {
	811	error_setg(errp, "align=on and sleep=off are incompatible");
	812	}
	813	if (strcmp(option, "auto") != 0) {
	814	errno = 0;
	815	icount_time_shift = strtol(option, &rem_str, 0);
	816	if (errno != 0 \|\| *rem_str != '\0' \|\| !strlen(option)) {
	817	error_setg(errp, "icount: Invalid shift value");
	818	}
	819	use_icount = 1;
	820	return;
	821	} else if (icount_align_option) {
	822	error_setg(errp, "shift=auto and align=on are incompatible");
	823	} else if (!icount_sleep) {
	824	error_setg(errp, "shift=auto and sleep=off are incompatible");
	825	}
	826
	827	use_icount = 2;
	828
	829	/* 125MIPS seems a reasonable initial guess at the guest speed.
	830	It will be corrected fairly quickly anyway. */
	831	icount_time_shift = 3;
	832
	833	/* Have both realtime and virtual time triggers for speed adjustment.
	834	The realtime trigger catches emulated time passing too slowly,
	835	the virtual time trigger catches emulated time passing too fast.
	836	Realtime triggers occur even when idle, so use them less frequently
	837	than VM triggers. */
	838	timers_state.vm_clock_warp_start = -1;
	839	timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
	840	icount_adjust_rt, NULL);
	841	timer_mod(timers_state.icount_rt_timer,
	842	qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
	843	timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
	844	icount_adjust_vm, NULL);
	845	timer_mod(timers_state.icount_vm_timer,
	846	qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
	847	NANOSECONDS_PER_SECOND / 10);
	848	}
	849
	850	/***********************************************************/
	851	/* TCG vCPU kick timer
	852	*
	853	* The kick timer is responsible for moving single threaded vCPU
	854	* emulation on to the next vCPU. If more than one vCPU is running a
	855	* timer event will force a cpu->exit so the next vCPU can get
	856	* scheduled.
	857	*
	858	* The timer is removed if all vCPUs are idle and restarted again once
	859	* idleness is complete.
	860	*/
	861
	862	static QEMUTimer *tcg_kick_vcpu_timer;
	863	static CPUState *tcg_current_rr_cpu;
	864
	865	#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
	866
	867	static inline int64_t qemu_tcg_next_kick(void)
	868	{
	869	return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
	870	}
	871
	872	/* Kick the currently round-robin scheduled vCPU */
	873	static void qemu_cpu_kick_rr_cpu(void)
	874	{
	875	CPUState *cpu;
	876	do {
	877	cpu = atomic_mb_read(&tcg_current_rr_cpu);
	878	if (cpu) {
	879	cpu_exit(cpu);
	880	}
	881	} while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
	882	}
	883
	884	static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
	885	{
	886	}
	887
	888	void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
	889	{
	890	if (!use_icount \|\| type != QEMU_CLOCK_VIRTUAL) {
	891	qemu_notify_event();
	892	return;
	893	}
	894
	895	if (!qemu_in_vcpu_thread() && first_cpu) {
	896	/* qemu_cpu_kick is not enough to kick a halted CPU out of
	897	* qemu_tcg_wait_io_event. async_run_on_cpu, instead,
	898	* causes cpu_thread_is_idle to return false. This way,
	899	* handle_icount_deadline can run.
	900	*/
	901	async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
	902	}
	903	}
	904
	905	static void kick_tcg_thread(void *opaque)
	906	{
	907	timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
	908	qemu_cpu_kick_rr_cpu();
	909	}
	910
	911	static void start_tcg_kick_timer(void)
	912	{
	913	assert(!mttcg_enabled);
	914	if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
	915	tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
	916	kick_tcg_thread, NULL);
	917	timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
	918	}
	919	}
	920
	921	static void stop_tcg_kick_timer(void)
	922	{
	923	assert(!mttcg_enabled);
	924	if (tcg_kick_vcpu_timer) {
	925	timer_del(tcg_kick_vcpu_timer);
	926	tcg_kick_vcpu_timer = NULL;
	927	}
	928	}
	929
	930	/***********************************************************/
	931	void hw_error(const char *fmt, ...)
	932	{
	933	va_list ap;
	934	CPUState *cpu;
	935
	936	va_start(ap, fmt);
	937	fprintf(stderr, "qemu: hardware error: ");
	938	vfprintf(stderr, fmt, ap);
	939	fprintf(stderr, "\n");
	940	CPU_FOREACH(cpu) {
	941	fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
	942	cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
	943	}
	944	va_end(ap);
	945	abort();
	946	}
	947
	948	void cpu_synchronize_all_states(void)
	949	{
	950	CPUState *cpu;
	951
	952	CPU_FOREACH(cpu) {
	953	cpu_synchronize_state(cpu);
	954	/* TODO: move to cpu_synchronize_state() */
	955	if (hvf_enabled()) {
	956	hvf_cpu_synchronize_state(cpu);
	957	}
	958	}
	959	}
	960
	961	void cpu_synchronize_all_post_reset(void)
	962	{
	963	CPUState *cpu;
	964
	965	CPU_FOREACH(cpu) {
	966	cpu_synchronize_post_reset(cpu);
	967	/* TODO: move to cpu_synchronize_post_reset() */
	968	if (hvf_enabled()) {
	969	hvf_cpu_synchronize_post_reset(cpu);
	970	}
	971	}
	972	}
	973
	974	void cpu_synchronize_all_post_init(void)
	975	{
	976	CPUState *cpu;
	977
	978	CPU_FOREACH(cpu) {
	979	cpu_synchronize_post_init(cpu);
	980	/* TODO: move to cpu_synchronize_post_init() */
	981	if (hvf_enabled()) {
	982	hvf_cpu_synchronize_post_init(cpu);
	983	}
	984	}
	985	}
	986
	987	void cpu_synchronize_all_pre_loadvm(void)
	988	{
	989	CPUState *cpu;
	990
	991	CPU_FOREACH(cpu) {
	992	cpu_synchronize_pre_loadvm(cpu);
	993	}
	994	}
	995
	996	static int do_vm_stop(RunState state)
	997	{
	998	int ret = 0;
	999
	1000	if (runstate_is_running()) {
	1001	cpu_disable_ticks();
	1002	pause_all_vcpus();
	1003	runstate_set(state);
	1004	vm_state_notify(0, state);
	1005	qapi_event_send_stop(&error_abort);
	1006	}
	1007
	1008	bdrv_drain_all();
	1009	replay_disable_events();
	1010	ret = bdrv_flush_all();
	1011
	1012	return ret;
	1013	}
	1014
	1015	static bool cpu_can_run(CPUState *cpu)
	1016	{
	1017	if (cpu->stop) {
	1018	return false;
	1019	}
	1020	if (cpu_is_stopped(cpu)) {
	1021	return false;
	1022	}
	1023	return true;
	1024	}
	1025
	1026	static void cpu_handle_guest_debug(CPUState *cpu)
	1027	{
	1028	gdb_set_stop_cpu(cpu);
	1029	qemu_system_debug_request();
	1030	cpu->stopped = true;
	1031	}
	1032
	1033	#ifdef CONFIG_LINUX
	1034	static void sigbus_reraise(void)
	1035	{
	1036	sigset_t set;
	1037	struct sigaction action;
	1038
	1039	memset(&action, 0, sizeof(action));
	1040	action.sa_handler = SIG_DFL;
	1041	if (!sigaction(SIGBUS, &action, NULL)) {
	1042	raise(SIGBUS);
	1043	sigemptyset(&set);
	1044	sigaddset(&set, SIGBUS);
	1045	pthread_sigmask(SIG_UNBLOCK, &set, NULL);
	1046	}
	1047	perror("Failed to re-raise SIGBUS!\n");
	1048	abort();
	1049	}
	1050
	1051	static void sigbus_handler(int n, siginfo_t siginfo, void ctx)
	1052	{
	1053	if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
	1054	sigbus_reraise();
	1055	}
	1056
	1057	if (current_cpu) {
	1058	/* Called asynchronously in VCPU thread. */
	1059	if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
	1060	sigbus_reraise();
	1061	}
	1062	} else {
	1063	/* Called synchronously (via signalfd) in main thread. */
	1064	if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
	1065	sigbus_reraise();
	1066	}
	1067	}
	1068	}
	1069
	1070	static void qemu_init_sigbus(void)
	1071	{
	1072	struct sigaction action;
	1073
	1074	memset(&action, 0, sizeof(action));
	1075	action.sa_flags = SA_SIGINFO;
	1076	action.sa_sigaction = sigbus_handler;
	1077	sigaction(SIGBUS, &action, NULL);
	1078
	1079	prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
	1080	}
	1081	#else /* !CONFIG_LINUX */
	1082	static void qemu_init_sigbus(void)
	1083	{
	1084	}
	1085	#endif /* !CONFIG_LINUX */
	1086
	1087	static QemuMutex qemu_global_mutex;
	1088
	1089	static QemuThread io_thread;
	1090
	1091	/* cpu creation */
	1092	static QemuCond qemu_cpu_cond;
	1093	/* system init */
	1094	static QemuCond qemu_pause_cond;
	1095
	1096	void qemu_init_cpu_loop(void)
	1097	{
	1098	qemu_init_sigbus();
	1099	qemu_cond_init(&qemu_cpu_cond);
	1100	qemu_cond_init(&qemu_pause_cond);
	1101	qemu_mutex_init(&qemu_global_mutex);
	1102
	1103	qemu_thread_get_self(&io_thread);
	1104	}
	1105
	1106	void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
	1107	{
	1108	do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
	1109	}
	1110
	1111	static void qemu_kvm_destroy_vcpu(CPUState *cpu)
	1112	{
	1113	if (kvm_destroy_vcpu(cpu) < 0) {
	1114	error_report("kvm_destroy_vcpu failed");
	1115	exit(EXIT_FAILURE);
	1116	}
	1117	}
	1118
	1119	static void qemu_tcg_destroy_vcpu(CPUState *cpu)
	1120	{
	1121	}
	1122
	1123	static void qemu_cpu_stop(CPUState *cpu, bool exit)
	1124	{
	1125	g_assert(qemu_cpu_is_self(cpu));
	1126	cpu->stop = false;
	1127	cpu->stopped = true;
	1128	if (exit) {
	1129	cpu_exit(cpu);
	1130	}
	1131	qemu_cond_broadcast(&qemu_pause_cond);
	1132	}
	1133
	1134	static void qemu_wait_io_event_common(CPUState *cpu)
	1135	{
	1136	atomic_mb_set(&cpu->thread_kicked, false);
	1137	if (cpu->stop) {
	1138	qemu_cpu_stop(cpu, false);
	1139	}
	1140	process_queued_cpu_work(cpu);
	1141	}
	1142
	1143	static void qemu_tcg_rr_wait_io_event(CPUState *cpu)
	1144	{
	1145	while (all_cpu_threads_idle()) {
	1146	stop_tcg_kick_timer();
	1147	qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
	1148	}
	1149
	1150	start_tcg_kick_timer();
	1151
	1152	qemu_wait_io_event_common(cpu);
	1153	}
	1154
	1155	static void qemu_wait_io_event(CPUState *cpu)
	1156	{
	1157	while (cpu_thread_is_idle(cpu)) {
	1158	qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
	1159	}
	1160
	1161	#ifdef _WIN32
	1162	/* Eat dummy APC queued by qemu_cpu_kick_thread. */
	1163	if (!tcg_enabled()) {
	1164	SleepEx(0, TRUE);
	1165	}
	1166	#endif
	1167	qemu_wait_io_event_common(cpu);
	1168	}
	1169
	1170	static void qemu_kvm_cpu_thread_fn(void arg)
	1171	{
	1172	CPUState *cpu = arg;
	1173	int r;
	1174
	1175	rcu_register_thread();
	1176
	1177	qemu_mutex_lock_iothread();
	1178	qemu_thread_get_self(cpu->thread);
	1179	cpu->thread_id = qemu_get_thread_id();
	1180	cpu->can_do_io = 1;
	1181	current_cpu = cpu;
	1182
	1183	r = kvm_init_vcpu(cpu);
	1184	if (r < 0) {
	1185	error_report("kvm_init_vcpu failed: %s", strerror(-r));
	1186	exit(1);
	1187	}
	1188
	1189	kvm_init_cpu_signals(cpu);
	1190
	1191	/* signal CPU creation */
	1192	cpu->created = true;
	1193	qemu_cond_signal(&qemu_cpu_cond);
	1194
	1195	do {
	1196	if (cpu_can_run(cpu)) {
	1197	r = kvm_cpu_exec(cpu);
	1198	if (r == EXCP_DEBUG) {
	1199	cpu_handle_guest_debug(cpu);
	1200	}
	1201	}
	1202	qemu_wait_io_event(cpu);
	1203	} while (!cpu->unplug \|\| cpu_can_run(cpu));
	1204
	1205	qemu_kvm_destroy_vcpu(cpu);
	1206	cpu->created = false;
	1207	qemu_cond_signal(&qemu_cpu_cond);
	1208	qemu_mutex_unlock_iothread();
	1209	rcu_unregister_thread();
	1210	return NULL;
	1211	}
	1212
	1213	static void qemu_dummy_cpu_thread_fn(void arg)
	1214	{
	1215	#ifdef _WIN32
	1216	error_report("qtest is not supported under Windows");
	1217	exit(1);
	1218	#else
	1219	CPUState *cpu = arg;
	1220	sigset_t waitset;
	1221	int r;
	1222
	1223	rcu_register_thread();
	1224
	1225	qemu_mutex_lock_iothread();
	1226	qemu_thread_get_self(cpu->thread);
	1227	cpu->thread_id = qemu_get_thread_id();
	1228	cpu->can_do_io = 1;
	1229	current_cpu = cpu;
	1230
	1231	sigemptyset(&waitset);
	1232	sigaddset(&waitset, SIG_IPI);
	1233
	1234	/* signal CPU creation */
	1235	cpu->created = true;
	1236	qemu_cond_signal(&qemu_cpu_cond);
	1237
	1238	do {
	1239	qemu_mutex_unlock_iothread();
	1240	do {
	1241	int sig;
	1242	r = sigwait(&waitset, &sig);
	1243	} while (r == -1 && (errno == EAGAIN \|\| errno == EINTR));
	1244	if (r == -1) {
	1245	perror("sigwait");
	1246	exit(1);
	1247	}
	1248	qemu_mutex_lock_iothread();
	1249	qemu_wait_io_event(cpu);
	1250	} while (!cpu->unplug);
	1251
	1252	rcu_unregister_thread();
	1253	return NULL;
	1254	#endif
	1255	}
	1256
	1257	static int64_t tcg_get_icount_limit(void)
	1258	{
	1259	int64_t deadline;
	1260
	1261	if (replay_mode != REPLAY_MODE_PLAY) {
	1262	deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
	1263
	1264	/* Maintain prior (possibly buggy) behaviour where if no deadline
	1265	* was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
	1266	* INT32_MAX nanoseconds ahead, we still use INT32_MAX
	1267	* nanoseconds.
	1268	*/
	1269	if ((deadline < 0) \|\| (deadline > INT32_MAX)) {
	1270	deadline = INT32_MAX;
	1271	}
	1272
	1273	return qemu_icount_round(deadline);
	1274	} else {
	1275	return replay_get_instructions();
	1276	}
	1277	}
	1278
	1279	static void handle_icount_deadline(void)
	1280	{
	1281	assert(qemu_in_vcpu_thread());
	1282	if (use_icount) {
	1283	int64_t deadline =
	1284	qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
	1285
	1286	if (deadline == 0) {
	1287	/* Wake up other AioContexts. */
	1288	qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
	1289	qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
	1290	}
	1291	}
	1292	}
	1293
	1294	static void prepare_icount_for_run(CPUState *cpu)
	1295	{
	1296	if (use_icount) {
	1297	int insns_left;
	1298
	1299	/* These should always be cleared by process_icount_data after
	1300	* each vCPU execution. However u16.high can be raised
	1301	* asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
	1302	*/
	1303	g_assert(cpu->icount_decr.u16.low == 0);
	1304	g_assert(cpu->icount_extra == 0);
	1305
	1306	cpu->icount_budget = tcg_get_icount_limit();
	1307	insns_left = MIN(0xffff, cpu->icount_budget);
	1308	cpu->icount_decr.u16.low = insns_left;
	1309	cpu->icount_extra = cpu->icount_budget - insns_left;
	1310	}
	1311	}
	1312
	1313	static void process_icount_data(CPUState *cpu)
	1314	{
	1315	if (use_icount) {
	1316	/* Account for executed instructions */
	1317	cpu_update_icount(cpu);
	1318
	1319	/* Reset the counters */
	1320	cpu->icount_decr.u16.low = 0;
	1321	cpu->icount_extra = 0;
	1322	cpu->icount_budget = 0;
	1323
	1324	replay_account_executed_instructions();
	1325	}
	1326	}
	1327
	1328
	1329	static int tcg_cpu_exec(CPUState *cpu)
	1330	{
	1331	int ret;
	1332	#ifdef CONFIG_PROFILER
	1333	int64_t ti;
	1334	#endif
	1335
	1336	#ifdef CONFIG_PROFILER
	1337	ti = profile_getclock();
	1338	#endif
	1339	qemu_mutex_unlock_iothread();
	1340	cpu_exec_start(cpu);
	1341	ret = cpu_exec(cpu);
	1342	cpu_exec_end(cpu);
	1343	qemu_mutex_lock_iothread();
	1344	#ifdef CONFIG_PROFILER
	1345	tcg_time += profile_getclock() - ti;
	1346	#endif
	1347	return ret;
	1348	}
	1349
	1350	/* Destroy any remaining vCPUs which have been unplugged and have
	1351	* finished running
	1352	*/
	1353	static void deal_with_unplugged_cpus(void)
	1354	{
	1355	CPUState *cpu;
	1356
	1357	CPU_FOREACH(cpu) {
	1358	if (cpu->unplug && !cpu_can_run(cpu)) {
	1359	qemu_tcg_destroy_vcpu(cpu);
	1360	cpu->created = false;
	1361	qemu_cond_signal(&qemu_cpu_cond);
	1362	break;
	1363	}
	1364	}
	1365	}
	1366
	1367	/* Single-threaded TCG
	1368	*
	1369	* In the single-threaded case each vCPU is simulated in turn. If
	1370	* there is more than a single vCPU we create a simple timer to kick
	1371	* the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
	1372	* This is done explicitly rather than relying on side-effects
	1373	* elsewhere.
	1374	*/
	1375
	1376	static void qemu_tcg_rr_cpu_thread_fn(void arg)
	1377	{
	1378	CPUState *cpu = arg;
	1379
	1380	rcu_register_thread();
	1381	tcg_register_thread();
	1382
	1383	qemu_mutex_lock_iothread();
	1384	qemu_thread_get_self(cpu->thread);
	1385
	1386	CPU_FOREACH(cpu) {
	1387	cpu->thread_id = qemu_get_thread_id();
	1388	cpu->created = true;
	1389	cpu->can_do_io = 1;
	1390	}
	1391	qemu_cond_signal(&qemu_cpu_cond);
	1392
	1393	/* wait for initial kick-off after machine start */
	1394	while (first_cpu->stopped) {
	1395	qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
	1396
	1397	/* process any pending work */
	1398	CPU_FOREACH(cpu) {
	1399	current_cpu = cpu;
	1400	qemu_wait_io_event_common(cpu);
	1401	}
	1402	}
	1403
	1404	start_tcg_kick_timer();
	1405
	1406	cpu = first_cpu;
	1407
	1408	/* process any pending work */
	1409	cpu->exit_request = 1;
	1410
	1411	while (1) {
	1412	/* Account partial waits to QEMU_CLOCK_VIRTUAL. */
	1413	qemu_account_warp_timer();
	1414
	1415	/* Run the timers here. This is much more efficient than
	1416	* waking up the I/O thread and waiting for completion.
	1417	*/
	1418	handle_icount_deadline();
	1419
	1420	if (!cpu) {
	1421	cpu = first_cpu;
	1422	}
	1423
	1424	while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
	1425
	1426	atomic_mb_set(&tcg_current_rr_cpu, cpu);
	1427	current_cpu = cpu;
	1428
	1429	qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
	1430	(cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
	1431
	1432	if (cpu_can_run(cpu)) {
	1433	int r;
	1434
	1435	prepare_icount_for_run(cpu);
	1436
	1437	r = tcg_cpu_exec(cpu);
	1438
	1439	process_icount_data(cpu);
	1440
	1441	if (r == EXCP_DEBUG) {
	1442	cpu_handle_guest_debug(cpu);
	1443	break;
	1444	} else if (r == EXCP_ATOMIC) {
	1445	qemu_mutex_unlock_iothread();
	1446	cpu_exec_step_atomic(cpu);
	1447	qemu_mutex_lock_iothread();
	1448	break;
	1449	}
	1450	} else if (cpu->stop) {
	1451	if (cpu->unplug) {
	1452	cpu = CPU_NEXT(cpu);
	1453	}
	1454	break;
	1455	}
	1456
	1457	cpu = CPU_NEXT(cpu);
	1458	} /* while (cpu && !cpu->exit_request).. */
	1459
	1460	/* Does not need atomic_mb_set because a spurious wakeup is okay. */
	1461	atomic_set(&tcg_current_rr_cpu, NULL);
	1462
	1463	if (cpu && cpu->exit_request) {
	1464	atomic_mb_set(&cpu->exit_request, 0);
	1465	}
	1466
	1467	qemu_tcg_rr_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
	1468	deal_with_unplugged_cpus();
	1469	}
	1470
	1471	rcu_unregister_thread();
	1472	return NULL;
	1473	}
	1474
	1475	static void qemu_hax_cpu_thread_fn(void arg)
	1476	{
	1477	CPUState *cpu = arg;
	1478	int r;
	1479
	1480	rcu_register_thread();
	1481	qemu_mutex_lock_iothread();
	1482	qemu_thread_get_self(cpu->thread);
	1483
	1484	cpu->thread_id = qemu_get_thread_id();
	1485	cpu->created = true;
	1486	cpu->halted = 0;
	1487	current_cpu = cpu;
	1488
	1489	hax_init_vcpu(cpu);
	1490	qemu_cond_signal(&qemu_cpu_cond);
	1491
	1492	do {
	1493	if (cpu_can_run(cpu)) {
	1494	r = hax_smp_cpu_exec(cpu);
	1495	if (r == EXCP_DEBUG) {
	1496	cpu_handle_guest_debug(cpu);
	1497	}
	1498	}
	1499
	1500	qemu_wait_io_event(cpu);
	1501	} while (!cpu->unplug \|\| cpu_can_run(cpu));
	1502	rcu_unregister_thread();
	1503	return NULL;
	1504	}
	1505
	1506	/* The HVF-specific vCPU thread function. This one should only run when the host
	1507	* CPU supports the VMX "unrestricted guest" feature. */
	1508	static void qemu_hvf_cpu_thread_fn(void arg)
	1509	{
	1510	CPUState *cpu = arg;
	1511
	1512	int r;
	1513
	1514	assert(hvf_enabled());
	1515
	1516	rcu_register_thread();
	1517
	1518	qemu_mutex_lock_iothread();
	1519	qemu_thread_get_self(cpu->thread);
	1520
	1521	cpu->thread_id = qemu_get_thread_id();
	1522	cpu->can_do_io = 1;
	1523	current_cpu = cpu;
	1524
	1525	hvf_init_vcpu(cpu);
	1526
	1527	/* signal CPU creation */
	1528	cpu->created = true;
	1529	qemu_cond_signal(&qemu_cpu_cond);
	1530
	1531	do {
	1532	if (cpu_can_run(cpu)) {
	1533	r = hvf_vcpu_exec(cpu);
	1534	if (r == EXCP_DEBUG) {
	1535	cpu_handle_guest_debug(cpu);
	1536	}
	1537	}
	1538	qemu_wait_io_event(cpu);
	1539	} while (!cpu->unplug \|\| cpu_can_run(cpu));
	1540
	1541	hvf_vcpu_destroy(cpu);
	1542	cpu->created = false;
	1543	qemu_cond_signal(&qemu_cpu_cond);
	1544	qemu_mutex_unlock_iothread();
	1545	rcu_unregister_thread();
	1546	return NULL;
	1547	}
	1548
	1549	static void qemu_whpx_cpu_thread_fn(void arg)
	1550	{
	1551	CPUState *cpu = arg;
	1552	int r;
	1553
	1554	rcu_register_thread();
	1555
	1556	qemu_mutex_lock_iothread();
	1557	qemu_thread_get_self(cpu->thread);
	1558	cpu->thread_id = qemu_get_thread_id();
	1559	current_cpu = cpu;
	1560
	1561	r = whpx_init_vcpu(cpu);
	1562	if (r < 0) {
	1563	fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r));
	1564	exit(1);
	1565	}
	1566
	1567	/* signal CPU creation */
	1568	cpu->created = true;
	1569	qemu_cond_signal(&qemu_cpu_cond);
	1570
	1571	do {
	1572	if (cpu_can_run(cpu)) {
	1573	r = whpx_vcpu_exec(cpu);
	1574	if (r == EXCP_DEBUG) {
	1575	cpu_handle_guest_debug(cpu);
	1576	}
	1577	}
	1578	while (cpu_thread_is_idle(cpu)) {
	1579	qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
	1580	}
	1581	qemu_wait_io_event_common(cpu);
	1582	} while (!cpu->unplug \|\| cpu_can_run(cpu));
	1583
	1584	whpx_destroy_vcpu(cpu);
	1585	cpu->created = false;
	1586	qemu_cond_signal(&qemu_cpu_cond);
	1587	qemu_mutex_unlock_iothread();
	1588	rcu_unregister_thread();
	1589	return NULL;
	1590	}
	1591
	1592	#ifdef _WIN32
	1593	static void CALLBACK dummy_apc_func(ULONG_PTR unused)
	1594	{
	1595	}
	1596	#endif
	1597
	1598	/* Multi-threaded TCG
	1599	*
	1600	* In the multi-threaded case each vCPU has its own thread. The TLS
	1601	* variable current_cpu can be used deep in the code to find the
	1602	* current CPUState for a given thread.
	1603	*/
	1604
	1605	static void qemu_tcg_cpu_thread_fn(void arg)
	1606	{
	1607	CPUState *cpu = arg;
	1608
	1609	g_assert(!use_icount);
	1610
	1611	rcu_register_thread();
	1612	tcg_register_thread();
	1613
	1614	qemu_mutex_lock_iothread();
	1615	qemu_thread_get_self(cpu->thread);
	1616
	1617	cpu->thread_id = qemu_get_thread_id();
	1618	cpu->created = true;
	1619	cpu->can_do_io = 1;
	1620	current_cpu = cpu;
	1621	qemu_cond_signal(&qemu_cpu_cond);
	1622
	1623	/* process any pending work */
	1624	cpu->exit_request = 1;
	1625
	1626	while (1) {
	1627	if (cpu_can_run(cpu)) {
	1628	int r;
	1629	r = tcg_cpu_exec(cpu);
	1630	switch (r) {
	1631	case EXCP_DEBUG:
	1632	cpu_handle_guest_debug(cpu);
	1633	break;
	1634	case EXCP_HALTED:
	1635	/* during start-up the vCPU is reset and the thread is
	1636	* kicked several times. If we don't ensure we go back
	1637	* to sleep in the halted state we won't cleanly
	1638	* start-up when the vCPU is enabled.
	1639	*
	1640	* cpu->halted should ensure we sleep in wait_io_event
	1641	*/
	1642	g_assert(cpu->halted);
	1643	break;
	1644	case EXCP_ATOMIC:
	1645	qemu_mutex_unlock_iothread();
	1646	cpu_exec_step_atomic(cpu);
	1647	qemu_mutex_lock_iothread();
	1648	default:
	1649	/* Ignore everything else? */
	1650	break;
	1651	}
	1652	}
	1653
	1654	atomic_mb_set(&cpu->exit_request, 0);
	1655	qemu_wait_io_event(cpu);
	1656	} while (!cpu->unplug \|\| cpu_can_run(cpu));
	1657
	1658	qemu_tcg_destroy_vcpu(cpu);
	1659	cpu->created = false;
	1660	qemu_cond_signal(&qemu_cpu_cond);
	1661	qemu_mutex_unlock_iothread();
	1662	rcu_unregister_thread();
	1663	return NULL;
	1664	}
	1665
	1666	static void qemu_cpu_kick_thread(CPUState *cpu)
	1667	{
	1668	#ifndef _WIN32
	1669	int err;
	1670
	1671	if (cpu->thread_kicked) {
	1672	return;
	1673	}
	1674	cpu->thread_kicked = true;
	1675	err = pthread_kill(cpu->thread->thread, SIG_IPI);
	1676	if (err) {
	1677	fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
	1678	exit(1);
	1679	}
	1680	#else /* _WIN32 */
	1681	if (!qemu_cpu_is_self(cpu)) {
	1682	if (whpx_enabled()) {
	1683	whpx_vcpu_kick(cpu);
	1684	} else if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
	1685	fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
	1686	__func__, GetLastError());
	1687	exit(1);
	1688	}
	1689	}
	1690	#endif
	1691	}
	1692
	1693	void qemu_cpu_kick(CPUState *cpu)
	1694	{
	1695	qemu_cond_broadcast(cpu->halt_cond);
	1696	if (tcg_enabled()) {
	1697	cpu_exit(cpu);
	1698	/* NOP unless doing single-thread RR */
	1699	qemu_cpu_kick_rr_cpu();
	1700	} else {
	1701	if (hax_enabled()) {
	1702	/*
	1703	* FIXME: race condition with the exit_request check in
	1704	* hax_vcpu_hax_exec
	1705	*/
	1706	cpu->exit_request = 1;
	1707	}
	1708	qemu_cpu_kick_thread(cpu);
	1709	}
	1710	}
	1711
	1712	void qemu_cpu_kick_self(void)
	1713	{
	1714	assert(current_cpu);
	1715	qemu_cpu_kick_thread(current_cpu);
	1716	}
	1717
	1718	bool qemu_cpu_is_self(CPUState *cpu)
	1719	{
	1720	return qemu_thread_is_self(cpu->thread);
	1721	}
	1722
	1723	bool qemu_in_vcpu_thread(void)
	1724	{
	1725	return current_cpu && qemu_cpu_is_self(current_cpu);
	1726	}
	1727
	1728	static __thread bool iothread_locked = false;
	1729
	1730	bool qemu_mutex_iothread_locked(void)
	1731	{
	1732	return iothread_locked;
	1733	}
	1734
	1735	void qemu_mutex_lock_iothread(void)
	1736	{
	1737	g_assert(!qemu_mutex_iothread_locked());
	1738	qemu_mutex_lock(&qemu_global_mutex);
	1739	iothread_locked = true;
	1740	}
	1741
	1742	void qemu_mutex_unlock_iothread(void)
	1743	{
	1744	g_assert(qemu_mutex_iothread_locked());
	1745	iothread_locked = false;
	1746	qemu_mutex_unlock(&qemu_global_mutex);
	1747	}
	1748
	1749	static bool all_vcpus_paused(void)
	1750	{
	1751	CPUState *cpu;
	1752
	1753	CPU_FOREACH(cpu) {
	1754	if (!cpu->stopped) {
	1755	return false;
	1756	}
	1757	}
	1758
	1759	return true;
	1760	}
	1761
	1762	void pause_all_vcpus(void)
	1763	{
	1764	CPUState *cpu;
	1765
	1766	qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
	1767	CPU_FOREACH(cpu) {
	1768	if (qemu_cpu_is_self(cpu)) {
	1769	qemu_cpu_stop(cpu, true);
	1770	} else {
	1771	cpu->stop = true;
	1772	qemu_cpu_kick(cpu);
	1773	}
	1774	}
	1775
	1776	while (!all_vcpus_paused()) {
	1777	qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
	1778	CPU_FOREACH(cpu) {
	1779	qemu_cpu_kick(cpu);
	1780	}
	1781	}
	1782	}
	1783
	1784	void cpu_resume(CPUState *cpu)
	1785	{
	1786	cpu->stop = false;
	1787	cpu->stopped = false;
	1788	qemu_cpu_kick(cpu);
	1789	}
	1790
	1791	void resume_all_vcpus(void)
	1792	{
	1793	CPUState *cpu;
	1794
	1795	qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
	1796	CPU_FOREACH(cpu) {
	1797	cpu_resume(cpu);
	1798	}
	1799	}
	1800
	1801	void cpu_remove_sync(CPUState *cpu)
	1802	{
	1803	cpu->stop = true;
	1804	cpu->unplug = true;
	1805	qemu_cpu_kick(cpu);
	1806	qemu_mutex_unlock_iothread();
	1807	qemu_thread_join(cpu->thread);
	1808	qemu_mutex_lock_iothread();
	1809	}
	1810
	1811	/* For temporary buffers for forming a name */
	1812	#define VCPU_THREAD_NAME_SIZE 16
	1813
	1814	static void qemu_tcg_init_vcpu(CPUState *cpu)
	1815	{
	1816	char thread_name[VCPU_THREAD_NAME_SIZE];
	1817	static QemuCond *single_tcg_halt_cond;
	1818	static QemuThread *single_tcg_cpu_thread;
	1819	static int tcg_region_inited;
	1820
	1821	/*
	1822	* Initialize TCG regions--once. Now is a good time, because:
	1823	* (1) TCG's init context, prologue and target globals have been set up.
	1824	* (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
	1825	* -accel flag is processed, so the check doesn't work then).
	1826	*/
	1827	if (!tcg_region_inited) {
	1828	tcg_region_inited = 1;
	1829	tcg_region_init();
	1830	}
	1831
	1832	if (qemu_tcg_mttcg_enabled() \|\| !single_tcg_cpu_thread) {
	1833	cpu->thread = g_malloc0(sizeof(QemuThread));
	1834	cpu->halt_cond = g_malloc0(sizeof(QemuCond));
	1835	qemu_cond_init(cpu->halt_cond);
	1836
	1837	if (qemu_tcg_mttcg_enabled()) {
	1838	/* create a thread per vCPU with TCG (MTTCG) */
	1839	parallel_cpus = true;
	1840	snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
	1841	cpu->cpu_index);
	1842
	1843	qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
	1844	cpu, QEMU_THREAD_JOINABLE);
	1845
	1846	} else {
	1847	/* share a single thread for all cpus with TCG */
	1848	snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
	1849	qemu_thread_create(cpu->thread, thread_name,
	1850	qemu_tcg_rr_cpu_thread_fn,
	1851	cpu, QEMU_THREAD_JOINABLE);
	1852
	1853	single_tcg_halt_cond = cpu->halt_cond;
	1854	single_tcg_cpu_thread = cpu->thread;
	1855	}
	1856	#ifdef _WIN32
	1857	cpu->hThread = qemu_thread_get_handle(cpu->thread);
	1858	#endif
	1859	while (!cpu->created) {
	1860	qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
	1861	}
	1862	} else {
	1863	/* For non-MTTCG cases we share the thread */
	1864	cpu->thread = single_tcg_cpu_thread;
	1865	cpu->halt_cond = single_tcg_halt_cond;
	1866	}
	1867	}
	1868
	1869	static void qemu_hax_start_vcpu(CPUState *cpu)
	1870	{
	1871	char thread_name[VCPU_THREAD_NAME_SIZE];
	1872
	1873	cpu->thread = g_malloc0(sizeof(QemuThread));
	1874	cpu->halt_cond = g_malloc0(sizeof(QemuCond));
	1875	qemu_cond_init(cpu->halt_cond);
	1876
	1877	snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
	1878	cpu->cpu_index);
	1879	qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
	1880	cpu, QEMU_THREAD_JOINABLE);
	1881	#ifdef _WIN32
	1882	cpu->hThread = qemu_thread_get_handle(cpu->thread);
	1883	#endif
	1884	while (!cpu->created) {
	1885	qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
	1886	}
	1887	}
	1888
	1889	static void qemu_kvm_start_vcpu(CPUState *cpu)
	1890	{
	1891	char thread_name[VCPU_THREAD_NAME_SIZE];
	1892
	1893	cpu->thread = g_malloc0(sizeof(QemuThread));
	1894	cpu->halt_cond = g_malloc0(sizeof(QemuCond));
	1895	qemu_cond_init(cpu->halt_cond);
	1896	snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
	1897	cpu->cpu_index);
	1898	qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
	1899	cpu, QEMU_THREAD_JOINABLE);
	1900	while (!cpu->created) {
	1901	qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
	1902	}
	1903	}
	1904
	1905	static void qemu_hvf_start_vcpu(CPUState *cpu)
	1906	{
	1907	char thread_name[VCPU_THREAD_NAME_SIZE];
	1908
	1909	/* HVF currently does not support TCG, and only runs in
	1910	* unrestricted-guest mode. */
	1911	assert(hvf_enabled());
	1912
	1913	cpu->thread = g_malloc0(sizeof(QemuThread));
	1914	cpu->halt_cond = g_malloc0(sizeof(QemuCond));
	1915	qemu_cond_init(cpu->halt_cond);
	1916
	1917	snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
	1918	cpu->cpu_index);
	1919	qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
	1920	cpu, QEMU_THREAD_JOINABLE);
	1921	while (!cpu->created) {
	1922	qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
	1923	}
	1924	}
	1925
	1926	static void qemu_whpx_start_vcpu(CPUState *cpu)
	1927	{
	1928	char thread_name[VCPU_THREAD_NAME_SIZE];
	1929
	1930	cpu->thread = g_malloc0(sizeof(QemuThread));
	1931	cpu->halt_cond = g_malloc0(sizeof(QemuCond));
	1932	qemu_cond_init(cpu->halt_cond);
	1933	snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
	1934	cpu->cpu_index);
	1935	qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
	1936	cpu, QEMU_THREAD_JOINABLE);
	1937	#ifdef _WIN32
	1938	cpu->hThread = qemu_thread_get_handle(cpu->thread);
	1939	#endif
	1940	while (!cpu->created) {
	1941	qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
	1942	}
	1943	}
	1944
	1945	static void qemu_dummy_start_vcpu(CPUState *cpu)
	1946	{
	1947	char thread_name[VCPU_THREAD_NAME_SIZE];
	1948
	1949	cpu->thread = g_malloc0(sizeof(QemuThread));
	1950	cpu->halt_cond = g_malloc0(sizeof(QemuCond));
	1951	qemu_cond_init(cpu->halt_cond);
	1952	snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
	1953	cpu->cpu_index);
	1954	qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
	1955	QEMU_THREAD_JOINABLE);
	1956	while (!cpu->created) {
	1957	qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
	1958	}
	1959	}
	1960
	1961	void qemu_init_vcpu(CPUState *cpu)
	1962	{
	1963	cpu->nr_cores = smp_cores;
	1964	cpu->nr_threads = smp_threads;
	1965	cpu->stopped = true;
	1966
	1967	if (!cpu->as) {
	1968	/* If the target cpu hasn't set up any address spaces itself,
	1969	* give it the default one.
	1970	*/
	1971	cpu->num_ases = 1;
	1972	cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
	1973	}
	1974
	1975	if (kvm_enabled()) {
	1976	qemu_kvm_start_vcpu(cpu);
	1977	} else if (hax_enabled()) {
	1978	qemu_hax_start_vcpu(cpu);
	1979	} else if (hvf_enabled()) {
	1980	qemu_hvf_start_vcpu(cpu);
	1981	} else if (tcg_enabled()) {
	1982	qemu_tcg_init_vcpu(cpu);
	1983	} else if (whpx_enabled()) {
	1984	qemu_whpx_start_vcpu(cpu);
	1985	} else {
	1986	qemu_dummy_start_vcpu(cpu);
	1987	}
	1988	}
	1989
	1990	void cpu_stop_current(void)
	1991	{
	1992	if (current_cpu) {
	1993	qemu_cpu_stop(current_cpu, true);
	1994	}
	1995	}
	1996
	1997	int vm_stop(RunState state)
	1998	{
	1999	if (qemu_in_vcpu_thread()) {
	2000	qemu_system_vmstop_request_prepare();
	2001	qemu_system_vmstop_request(state);
	2002	/*
	2003	* FIXME: should not return to device code in case
	2004	* vm_stop() has been requested.
	2005	*/
	2006	cpu_stop_current();
	2007	return 0;
	2008	}
	2009
	2010	return do_vm_stop(state);
	2011	}
	2012
	2013	/**
	2014	* Prepare for (re)starting the VM.
	2015	* Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
	2016	* running or in case of an error condition), 0 otherwise.
	2017	*/
	2018	int vm_prepare_start(void)
	2019	{
	2020	RunState requested;
	2021	int res = 0;
	2022
	2023	qemu_vmstop_requested(&requested);
	2024	if (runstate_is_running() && requested == RUN_STATE__MAX) {
	2025	return -1;
	2026	}
	2027
	2028	/* Ensure that a STOP/RESUME pair of events is emitted if a
	2029	* vmstop request was pending. The BLOCK_IO_ERROR event, for
	2030	* example, according to documentation is always followed by
	2031	* the STOP event.
	2032	*/
	2033	if (runstate_is_running()) {
	2034	qapi_event_send_stop(&error_abort);
	2035	res = -1;
	2036	} else {
	2037	replay_enable_events();
	2038	cpu_enable_ticks();
	2039	runstate_set(RUN_STATE_RUNNING);
	2040	vm_state_notify(1, RUN_STATE_RUNNING);
	2041	}
	2042
	2043	/* We are sending this now, but the CPUs will be resumed shortly later */
	2044	qapi_event_send_resume(&error_abort);
	2045	return res;
	2046	}
	2047
	2048	void vm_start(void)
	2049	{
	2050	if (!vm_prepare_start()) {
	2051	resume_all_vcpus();
	2052	}
	2053	}
	2054
	2055	/* does a state transition even if the VM is already stopped,
	2056	current state is forgotten forever */
	2057	int vm_stop_force_state(RunState state)
	2058	{
	2059	if (runstate_is_running()) {
	2060	return vm_stop(state);
	2061	} else {
	2062	runstate_set(state);
	2063
	2064	bdrv_drain_all();
	2065	/* Make sure to return an error if the flush in a previous vm_stop()
	2066	* failed. */
	2067	return bdrv_flush_all();
	2068	}
	2069	}
	2070
	2071	void list_cpus(FILE f, fprintf_function cpu_fprintf, const char optarg)
	2072	{
	2073	/* XXX: implement xxx_cpu_list for targets that still miss it */
	2074	#if defined(cpu_list)
	2075	cpu_list(f, cpu_fprintf);
	2076	#endif
	2077	}
	2078
	2079	CpuInfoList qmp_query_cpus(Error *errp)
	2080	{
	2081	MachineState *ms = MACHINE(qdev_get_machine());
	2082	MachineClass *mc = MACHINE_GET_CLASS(ms);
	2083	CpuInfoList head = NULL, cur_item = NULL;
	2084	CPUState *cpu;
	2085
	2086	CPU_FOREACH(cpu) {
	2087	CpuInfoList *info;
	2088	#if defined(TARGET_I386)
	2089	X86CPU *x86_cpu = X86_CPU(cpu);
	2090	CPUX86State *env = &x86_cpu->env;
	2091	#elif defined(TARGET_PPC)
	2092	PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
	2093	CPUPPCState *env = &ppc_cpu->env;
	2094	#elif defined(TARGET_SPARC)
	2095	SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
	2096	CPUSPARCState *env = &sparc_cpu->env;
	2097	#elif defined(TARGET_MIPS)
	2098	MIPSCPU *mips_cpu = MIPS_CPU(cpu);
	2099	CPUMIPSState *env = &mips_cpu->env;
	2100	#elif defined(TARGET_TRICORE)
	2101	TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
	2102	CPUTriCoreState *env = &tricore_cpu->env;
	2103	#endif
	2104
	2105	cpu_synchronize_state(cpu);
	2106
	2107	info = g_malloc0(sizeof(*info));
	2108	info->value = g_malloc0(sizeof(*info->value));
	2109	info->value->CPU = cpu->cpu_index;
	2110	info->value->current = (cpu == first_cpu);
	2111	info->value->halted = cpu->halted;
	2112	info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
	2113	info->value->thread_id = cpu->thread_id;
	2114	#if defined(TARGET_I386)
	2115	info->value->arch = CPU_INFO_ARCH_X86;
	2116	info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
	2117	#elif defined(TARGET_PPC)
	2118	info->value->arch = CPU_INFO_ARCH_PPC;
	2119	info->value->u.ppc.nip = env->nip;
	2120	#elif defined(TARGET_SPARC)
	2121	info->value->arch = CPU_INFO_ARCH_SPARC;
	2122	info->value->u.q_sparc.pc = env->pc;
	2123	info->value->u.q_sparc.npc = env->npc;
	2124	#elif defined(TARGET_MIPS)
	2125	info->value->arch = CPU_INFO_ARCH_MIPS;
	2126	info->value->u.q_mips.PC = env->active_tc.PC;
	2127	#elif defined(TARGET_TRICORE)
	2128	info->value->arch = CPU_INFO_ARCH_TRICORE;
	2129	info->value->u.tricore.PC = env->PC;
	2130	#else
	2131	info->value->arch = CPU_INFO_ARCH_OTHER;
	2132	#endif
	2133	info->value->has_props = !!mc->cpu_index_to_instance_props;
	2134	if (info->value->has_props) {
	2135	CpuInstanceProperties *props;
	2136	props = g_malloc0(sizeof(*props));
	2137	*props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
	2138	info->value->props = props;
	2139	}
	2140
	2141	/* XXX: waiting for the qapi to support GSList */
	2142	if (!cur_item) {
	2143	head = cur_item = info;
	2144	} else {
	2145	cur_item->next = info;
	2146	cur_item = info;
	2147	}
	2148	}
	2149
	2150	return head;
	2151	}
	2152
	2153	void qmp_memsave(int64_t addr, int64_t size, const char *filename,
	2154	bool has_cpu, int64_t cpu_index, Error **errp)
	2155	{
	2156	FILE *f;
	2157	uint32_t l;
	2158	CPUState *cpu;
	2159	uint8_t buf[1024];
	2160	int64_t orig_addr = addr, orig_size = size;
	2161
	2162	if (!has_cpu) {
	2163	cpu_index = 0;
	2164	}
	2165
	2166	cpu = qemu_get_cpu(cpu_index);
	2167	if (cpu == NULL) {
	2168	error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
	2169	"a CPU number");
	2170	return;
	2171	}
	2172
	2173	f = fopen(filename, "wb");
	2174	if (!f) {
	2175	error_setg_file_open(errp, errno, filename);
	2176	return;
	2177	}
	2178
	2179	while (size != 0) {
	2180	l = sizeof(buf);
	2181	if (l > size)
	2182	l = size;
	2183	if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
	2184	error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
	2185	" specified", orig_addr, orig_size);
	2186	goto exit;
	2187	}
	2188	if (fwrite(buf, 1, l, f) != l) {
	2189	error_setg(errp, QERR_IO_ERROR);
	2190	goto exit;
	2191	}
	2192	addr += l;
	2193	size -= l;
	2194	}
	2195
	2196	exit:
	2197	fclose(f);
	2198	}
	2199
	2200	void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
	2201	Error **errp)
	2202	{
	2203	FILE *f;
	2204	uint32_t l;
	2205	uint8_t buf[1024];
	2206
	2207	f = fopen(filename, "wb");
	2208	if (!f) {
	2209	error_setg_file_open(errp, errno, filename);
	2210	return;
	2211	}
	2212
	2213	while (size != 0) {
	2214	l = sizeof(buf);
	2215	if (l > size)
	2216	l = size;
	2217	cpu_physical_memory_read(addr, buf, l);
	2218	if (fwrite(buf, 1, l, f) != l) {
	2219	error_setg(errp, QERR_IO_ERROR);
	2220	goto exit;
	2221	}
	2222	addr += l;
	2223	size -= l;
	2224	}
	2225
	2226	exit:
	2227	fclose(f);
	2228	}
	2229
	2230	void qmp_inject_nmi(Error **errp)
	2231	{
	2232	nmi_monitor_handle(monitor_get_cpu_index(), errp);
	2233	}
	2234
	2235	void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
	2236	{
	2237	if (!use_icount) {
	2238	return;
	2239	}
	2240
	2241	cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
	2242	(cpu_get_clock() - cpu_get_icount())/SCALE_MS);
	2243	if (icount_align_option) {
	2244	cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
	2245	cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
	2246	} else {
	2247	cpu_fprintf(f, "Max guest delay NA\n");
	2248	cpu_fprintf(f, "Max guest advance NA\n");
	2249	}
	2250	}