qapi: Restrict balloon-related commands to machine code
[qemu.git] / softmmu / cpus.c
CommitLineData
296af7c9
BS
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
7b31bbc2 25#include "qemu/osdep.h"
a8d25326 26#include "qemu-common.h"
8d4e9146 27#include "qemu/config-file.h"
9ec374a7 28#include "qemu/cutils.h"
d6454270 29#include "migration/vmstate.h"
83c9089e 30#include "monitor/monitor.h"
e688df6b 31#include "qapi/error.h"
112ed241 32#include "qapi/qapi-commands-misc.h"
9af23989 33#include "qapi/qapi-events-run-state.h"
a4e15de9 34#include "qapi/qmp/qerror.h"
d49b6836 35#include "qemu/error-report.h"
76c86615 36#include "qemu/qemu-print.h"
14a48c1d 37#include "sysemu/tcg.h"
da31d594 38#include "sysemu/block-backend.h"
022c62cb 39#include "exec/gdbstub.h"
9c17d615 40#include "sysemu/dma.h"
b3946626 41#include "sysemu/hw_accel.h"
9c17d615 42#include "sysemu/kvm.h"
b0cb0a66 43#include "sysemu/hax.h"
c97d6d2c 44#include "sysemu/hvf.h"
19306806 45#include "sysemu/whpx.h"
63c91552 46#include "exec/exec-all.h"
296af7c9 47
1de7afc9 48#include "qemu/thread.h"
30865f31 49#include "qemu/plugin.h"
9c17d615
PB
50#include "sysemu/cpus.h"
51#include "sysemu/qtest.h"
1de7afc9 52#include "qemu/main-loop.h"
922a01a0 53#include "qemu/option.h"
1de7afc9 54#include "qemu/bitmap.h"
cb365646 55#include "qemu/seqlock.h"
9c09a251 56#include "qemu/guest-random.h"
dcb32f1d 57#include "tcg/tcg.h"
9cb805fd 58#include "hw/nmi.h"
8b427044 59#include "sysemu/replay.h"
54d31236 60#include "sysemu/runstate.h"
5cc8767d 61#include "hw/boards.h"
650d103d 62#include "hw/hw.h"
0ff0fc19 63
b0c3cf94
CF
64#include "sysemu/cpu-throttle.h"
65
6d9cb73c
JK
66#ifdef CONFIG_LINUX
67
68#include <sys/prctl.h>
69
c0532a76
MT
70#ifndef PR_MCE_KILL
71#define PR_MCE_KILL 33
72#endif
73
6d9cb73c
JK
74#ifndef PR_MCE_KILL_SET
75#define PR_MCE_KILL_SET 1
76#endif
77
78#ifndef PR_MCE_KILL_EARLY
79#define PR_MCE_KILL_EARLY 1
80#endif
81
82#endif /* CONFIG_LINUX */
83
bd1f7ff4
YK
84static QemuMutex qemu_global_mutex;
85
27498bef
ST
86int64_t max_delay;
87int64_t max_advance;
296af7c9 88
321bc0b2
TC
89bool cpu_is_stopped(CPUState *cpu)
90{
91 return cpu->stopped || !runstate_is_running();
92}
93
0c0fcc20
EC
94static inline bool cpu_work_list_empty(CPUState *cpu)
95{
96 bool ret;
97
98 qemu_mutex_lock(&cpu->work_mutex);
99 ret = QSIMPLEQ_EMPTY(&cpu->work_list);
100 qemu_mutex_unlock(&cpu->work_mutex);
101 return ret;
102}
103
a98ae1d8 104static bool cpu_thread_is_idle(CPUState *cpu)
ac873f1e 105{
0c0fcc20 106 if (cpu->stop || !cpu_work_list_empty(cpu)) {
ac873f1e
PM
107 return false;
108 }
321bc0b2 109 if (cpu_is_stopped(cpu)) {
ac873f1e
PM
110 return true;
111 }
8c2e1b00 112 if (!cpu->halted || cpu_has_work(cpu) ||
215e79c0 113 kvm_halt_in_kernel()) {
ac873f1e
PM
114 return false;
115 }
116 return true;
117}
118
119static bool all_cpu_threads_idle(void)
120{
182735ef 121 CPUState *cpu;
ac873f1e 122
bdc44640 123 CPU_FOREACH(cpu) {
182735ef 124 if (!cpu_thread_is_idle(cpu)) {
ac873f1e
PM
125 return false;
126 }
127 }
128 return true;
129}
130
946fb27c
PB
131/***********************************************************/
132/* guest cycle counter */
133
a3270e19
PB
134/* Protected by TimersState seqlock */
135
5045e9d9 136static bool icount_sleep = true;
946fb27c
PB
137/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
138#define MAX_ICOUNT_SHIFT 10
a3270e19 139
946fb27c 140typedef struct TimersState {
cb365646 141 /* Protected by BQL. */
946fb27c
PB
142 int64_t cpu_ticks_prev;
143 int64_t cpu_ticks_offset;
cb365646 144
94377115
PB
145 /* Protect fields that can be respectively read outside the
146 * BQL, and written from multiple threads.
cb365646
LPF
147 */
148 QemuSeqLock vm_clock_seqlock;
94377115
PB
149 QemuSpin vm_clock_lock;
150
151 int16_t cpu_ticks_enabled;
c96778bb 152
c1ff073c 153 /* Conversion factor from emulated instructions to virtual clock ticks. */
94377115
PB
154 int16_t icount_time_shift;
155
c96778bb
FK
156 /* Compensate for varying guest execution speed. */
157 int64_t qemu_icount_bias;
94377115
PB
158
159 int64_t vm_clock_warp_start;
160 int64_t cpu_clock_offset;
161
c96778bb
FK
162 /* Only written by TCG thread */
163 int64_t qemu_icount;
94377115 164
b39e3f34 165 /* for adjusting icount */
b39e3f34
PD
166 QEMUTimer *icount_rt_timer;
167 QEMUTimer *icount_vm_timer;
168 QEMUTimer *icount_warp_timer;
946fb27c
PB
169} TimersState;
170
d9cd4007 171static TimersState timers_state;
8d4e9146
FK
172bool mttcg_enabled;
173
946fb27c 174
e4cd9657
AB
175/* The current number of executed instructions is based on what we
176 * originally budgeted minus the current state of the decrementing
177 * icount counters in extra/u16.low.
178 */
179static int64_t cpu_get_icount_executed(CPUState *cpu)
180{
5e140196
RH
181 return (cpu->icount_budget -
182 (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra));
e4cd9657
AB
183}
184
512d3c80
AB
185/*
186 * Update the global shared timer_state.qemu_icount to take into
187 * account executed instructions. This is done by the TCG vCPU
188 * thread so the main-loop can see time has moved forward.
189 */
9b4e6f49 190static void cpu_update_icount_locked(CPUState *cpu)
512d3c80
AB
191{
192 int64_t executed = cpu_get_icount_executed(cpu);
193 cpu->icount_budget -= executed;
194
d73415a3 195 qatomic_set_i64(&timers_state.qemu_icount,
38adcb6e 196 timers_state.qemu_icount + executed);
9b4e6f49
PB
197}
198
199/*
200 * Update the global shared timer_state.qemu_icount to take into
201 * account executed instructions. This is done by the TCG vCPU
202 * thread so the main-loop can see time has moved forward.
203 */
204void cpu_update_icount(CPUState *cpu)
205{
206 seqlock_write_lock(&timers_state.vm_clock_seqlock,
207 &timers_state.vm_clock_lock);
208 cpu_update_icount_locked(cpu);
94377115
PB
209 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
210 &timers_state.vm_clock_lock);
512d3c80
AB
211}
212
c1ff073c 213static int64_t cpu_get_icount_raw_locked(void)
946fb27c 214{
4917cf44 215 CPUState *cpu = current_cpu;
946fb27c 216
243c5f77 217 if (cpu && cpu->running) {
414b15c9 218 if (!cpu->can_do_io) {
493d89bf 219 error_report("Bad icount read");
2a62914b 220 exit(1);
946fb27c 221 }
e4cd9657 222 /* Take into account what has run */
9b4e6f49 223 cpu_update_icount_locked(cpu);
946fb27c 224 }
38adcb6e 225 /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
d73415a3 226 return qatomic_read_i64(&timers_state.qemu_icount);
2a62914b
PD
227}
228
2a62914b
PD
229static int64_t cpu_get_icount_locked(void)
230{
c1ff073c 231 int64_t icount = cpu_get_icount_raw_locked();
d73415a3 232 return qatomic_read_i64(&timers_state.qemu_icount_bias) +
c97595d1 233 cpu_icount_to_ns(icount);
c1ff073c
PB
234}
235
236int64_t cpu_get_icount_raw(void)
237{
238 int64_t icount;
239 unsigned start;
240
241 do {
242 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
243 icount = cpu_get_icount_raw_locked();
244 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
245
246 return icount;
946fb27c
PB
247}
248
c1ff073c 249/* Return the virtual CPU time, based on the instruction counter. */
17a15f1b
PB
250int64_t cpu_get_icount(void)
251{
252 int64_t icount;
253 unsigned start;
254
255 do {
256 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
257 icount = cpu_get_icount_locked();
258 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
259
260 return icount;
261}
262
3f031313
FK
263int64_t cpu_icount_to_ns(int64_t icount)
264{
d73415a3 265 return icount << qatomic_read(&timers_state.icount_time_shift);
3f031313
FK
266}
267
f2a4ad6d
PB
268static int64_t cpu_get_ticks_locked(void)
269{
270 int64_t ticks = timers_state.cpu_ticks_offset;
271 if (timers_state.cpu_ticks_enabled) {
272 ticks += cpu_get_host_ticks();
273 }
274
275 if (timers_state.cpu_ticks_prev > ticks) {
276 /* Non increasing ticks may happen if the host uses software suspend. */
277 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
278 ticks = timers_state.cpu_ticks_prev;
279 }
280
281 timers_state.cpu_ticks_prev = ticks;
282 return ticks;
283}
284
d90f3cca
C
285/* return the time elapsed in VM between vm_start and vm_stop. Unless
286 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
287 * counter.
d90f3cca 288 */
946fb27c
PB
289int64_t cpu_get_ticks(void)
290{
5f3e3101
PB
291 int64_t ticks;
292
946fb27c
PB
293 if (use_icount) {
294 return cpu_get_icount();
295 }
5f3e3101 296
f2a4ad6d
PB
297 qemu_spin_lock(&timers_state.vm_clock_lock);
298 ticks = cpu_get_ticks_locked();
299 qemu_spin_unlock(&timers_state.vm_clock_lock);
5f3e3101 300 return ticks;
946fb27c
PB
301}
302
cb365646 303static int64_t cpu_get_clock_locked(void)
946fb27c 304{
1d45cea5 305 int64_t time;
cb365646 306
1d45cea5 307 time = timers_state.cpu_clock_offset;
5f3e3101 308 if (timers_state.cpu_ticks_enabled) {
1d45cea5 309 time += get_clock();
946fb27c 310 }
cb365646 311
1d45cea5 312 return time;
cb365646
LPF
313}
314
d90f3cca 315/* Return the monotonic time elapsed in VM, i.e.,
8212ff86
PM
316 * the time between vm_start and vm_stop
317 */
cb365646
LPF
318int64_t cpu_get_clock(void)
319{
320 int64_t ti;
321 unsigned start;
322
323 do {
324 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
325 ti = cpu_get_clock_locked();
326 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
327
328 return ti;
946fb27c
PB
329}
330
cb365646 331/* enable cpu_get_ticks()
3224e878 332 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
cb365646 333 */
946fb27c
PB
334void cpu_enable_ticks(void)
335{
94377115
PB
336 seqlock_write_lock(&timers_state.vm_clock_seqlock,
337 &timers_state.vm_clock_lock);
946fb27c 338 if (!timers_state.cpu_ticks_enabled) {
4a7428c5 339 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
946fb27c
PB
340 timers_state.cpu_clock_offset -= get_clock();
341 timers_state.cpu_ticks_enabled = 1;
342 }
94377115
PB
343 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
344 &timers_state.vm_clock_lock);
946fb27c
PB
345}
346
347/* disable cpu_get_ticks() : the clock is stopped. You must not call
cb365646 348 * cpu_get_ticks() after that.
3224e878 349 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
cb365646 350 */
946fb27c
PB
351void cpu_disable_ticks(void)
352{
94377115
PB
353 seqlock_write_lock(&timers_state.vm_clock_seqlock,
354 &timers_state.vm_clock_lock);
946fb27c 355 if (timers_state.cpu_ticks_enabled) {
4a7428c5 356 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
cb365646 357 timers_state.cpu_clock_offset = cpu_get_clock_locked();
946fb27c
PB
358 timers_state.cpu_ticks_enabled = 0;
359 }
94377115
PB
360 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
361 &timers_state.vm_clock_lock);
946fb27c
PB
362}
363
364/* Correlation between real and virtual time is always going to be
365 fairly approximate, so ignore small variation.
366 When the guest is idle real and virtual time will be aligned in
367 the IO wait loop. */
73bcb24d 368#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
946fb27c
PB
369
370static void icount_adjust(void)
371{
372 int64_t cur_time;
373 int64_t cur_icount;
374 int64_t delta;
a3270e19
PB
375
376 /* Protected by TimersState mutex. */
946fb27c 377 static int64_t last_delta;
468cc7cf 378
946fb27c
PB
379 /* If the VM is not running, then do nothing. */
380 if (!runstate_is_running()) {
381 return;
382 }
468cc7cf 383
94377115
PB
384 seqlock_write_lock(&timers_state.vm_clock_seqlock,
385 &timers_state.vm_clock_lock);
b8164e68
PD
386 cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
387 cpu_get_clock_locked());
17a15f1b 388 cur_icount = cpu_get_icount_locked();
468cc7cf 389
946fb27c
PB
390 delta = cur_icount - cur_time;
391 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
392 if (delta > 0
393 && last_delta + ICOUNT_WOBBLE < delta * 2
c1ff073c 394 && timers_state.icount_time_shift > 0) {
946fb27c 395 /* The guest is getting too far ahead. Slow time down. */
d73415a3 396 qatomic_set(&timers_state.icount_time_shift,
c1ff073c 397 timers_state.icount_time_shift - 1);
946fb27c
PB
398 }
399 if (delta < 0
400 && last_delta - ICOUNT_WOBBLE > delta * 2
c1ff073c 401 && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
946fb27c 402 /* The guest is getting too far behind. Speed time up. */
d73415a3 403 qatomic_set(&timers_state.icount_time_shift,
c1ff073c 404 timers_state.icount_time_shift + 1);
946fb27c
PB
405 }
406 last_delta = delta;
d73415a3 407 qatomic_set_i64(&timers_state.qemu_icount_bias,
c97595d1
EC
408 cur_icount - (timers_state.qemu_icount
409 << timers_state.icount_time_shift));
94377115
PB
410 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
411 &timers_state.vm_clock_lock);
946fb27c
PB
412}
413
414static void icount_adjust_rt(void *opaque)
415{
b39e3f34 416 timer_mod(timers_state.icount_rt_timer,
1979b908 417 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
946fb27c
PB
418 icount_adjust();
419}
420
421static void icount_adjust_vm(void *opaque)
422{
b39e3f34 423 timer_mod(timers_state.icount_vm_timer,
40daca54 424 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
73bcb24d 425 NANOSECONDS_PER_SECOND / 10);
946fb27c
PB
426 icount_adjust();
427}
428
429static int64_t qemu_icount_round(int64_t count)
430{
d73415a3 431 int shift = qatomic_read(&timers_state.icount_time_shift);
c1ff073c 432 return (count + (1 << shift) - 1) >> shift;
946fb27c
PB
433}
434
efab87cf 435static void icount_warp_rt(void)
946fb27c 436{
ccffff48
AB
437 unsigned seq;
438 int64_t warp_start;
439
17a15f1b
PB
440 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
441 * changes from -1 to another value, so the race here is okay.
442 */
ccffff48
AB
443 do {
444 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
b39e3f34 445 warp_start = timers_state.vm_clock_warp_start;
ccffff48
AB
446 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
447
448 if (warp_start == -1) {
946fb27c
PB
449 return;
450 }
451
94377115
PB
452 seqlock_write_lock(&timers_state.vm_clock_seqlock,
453 &timers_state.vm_clock_lock);
946fb27c 454 if (runstate_is_running()) {
74c0b816
PB
455 int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
456 cpu_get_clock_locked());
8ed961d9
PB
457 int64_t warp_delta;
458
b39e3f34 459 warp_delta = clock - timers_state.vm_clock_warp_start;
8ed961d9 460 if (use_icount == 2) {
946fb27c 461 /*
40daca54 462 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
946fb27c
PB
463 * far ahead of real time.
464 */
17a15f1b 465 int64_t cur_icount = cpu_get_icount_locked();
bf2a7ddb 466 int64_t delta = clock - cur_icount;
8ed961d9 467 warp_delta = MIN(warp_delta, delta);
946fb27c 468 }
d73415a3 469 qatomic_set_i64(&timers_state.qemu_icount_bias,
c97595d1 470 timers_state.qemu_icount_bias + warp_delta);
946fb27c 471 }
b39e3f34 472 timers_state.vm_clock_warp_start = -1;
94377115
PB
473 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
474 &timers_state.vm_clock_lock);
8ed961d9
PB
475
476 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
477 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
478 }
946fb27c
PB
479}
480
/* Callback of icount_warp_timer: finish the pending warp. @opaque is unused. */
static void icount_timer_cb(void *opaque)
{
    /*
     * No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
488
8156be56
PB
489void qtest_clock_warp(int64_t dest)
490{
40daca54 491 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
efef88b3 492 AioContext *aio_context;
8156be56 493 assert(qtest_enabled());
efef88b3 494 aio_context = qemu_get_aio_context();
8156be56 495 while (clock < dest) {
dcb15780
PD
496 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
497 QEMU_TIMER_ATTR_ALL);
c9299e2f 498 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
efef88b3 499
94377115
PB
500 seqlock_write_lock(&timers_state.vm_clock_seqlock,
501 &timers_state.vm_clock_lock);
d73415a3 502 qatomic_set_i64(&timers_state.qemu_icount_bias,
c97595d1 503 timers_state.qemu_icount_bias + warp);
94377115
PB
504 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
505 &timers_state.vm_clock_lock);
17a15f1b 506
40daca54 507 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
efef88b3 508 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
40daca54 509 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56 510 }
40daca54 511 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
8156be56
PB
512}
513
e76d1798 514void qemu_start_warp_timer(void)
946fb27c 515{
ce78d18c 516 int64_t clock;
946fb27c
PB
517 int64_t deadline;
518
e76d1798 519 if (!use_icount) {
946fb27c
PB
520 return;
521 }
522
8bd7f71d
PD
523 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
524 * do not fire, so computing the deadline does not make sense.
525 */
526 if (!runstate_is_running()) {
527 return;
528 }
529
0c08185f
PD
530 if (replay_mode != REPLAY_MODE_PLAY) {
531 if (!all_cpu_threads_idle()) {
532 return;
533 }
8bd7f71d 534
0c08185f
PD
535 if (qtest_enabled()) {
536 /* When testing, qtest commands advance icount. */
537 return;
538 }
946fb27c 539
0c08185f
PD
540 replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
541 } else {
542 /* warp clock deterministically in record/replay mode */
543 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
544 /* vCPU is sleeping and warp can't be started.
545 It is probably a race condition: notification sent
546 to vCPU was processed in advance and vCPU went to sleep.
547 Therefore we have to wake it up for doing someting. */
548 if (replay_has_checkpoint()) {
549 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
550 }
551 return;
552 }
8156be56
PB
553 }
554
ac70aafc 555 /* We want to use the earliest deadline from ALL vm_clocks */
bf2a7ddb 556 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
dcb15780
PD
557 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
558 ~QEMU_TIMER_ATTR_EXTERNAL);
ce78d18c 559 if (deadline < 0) {
d7a0f71d
VC
560 static bool notified;
561 if (!icount_sleep && !notified) {
3dc6f869 562 warn_report("icount sleep disabled and no active timers");
d7a0f71d
VC
563 notified = true;
564 }
ce78d18c 565 return;
ac70aafc
AB
566 }
567
946fb27c
PB
568 if (deadline > 0) {
569 /*
40daca54 570 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
946fb27c
PB
571 * sleep. Otherwise, the CPU might be waiting for a future timer
572 * interrupt to wake it up, but the interrupt never comes because
573 * the vCPU isn't running any insns and thus doesn't advance the
40daca54 574 * QEMU_CLOCK_VIRTUAL.
946fb27c 575 */
5045e9d9
VC
576 if (!icount_sleep) {
577 /*
578 * We never let VCPUs sleep in no sleep icount mode.
579 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
580 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
581 * It is useful when we want a deterministic execution time,
582 * isolated from host latencies.
583 */
94377115
PB
584 seqlock_write_lock(&timers_state.vm_clock_seqlock,
585 &timers_state.vm_clock_lock);
d73415a3 586 qatomic_set_i64(&timers_state.qemu_icount_bias,
c97595d1 587 timers_state.qemu_icount_bias + deadline);
94377115
PB
588 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
589 &timers_state.vm_clock_lock);
5045e9d9
VC
590 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
591 } else {
592 /*
593 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
594 * "real" time, (related to the time left until the next event) has
595 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
596 * This avoids that the warps are visible externally; for example,
597 * you will not be sending network packets continuously instead of
598 * every 100ms.
599 */
94377115
PB
600 seqlock_write_lock(&timers_state.vm_clock_seqlock,
601 &timers_state.vm_clock_lock);
b39e3f34
PD
602 if (timers_state.vm_clock_warp_start == -1
603 || timers_state.vm_clock_warp_start > clock) {
604 timers_state.vm_clock_warp_start = clock;
5045e9d9 605 }
94377115
PB
606 seqlock_write_unlock(&timers_state.vm_clock_seqlock,
607 &timers_state.vm_clock_lock);
b39e3f34
PD
608 timer_mod_anticipate(timers_state.icount_warp_timer,
609 clock + deadline);
ce78d18c 610 }
ac70aafc 611 } else if (deadline == 0) {
40daca54 612 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
946fb27c
PB
613 }
614}
615
e76d1798
PD
616static void qemu_account_warp_timer(void)
617{
618 if (!use_icount || !icount_sleep) {
619 return;
620 }
621
622 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
623 * do not fire, so computing the deadline does not make sense.
624 */
625 if (!runstate_is_running()) {
626 return;
627 }
628
629 /* warp clock deterministically in record/replay mode */
630 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
631 return;
632 }
633
b39e3f34 634 timer_del(timers_state.icount_warp_timer);
e76d1798
PD
635 icount_warp_rt();
636}
637
d09eae37
FK
638static bool icount_state_needed(void *opaque)
639{
640 return use_icount;
641}
642
b39e3f34
PD
643static bool warp_timer_state_needed(void *opaque)
644{
645 TimersState *s = opaque;
646 return s->icount_warp_timer != NULL;
647}
648
649static bool adjust_timers_state_needed(void *opaque)
650{
651 TimersState *s = opaque;
652 return s->icount_rt_timer != NULL;
653}
654
b8164e68
PD
655static bool shift_state_needed(void *opaque)
656{
657 return use_icount == 2;
658}
659
b39e3f34
PD
660/*
661 * Subsection for warp timer migration is optional, because may not be created
662 */
663static const VMStateDescription icount_vmstate_warp_timer = {
664 .name = "timer/icount/warp_timer",
665 .version_id = 1,
666 .minimum_version_id = 1,
667 .needed = warp_timer_state_needed,
668 .fields = (VMStateField[]) {
669 VMSTATE_INT64(vm_clock_warp_start, TimersState),
670 VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
671 VMSTATE_END_OF_LIST()
672 }
673};
674
675static const VMStateDescription icount_vmstate_adjust_timers = {
676 .name = "timer/icount/timers",
677 .version_id = 1,
678 .minimum_version_id = 1,
679 .needed = adjust_timers_state_needed,
680 .fields = (VMStateField[]) {
681 VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
682 VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
683 VMSTATE_END_OF_LIST()
684 }
685};
686
b8164e68
PD
687static const VMStateDescription icount_vmstate_shift = {
688 .name = "timer/icount/shift",
689 .version_id = 1,
690 .minimum_version_id = 1,
691 .needed = shift_state_needed,
692 .fields = (VMStateField[]) {
693 VMSTATE_INT16(icount_time_shift, TimersState),
694 VMSTATE_END_OF_LIST()
695 }
696};
697
d09eae37
FK
698/*
699 * This is a subsection for icount migration.
700 */
701static const VMStateDescription icount_vmstate_timers = {
702 .name = "timer/icount",
703 .version_id = 1,
704 .minimum_version_id = 1,
5cd8cada 705 .needed = icount_state_needed,
d09eae37
FK
706 .fields = (VMStateField[]) {
707 VMSTATE_INT64(qemu_icount_bias, TimersState),
708 VMSTATE_INT64(qemu_icount, TimersState),
709 VMSTATE_END_OF_LIST()
b39e3f34
PD
710 },
711 .subsections = (const VMStateDescription*[]) {
712 &icount_vmstate_warp_timer,
713 &icount_vmstate_adjust_timers,
b8164e68 714 &icount_vmstate_shift,
b39e3f34 715 NULL
d09eae37
FK
716 }
717};
718
946fb27c
PB
719static const VMStateDescription vmstate_timers = {
720 .name = "timer",
721 .version_id = 2,
722 .minimum_version_id = 1,
35d08458 723 .fields = (VMStateField[]) {
946fb27c 724 VMSTATE_INT64(cpu_ticks_offset, TimersState),
c1ff073c 725 VMSTATE_UNUSED(8),
946fb27c
PB
726 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
727 VMSTATE_END_OF_LIST()
d09eae37 728 },
5cd8cada
JQ
729 .subsections = (const VMStateDescription*[]) {
730 &icount_vmstate_timers,
731 NULL
946fb27c
PB
732 }
733};
734
4603ea01
PD
735void cpu_ticks_init(void)
736{
ccdb3c1f 737 seqlock_init(&timers_state.vm_clock_seqlock);
87a09cdc 738 qemu_spin_init(&timers_state.vm_clock_lock);
4603ea01 739 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
b0c3cf94 740 cpu_throttle_init();
4603ea01
PD
741}
742
1ad9580b 743void configure_icount(QemuOpts *opts, Error **errp)
946fb27c 744{
abc9bf69
MA
745 const char *option = qemu_opt_get(opts, "shift");
746 bool sleep = qemu_opt_get_bool(opts, "sleep", true);
747 bool align = qemu_opt_get_bool(opts, "align", false);
748 long time_shift = -1;
1ad9580b 749
6c1ddc36
MA
750 if (!option) {
751 if (qemu_opt_get(opts, "align") != NULL) {
752 error_setg(errp, "Please specify shift option when using align");
753 }
946fb27c
PB
754 return;
755 }
f1f4b57e 756
abc9bf69 757 if (align && !sleep) {
778d9f9b 758 error_setg(errp, "align=on and sleep=off are incompatible");
abc9bf69 759 return;
f1f4b57e 760 }
abc9bf69 761
946fb27c 762 if (strcmp(option, "auto") != 0) {
9ec374a7
MA
763 if (qemu_strtol(option, NULL, 0, &time_shift) < 0
764 || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) {
a8bfac37 765 error_setg(errp, "icount: Invalid shift value");
abc9bf69 766 return;
a8bfac37 767 }
a8bfac37
ST
768 } else if (icount_align_option) {
769 error_setg(errp, "shift=auto and align=on are incompatible");
abc9bf69 770 return;
f1f4b57e 771 } else if (!icount_sleep) {
778d9f9b 772 error_setg(errp, "shift=auto and sleep=off are incompatible");
abc9bf69
MA
773 return;
774 }
775
776 icount_sleep = sleep;
777 if (icount_sleep) {
778 timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
779 icount_timer_cb, NULL);
780 }
781
782 icount_align_option = align;
783
784 if (time_shift >= 0) {
785 timers_state.icount_time_shift = time_shift;
786 use_icount = 1;
787 return;
946fb27c
PB
788 }
789
790 use_icount = 2;
791
792 /* 125MIPS seems a reasonable initial guess at the guest speed.
793 It will be corrected fairly quickly anyway. */
c1ff073c 794 timers_state.icount_time_shift = 3;
946fb27c
PB
795
796 /* Have both realtime and virtual time triggers for speed adjustment.
797 The realtime trigger catches emulated time passing too slowly,
798 the virtual time trigger catches emulated time passing too fast.
799 Realtime triggers occur even when idle, so use them less frequently
800 than VM triggers. */
b39e3f34
PD
801 timers_state.vm_clock_warp_start = -1;
802 timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
bf2a7ddb 803 icount_adjust_rt, NULL);
b39e3f34 804 timer_mod(timers_state.icount_rt_timer,
bf2a7ddb 805 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
b39e3f34 806 timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
40daca54 807 icount_adjust_vm, NULL);
b39e3f34 808 timer_mod(timers_state.icount_vm_timer,
40daca54 809 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
73bcb24d 810 NANOSECONDS_PER_SECOND / 10);
946fb27c
PB
811}
812
6546706d
AB
/***********************************************************/
/* TCG vCPU kick timer
 *
 * The kick timer is responsible for moving single threaded vCPU
 * emulation on to the next vCPU. If more than one vCPU is running a
 * timer event will force a cpu->exit so the next vCPU can get
 * scheduled.
 *
 * The timer is removed if all vCPUs are idle and restarted again once
 * idleness is complete.
 */
824
825static QEMUTimer *tcg_kick_vcpu_timer;
791158d9 826static CPUState *tcg_current_rr_cpu;
6546706d
AB
827
828#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
829
830static inline int64_t qemu_tcg_next_kick(void)
831{
832 return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
833}
834
e8f22f76
AB
835/* Kick the currently round-robin scheduled vCPU to next */
836static void qemu_cpu_kick_rr_next_cpu(void)
791158d9
AB
837{
838 CPUState *cpu;
791158d9 839 do {
d73415a3 840 cpu = qatomic_mb_read(&tcg_current_rr_cpu);
791158d9
AB
841 if (cpu) {
842 cpu_exit(cpu);
843 }
d73415a3 844 } while (cpu != qatomic_mb_read(&tcg_current_rr_cpu));
791158d9
AB
845}
846
e8f22f76
AB
847/* Kick all RR vCPUs */
848static void qemu_cpu_kick_rr_cpus(void)
849{
850 CPUState *cpu;
851
852 CPU_FOREACH(cpu) {
853 cpu_exit(cpu);
854 };
855}
856
6b8f0187
PB
857static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
858{
859}
860
3f53bc61
PB
861void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
862{
6b8f0187
PB
863 if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
864 qemu_notify_event();
865 return;
866 }
867
c52e7132
PM
868 if (qemu_in_vcpu_thread()) {
869 /* A CPU is currently running; kick it back out to the
870 * tcg_cpu_exec() loop so it will recalculate its
871 * icount deadline immediately.
872 */
873 qemu_cpu_kick(current_cpu);
874 } else if (first_cpu) {
6b8f0187
PB
875 /* qemu_cpu_kick is not enough to kick a halted CPU out of
876 * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
877 * causes cpu_thread_is_idle to return false. This way,
878 * handle_icount_deadline can run.
c52e7132
PM
879 * If we have no CPUs at all for some reason, we don't
880 * need to do anything.
6b8f0187
PB
881 */
882 async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
883 }
3f53bc61
PB
884}
885
6546706d
AB
886static void kick_tcg_thread(void *opaque)
887{
888 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
e8f22f76 889 qemu_cpu_kick_rr_next_cpu();
6546706d
AB
890}
891
892static void start_tcg_kick_timer(void)
893{
db08b687
PB
894 assert(!mttcg_enabled);
895 if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
6546706d
AB
896 tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
897 kick_tcg_thread, NULL);
1926ab27
AB
898 }
899 if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
6546706d
AB
900 timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
901 }
902}
903
904static void stop_tcg_kick_timer(void)
905{
db08b687 906 assert(!mttcg_enabled);
1926ab27 907 if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
6546706d 908 timer_del(tcg_kick_vcpu_timer);
6546706d
AB
909 }
910}
911
296af7c9
BS
912/***********************************************************/
913void hw_error(const char *fmt, ...)
914{
915 va_list ap;
55e5c285 916 CPUState *cpu;
296af7c9
BS
917
918 va_start(ap, fmt);
919 fprintf(stderr, "qemu: hardware error: ");
920 vfprintf(stderr, fmt, ap);
921 fprintf(stderr, "\n");
bdc44640 922 CPU_FOREACH(cpu) {
55e5c285 923 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
90c84c56 924 cpu_dump_state(cpu, stderr, CPU_DUMP_FPU);
296af7c9
BS
925 }
926 va_end(ap);
927 abort();
928}
929
930void cpu_synchronize_all_states(void)
931{
182735ef 932 CPUState *cpu;
296af7c9 933
bdc44640 934 CPU_FOREACH(cpu) {
182735ef 935 cpu_synchronize_state(cpu);
296af7c9
BS
936 }
937}
938
939void cpu_synchronize_all_post_reset(void)
940{
182735ef 941 CPUState *cpu;
296af7c9 942
bdc44640 943 CPU_FOREACH(cpu) {
182735ef 944 cpu_synchronize_post_reset(cpu);
296af7c9
BS
945 }
946}
947
948void cpu_synchronize_all_post_init(void)
949{
182735ef 950 CPUState *cpu;
296af7c9 951
bdc44640 952 CPU_FOREACH(cpu) {
182735ef 953 cpu_synchronize_post_init(cpu);
296af7c9
BS
954 }
955}
956
75e972da
DG
957void cpu_synchronize_all_pre_loadvm(void)
958{
959 CPUState *cpu;
960
961 CPU_FOREACH(cpu) {
962 cpu_synchronize_pre_loadvm(cpu);
963 }
964}
965
4486e89c 966static int do_vm_stop(RunState state, bool send_stop)
296af7c9 967{
56983463
KW
968 int ret = 0;
969
1354869c 970 if (runstate_is_running()) {
f962cac4 971 runstate_set(state);
296af7c9 972 cpu_disable_ticks();
296af7c9 973 pause_all_vcpus();
1dfb4dd9 974 vm_state_notify(0, state);
4486e89c 975 if (send_stop) {
3ab72385 976 qapi_event_send_stop();
4486e89c 977 }
296af7c9 978 }
56983463 979
594a45ce 980 bdrv_drain_all();
22af08ea 981 ret = bdrv_flush_all();
594a45ce 982
56983463 983 return ret;
296af7c9
BS
984}
985
4486e89c
SH
986/* Special vm_stop() variant for terminating the process. Historically clients
987 * did not expect a QMP STOP event and so we need to retain compatibility.
988 */
989int vm_shutdown(void)
990{
991 return do_vm_stop(RUN_STATE_SHUTDOWN, false);
992}
993
a1fcaa73 994static bool cpu_can_run(CPUState *cpu)
296af7c9 995{
4fdeee7c 996 if (cpu->stop) {
a1fcaa73 997 return false;
0ab07c62 998 }
321bc0b2 999 if (cpu_is_stopped(cpu)) {
a1fcaa73 1000 return false;
0ab07c62 1001 }
a1fcaa73 1002 return true;
296af7c9
BS
1003}
1004
91325046 1005static void cpu_handle_guest_debug(CPUState *cpu)
83f338f7 1006{
64f6b346 1007 gdb_set_stop_cpu(cpu);
8cf71710 1008 qemu_system_debug_request();
f324e766 1009 cpu->stopped = true;
3c638d06
JK
1010}
1011
6d9cb73c
JK
1012#ifdef CONFIG_LINUX
1013static void sigbus_reraise(void)
1014{
1015 sigset_t set;
1016 struct sigaction action;
1017
1018 memset(&action, 0, sizeof(action));
1019 action.sa_handler = SIG_DFL;
1020 if (!sigaction(SIGBUS, &action, NULL)) {
1021 raise(SIGBUS);
1022 sigemptyset(&set);
1023 sigaddset(&set, SIGBUS);
a2d1761d 1024 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
6d9cb73c
JK
1025 }
1026 perror("Failed to re-raise SIGBUS!\n");
1027 abort();
1028}
1029
d98d4072 1030static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
6d9cb73c 1031{
a16fc07e
PB
1032 if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
1033 sigbus_reraise();
1034 }
1035
2ae41db2
PB
1036 if (current_cpu) {
1037 /* Called asynchronously in VCPU thread. */
1038 if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
1039 sigbus_reraise();
1040 }
1041 } else {
1042 /* Called synchronously (via signalfd) in main thread. */
1043 if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
1044 sigbus_reraise();
1045 }
6d9cb73c
JK
1046 }
1047}
1048
1049static void qemu_init_sigbus(void)
1050{
1051 struct sigaction action;
1052
1053 memset(&action, 0, sizeof(action));
1054 action.sa_flags = SA_SIGINFO;
d98d4072 1055 action.sa_sigaction = sigbus_handler;
6d9cb73c
JK
1056 sigaction(SIGBUS, &action, NULL);
1057
1058 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
1059}
6d9cb73c 1060#else /* !CONFIG_LINUX */
6d9cb73c
JK
/* No SIGBUS handling is set up when CONFIG_LINUX is not defined. */
static void qemu_init_sigbus(void)
{
    /* intentionally empty */
}
a16fc07e 1064#endif /* !CONFIG_LINUX */
ff48eb5f 1065
296af7c9
BS
1066static QemuThread io_thread;
1067
296af7c9
BS
1068/* cpu creation */
1069static QemuCond qemu_cpu_cond;
/* broadcast when a vCPU stops; pause_all_vcpus() waits on it */
296af7c9
BS
1071static QemuCond qemu_pause_cond;
1072
d3b12f5d 1073void qemu_init_cpu_loop(void)
296af7c9 1074{
6d9cb73c 1075 qemu_init_sigbus();
ed94592b 1076 qemu_cond_init(&qemu_cpu_cond);
ed94592b 1077 qemu_cond_init(&qemu_pause_cond);
296af7c9 1078 qemu_mutex_init(&qemu_global_mutex);
296af7c9 1079
b7680cb6 1080 qemu_thread_get_self(&io_thread);
296af7c9
BS
1081}
1082
14e6fe12 1083void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
e82bcec2 1084{
d148d90e 1085 do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
3c02270d
CV
1086}
1087
4c055ab5
GZ
1088static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1089{
1090 if (kvm_destroy_vcpu(cpu) < 0) {
1091 error_report("kvm_destroy_vcpu failed");
1092 exit(EXIT_FAILURE);
1093 }
1094}
1095
1096static void qemu_tcg_destroy_vcpu(CPUState *cpu)
1097{
1098}
1099
ebd05fea
DH
1100static void qemu_cpu_stop(CPUState *cpu, bool exit)
1101{
1102 g_assert(qemu_cpu_is_self(cpu));
1103 cpu->stop = false;
1104 cpu->stopped = true;
1105 if (exit) {
1106 cpu_exit(cpu);
1107 }
1108 qemu_cond_broadcast(&qemu_pause_cond);
1109}
1110
509a0d78 1111static void qemu_wait_io_event_common(CPUState *cpu)
296af7c9 1112{
d73415a3 1113 qatomic_mb_set(&cpu->thread_kicked, false);
4fdeee7c 1114 if (cpu->stop) {
ebd05fea 1115 qemu_cpu_stop(cpu, false);
296af7c9 1116 }
a5403c69 1117 process_queued_cpu_work(cpu);
37257942
AB
1118}
1119
a8efa606 1120static void qemu_tcg_rr_wait_io_event(void)
37257942 1121{
a8efa606
PB
1122 CPUState *cpu;
1123
db08b687 1124 while (all_cpu_threads_idle()) {
6546706d 1125 stop_tcg_kick_timer();
a8efa606 1126 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
16400322 1127 }
296af7c9 1128
6546706d
AB
1129 start_tcg_kick_timer();
1130
a8efa606
PB
1131 CPU_FOREACH(cpu) {
1132 qemu_wait_io_event_common(cpu);
1133 }
296af7c9
BS
1134}
1135
db08b687 1136static void qemu_wait_io_event(CPUState *cpu)
296af7c9 1137{
30865f31
EC
1138 bool slept = false;
1139
a98ae1d8 1140 while (cpu_thread_is_idle(cpu)) {
30865f31
EC
1141 if (!slept) {
1142 slept = true;
1143 qemu_plugin_vcpu_idle_cb(cpu);
1144 }
f5c121b8 1145 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 1146 }
30865f31
EC
1147 if (slept) {
1148 qemu_plugin_vcpu_resume_cb(cpu);
1149 }
296af7c9 1150
db08b687
PB
1151#ifdef _WIN32
1152 /* Eat dummy APC queued by qemu_cpu_kick_thread. */
1153 if (!tcg_enabled()) {
1154 SleepEx(0, TRUE);
c97d6d2c 1155 }
db08b687 1156#endif
c97d6d2c
SAGDR
1157 qemu_wait_io_event_common(cpu);
1158}
1159
7e97cd88 1160static void *qemu_kvm_cpu_thread_fn(void *arg)
296af7c9 1161{
48a106bd 1162 CPUState *cpu = arg;
84b4915d 1163 int r;
296af7c9 1164
ab28bd23
PB
1165 rcu_register_thread();
1166
2e7f7a3c 1167 qemu_mutex_lock_iothread();
814e612e 1168 qemu_thread_get_self(cpu->thread);
9f09e18a 1169 cpu->thread_id = qemu_get_thread_id();
626cf8f4 1170 cpu->can_do_io = 1;
4917cf44 1171 current_cpu = cpu;
296af7c9 1172
504134d2 1173 r = kvm_init_vcpu(cpu);
84b4915d 1174 if (r < 0) {
493d89bf 1175 error_report("kvm_init_vcpu failed: %s", strerror(-r));
84b4915d
JK
1176 exit(1);
1177 }
296af7c9 1178
18268b60 1179 kvm_init_cpu_signals(cpu);
296af7c9
BS
1180
1181 /* signal CPU creation */
61a46217 1182 cpu->created = true;
296af7c9 1183 qemu_cond_signal(&qemu_cpu_cond);
9c09a251 1184 qemu_guest_random_seed_thread_part2(cpu->random_seed);
296af7c9 1185
4c055ab5 1186 do {
a1fcaa73 1187 if (cpu_can_run(cpu)) {
1458c363 1188 r = kvm_cpu_exec(cpu);
83f338f7 1189 if (r == EXCP_DEBUG) {
91325046 1190 cpu_handle_guest_debug(cpu);
83f338f7 1191 }
0ab07c62 1192 }
db08b687 1193 qemu_wait_io_event(cpu);
4c055ab5 1194 } while (!cpu->unplug || cpu_can_run(cpu));
296af7c9 1195
4c055ab5 1196 qemu_kvm_destroy_vcpu(cpu);
2c579042
BR
1197 cpu->created = false;
1198 qemu_cond_signal(&qemu_cpu_cond);
4c055ab5 1199 qemu_mutex_unlock_iothread();
57615ed5 1200 rcu_unregister_thread();
296af7c9
BS
1201 return NULL;
1202}
1203
c7f0f3b1
AL
1204static void *qemu_dummy_cpu_thread_fn(void *arg)
1205{
1206#ifdef _WIN32
493d89bf 1207 error_report("qtest is not supported under Windows");
c7f0f3b1
AL
1208 exit(1);
1209#else
10a9021d 1210 CPUState *cpu = arg;
c7f0f3b1
AL
1211 sigset_t waitset;
1212 int r;
1213
ab28bd23
PB
1214 rcu_register_thread();
1215
c7f0f3b1 1216 qemu_mutex_lock_iothread();
814e612e 1217 qemu_thread_get_self(cpu->thread);
9f09e18a 1218 cpu->thread_id = qemu_get_thread_id();
626cf8f4 1219 cpu->can_do_io = 1;
37257942 1220 current_cpu = cpu;
c7f0f3b1
AL
1221
1222 sigemptyset(&waitset);
1223 sigaddset(&waitset, SIG_IPI);
1224
1225 /* signal CPU creation */
61a46217 1226 cpu->created = true;
c7f0f3b1 1227 qemu_cond_signal(&qemu_cpu_cond);
9c09a251 1228 qemu_guest_random_seed_thread_part2(cpu->random_seed);
c7f0f3b1 1229
d2831ab0 1230 do {
c7f0f3b1
AL
1231 qemu_mutex_unlock_iothread();
1232 do {
1233 int sig;
1234 r = sigwait(&waitset, &sig);
1235 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1236 if (r == -1) {
1237 perror("sigwait");
1238 exit(1);
1239 }
1240 qemu_mutex_lock_iothread();
db08b687 1241 qemu_wait_io_event(cpu);
d2831ab0 1242 } while (!cpu->unplug);
c7f0f3b1 1243
d40bfcbb 1244 qemu_mutex_unlock_iothread();
d2831ab0 1245 rcu_unregister_thread();
c7f0f3b1
AL
1246 return NULL;
1247#endif
1248}
1249
1be7fcb8
AB
1250static int64_t tcg_get_icount_limit(void)
1251{
1252 int64_t deadline;
1253
1254 if (replay_mode != REPLAY_MODE_PLAY) {
dcb15780
PD
1255 /*
1256 * Include all the timers, because they may need an attention.
1257 * Too long CPU execution may create unnecessary delay in UI.
1258 */
1259 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
1260 QEMU_TIMER_ATTR_ALL);
fc6b2dba
PD
1261 /* Check realtime timers, because they help with input processing */
1262 deadline = qemu_soonest_timeout(deadline,
1263 qemu_clock_deadline_ns_all(QEMU_CLOCK_REALTIME,
1264 QEMU_TIMER_ATTR_ALL));
1be7fcb8
AB
1265
1266 /* Maintain prior (possibly buggy) behaviour where if no deadline
1267 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1268 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1269 * nanoseconds.
1270 */
1271 if ((deadline < 0) || (deadline > INT32_MAX)) {
1272 deadline = INT32_MAX;
1273 }
1274
1275 return qemu_icount_round(deadline);
1276 } else {
1277 return replay_get_instructions();
1278 }
1279}
1280
255ae6e2
PD
1281static void notify_aio_contexts(void)
1282{
1283 /* Wake up other AioContexts. */
1284 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1285 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
1286}
1287
12e9700d
AB
1288static void handle_icount_deadline(void)
1289{
6b8f0187 1290 assert(qemu_in_vcpu_thread());
12e9700d 1291 if (use_icount) {
dcb15780
PD
1292 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
1293 QEMU_TIMER_ATTR_ALL);
12e9700d
AB
1294
1295 if (deadline == 0) {
255ae6e2 1296 notify_aio_contexts();
12e9700d
AB
1297 }
1298 }
1299}
1300
05248382 1301static void prepare_icount_for_run(CPUState *cpu)
1be7fcb8 1302{
1be7fcb8 1303 if (use_icount) {
eda5f7c6 1304 int insns_left;
05248382
AB
1305
1306 /* These should always be cleared by process_icount_data after
1307 * each vCPU execution. However u16.high can be raised
1308 * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
1309 */
5e140196 1310 g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
05248382
AB
1311 g_assert(cpu->icount_extra == 0);
1312
eda5f7c6
AB
1313 cpu->icount_budget = tcg_get_icount_limit();
1314 insns_left = MIN(0xffff, cpu->icount_budget);
5e140196 1315 cpu_neg(cpu)->icount_decr.u16.low = insns_left;
eda5f7c6 1316 cpu->icount_extra = cpu->icount_budget - insns_left;
d759c951
AB
1317
1318 replay_mutex_lock();
255ae6e2
PD
1319
1320 if (cpu->icount_budget == 0 && replay_has_checkpoint()) {
1321 notify_aio_contexts();
1322 }
1be7fcb8 1323 }
05248382
AB
1324}
1325
1326static void process_icount_data(CPUState *cpu)
1327{
1be7fcb8 1328 if (use_icount) {
e4cd9657 1329 /* Account for executed instructions */
512d3c80 1330 cpu_update_icount(cpu);
05248382
AB
1331
1332 /* Reset the counters */
5e140196 1333 cpu_neg(cpu)->icount_decr.u16.low = 0;
1be7fcb8 1334 cpu->icount_extra = 0;
e4cd9657
AB
1335 cpu->icount_budget = 0;
1336
1be7fcb8 1337 replay_account_executed_instructions();
d759c951
AB
1338
1339 replay_mutex_unlock();
1be7fcb8 1340 }
05248382
AB
1341}
1342
1343
1344static int tcg_cpu_exec(CPUState *cpu)
1345{
1346 int ret;
1347#ifdef CONFIG_PROFILER
1348 int64_t ti;
1349#endif
1350
f28d0dfd 1351 assert(tcg_enabled());
05248382
AB
1352#ifdef CONFIG_PROFILER
1353 ti = profile_getclock();
1354#endif
05248382
AB
1355 cpu_exec_start(cpu);
1356 ret = cpu_exec(cpu);
1357 cpu_exec_end(cpu);
05248382 1358#ifdef CONFIG_PROFILER
d73415a3 1359 qatomic_set(&tcg_ctx->prof.cpu_exec_time,
72fd2efb 1360 tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
05248382 1361#endif
1be7fcb8
AB
1362 return ret;
1363}
1364
c93bbbef
AB
1365/* Destroy any remaining vCPUs which have been unplugged and have
1366 * finished running
1367 */
1368static void deal_with_unplugged_cpus(void)
1be7fcb8 1369{
c93bbbef 1370 CPUState *cpu;
1be7fcb8 1371
c93bbbef
AB
1372 CPU_FOREACH(cpu) {
1373 if (cpu->unplug && !cpu_can_run(cpu)) {
1374 qemu_tcg_destroy_vcpu(cpu);
1375 cpu->created = false;
1376 qemu_cond_signal(&qemu_cpu_cond);
1be7fcb8
AB
1377 break;
1378 }
1379 }
1be7fcb8 1380}
bdb7ca67 1381
6546706d
AB
1382/* Single-threaded TCG
1383 *
1384 * In the single-threaded case each vCPU is simulated in turn. If
1385 * there is more than a single vCPU we create a simple timer to kick
1386 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1387 * This is done explicitly rather than relying on side-effects
1388 * elsewhere.
1389 */
1390
37257942 1391static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
296af7c9 1392{
c3586ba7 1393 CPUState *cpu = arg;
296af7c9 1394
f28d0dfd 1395 assert(tcg_enabled());
ab28bd23 1396 rcu_register_thread();
3468b59e 1397 tcg_register_thread();
ab28bd23 1398
2e7f7a3c 1399 qemu_mutex_lock_iothread();
814e612e 1400 qemu_thread_get_self(cpu->thread);
296af7c9 1401
5a9c973b
DH
1402 cpu->thread_id = qemu_get_thread_id();
1403 cpu->created = true;
1404 cpu->can_do_io = 1;
296af7c9 1405 qemu_cond_signal(&qemu_cpu_cond);
9c09a251 1406 qemu_guest_random_seed_thread_part2(cpu->random_seed);
296af7c9 1407
fa7d1867 1408 /* wait for initial kick-off after machine start */
c28e399c 1409 while (first_cpu->stopped) {
d5f8d613 1410 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
8e564b4e
JK
1411
1412 /* process any pending work */
bdc44640 1413 CPU_FOREACH(cpu) {
37257942 1414 current_cpu = cpu;
182735ef 1415 qemu_wait_io_event_common(cpu);
8e564b4e 1416 }
0ab07c62 1417 }
296af7c9 1418
6546706d
AB
1419 start_tcg_kick_timer();
1420
c93bbbef
AB
1421 cpu = first_cpu;
1422
e5143e30
AB
1423 /* process any pending work */
1424 cpu->exit_request = 1;
1425
296af7c9 1426 while (1) {
d759c951
AB
1427 qemu_mutex_unlock_iothread();
1428 replay_mutex_lock();
1429 qemu_mutex_lock_iothread();
c93bbbef
AB
1430 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
1431 qemu_account_warp_timer();
1432
6b8f0187
PB
1433 /* Run the timers here. This is much more efficient than
1434 * waking up the I/O thread and waiting for completion.
1435 */
1436 handle_icount_deadline();
1437
d759c951
AB
1438 replay_mutex_unlock();
1439
c93bbbef
AB
1440 if (!cpu) {
1441 cpu = first_cpu;
1442 }
1443
0c0fcc20 1444 while (cpu && cpu_work_list_empty(cpu) && !cpu->exit_request) {
e5143e30 1445
d73415a3 1446 qatomic_mb_set(&tcg_current_rr_cpu, cpu);
37257942 1447 current_cpu = cpu;
c93bbbef
AB
1448
1449 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1450 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1451
1452 if (cpu_can_run(cpu)) {
1453 int r;
05248382 1454
d759c951 1455 qemu_mutex_unlock_iothread();
05248382
AB
1456 prepare_icount_for_run(cpu);
1457
c93bbbef 1458 r = tcg_cpu_exec(cpu);
05248382
AB
1459
1460 process_icount_data(cpu);
d759c951 1461 qemu_mutex_lock_iothread();
05248382 1462
c93bbbef
AB
1463 if (r == EXCP_DEBUG) {
1464 cpu_handle_guest_debug(cpu);
1465 break;
08e73c48
PK
1466 } else if (r == EXCP_ATOMIC) {
1467 qemu_mutex_unlock_iothread();
1468 cpu_exec_step_atomic(cpu);
1469 qemu_mutex_lock_iothread();
1470 break;
c93bbbef 1471 }
37257942 1472 } else if (cpu->stop) {
c93bbbef
AB
1473 if (cpu->unplug) {
1474 cpu = CPU_NEXT(cpu);
1475 }
1476 break;
1477 }
1478
e5143e30
AB
1479 cpu = CPU_NEXT(cpu);
1480 } /* while (cpu && !cpu->exit_request).. */
1481
d73415a3
SH
1482 /* Does not need qatomic_mb_set because a spurious wakeup is okay. */
1483 qatomic_set(&tcg_current_rr_cpu, NULL);
c93bbbef 1484
e5143e30 1485 if (cpu && cpu->exit_request) {
d73415a3 1486 qatomic_mb_set(&cpu->exit_request, 0);
e5143e30 1487 }
ac70aafc 1488
013aabdc
CD
1489 if (use_icount && all_cpu_threads_idle()) {
1490 /*
1491 * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
1492 * in the main_loop, wake it up in order to start the warp timer.
1493 */
1494 qemu_notify_event();
1495 }
1496
a8efa606 1497 qemu_tcg_rr_wait_io_event();
c93bbbef 1498 deal_with_unplugged_cpus();
296af7c9
BS
1499 }
1500
9b0605f9 1501 rcu_unregister_thread();
296af7c9
BS
1502 return NULL;
1503}
1504
b0cb0a66
VP
1505static void *qemu_hax_cpu_thread_fn(void *arg)
1506{
1507 CPUState *cpu = arg;
1508 int r;
b3d3a426 1509
9857c2d2 1510 rcu_register_thread();
b3d3a426 1511 qemu_mutex_lock_iothread();
b0cb0a66 1512 qemu_thread_get_self(cpu->thread);
b0cb0a66
VP
1513
1514 cpu->thread_id = qemu_get_thread_id();
1515 cpu->created = true;
b0cb0a66
VP
1516 current_cpu = cpu;
1517
1518 hax_init_vcpu(cpu);
1519 qemu_cond_signal(&qemu_cpu_cond);
9c09a251 1520 qemu_guest_random_seed_thread_part2(cpu->random_seed);
b0cb0a66 1521
9857c2d2 1522 do {
b0cb0a66
VP
1523 if (cpu_can_run(cpu)) {
1524 r = hax_smp_cpu_exec(cpu);
1525 if (r == EXCP_DEBUG) {
1526 cpu_handle_guest_debug(cpu);
1527 }
1528 }
1529
db08b687 1530 qemu_wait_io_event(cpu);
9857c2d2
PB
1531 } while (!cpu->unplug || cpu_can_run(cpu));
1532 rcu_unregister_thread();
b0cb0a66
VP
1533 return NULL;
1534}
1535
c97d6d2c
SAGDR
1536/* The HVF-specific vCPU thread function. This one should only run when the host
1537 * CPU supports the VMX "unrestricted guest" feature. */
1538static void *qemu_hvf_cpu_thread_fn(void *arg)
1539{
1540 CPUState *cpu = arg;
1541
1542 int r;
1543
1544 assert(hvf_enabled());
1545
1546 rcu_register_thread();
1547
1548 qemu_mutex_lock_iothread();
1549 qemu_thread_get_self(cpu->thread);
1550
1551 cpu->thread_id = qemu_get_thread_id();
1552 cpu->can_do_io = 1;
1553 current_cpu = cpu;
1554
1555 hvf_init_vcpu(cpu);
1556
1557 /* signal CPU creation */
1558 cpu->created = true;
1559 qemu_cond_signal(&qemu_cpu_cond);
9c09a251 1560 qemu_guest_random_seed_thread_part2(cpu->random_seed);
c97d6d2c
SAGDR
1561
1562 do {
1563 if (cpu_can_run(cpu)) {
1564 r = hvf_vcpu_exec(cpu);
1565 if (r == EXCP_DEBUG) {
1566 cpu_handle_guest_debug(cpu);
1567 }
1568 }
db08b687 1569 qemu_wait_io_event(cpu);
c97d6d2c
SAGDR
1570 } while (!cpu->unplug || cpu_can_run(cpu));
1571
1572 hvf_vcpu_destroy(cpu);
1573 cpu->created = false;
1574 qemu_cond_signal(&qemu_cpu_cond);
1575 qemu_mutex_unlock_iothread();
8178e637 1576 rcu_unregister_thread();
c97d6d2c
SAGDR
1577 return NULL;
1578}
1579
19306806
JTV
1580static void *qemu_whpx_cpu_thread_fn(void *arg)
1581{
1582 CPUState *cpu = arg;
1583 int r;
1584
1585 rcu_register_thread();
1586
1587 qemu_mutex_lock_iothread();
1588 qemu_thread_get_self(cpu->thread);
1589 cpu->thread_id = qemu_get_thread_id();
1590 current_cpu = cpu;
1591
1592 r = whpx_init_vcpu(cpu);
1593 if (r < 0) {
1594 fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r));
1595 exit(1);
1596 }
1597
1598 /* signal CPU creation */
1599 cpu->created = true;
1600 qemu_cond_signal(&qemu_cpu_cond);
9c09a251 1601 qemu_guest_random_seed_thread_part2(cpu->random_seed);
19306806
JTV
1602
1603 do {
1604 if (cpu_can_run(cpu)) {
1605 r = whpx_vcpu_exec(cpu);
1606 if (r == EXCP_DEBUG) {
1607 cpu_handle_guest_debug(cpu);
1608 }
1609 }
1610 while (cpu_thread_is_idle(cpu)) {
1611 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1612 }
1613 qemu_wait_io_event_common(cpu);
1614 } while (!cpu->unplug || cpu_can_run(cpu));
1615
1616 whpx_destroy_vcpu(cpu);
1617 cpu->created = false;
1618 qemu_cond_signal(&qemu_cpu_cond);
1619 qemu_mutex_unlock_iothread();
1620 rcu_unregister_thread();
c97d6d2c
SAGDR
1621 return NULL;
1622}
1623
b0cb0a66
VP
1624#ifdef _WIN32
1625static void CALLBACK dummy_apc_func(ULONG_PTR unused)
1626{
1627}
1628#endif
1629
37257942
AB
1630/* Multi-threaded TCG
1631 *
1632 * In the multi-threaded case each vCPU has its own thread. The TLS
1633 * variable current_cpu can be used deep in the code to find the
1634 * current CPUState for a given thread.
1635 */
1636
1637static void *qemu_tcg_cpu_thread_fn(void *arg)
1638{
1639 CPUState *cpu = arg;
1640
f28d0dfd 1641 assert(tcg_enabled());
bf51c720
AB
1642 g_assert(!use_icount);
1643
37257942 1644 rcu_register_thread();
3468b59e 1645 tcg_register_thread();
37257942
AB
1646
1647 qemu_mutex_lock_iothread();
1648 qemu_thread_get_self(cpu->thread);
1649
1650 cpu->thread_id = qemu_get_thread_id();
1651 cpu->created = true;
1652 cpu->can_do_io = 1;
1653 current_cpu = cpu;
1654 qemu_cond_signal(&qemu_cpu_cond);
9c09a251 1655 qemu_guest_random_seed_thread_part2(cpu->random_seed);
37257942
AB
1656
1657 /* process any pending work */
1658 cpu->exit_request = 1;
1659
54961aac 1660 do {
37257942
AB
1661 if (cpu_can_run(cpu)) {
1662 int r;
d759c951 1663 qemu_mutex_unlock_iothread();
37257942 1664 r = tcg_cpu_exec(cpu);
d759c951 1665 qemu_mutex_lock_iothread();
37257942
AB
1666 switch (r) {
1667 case EXCP_DEBUG:
1668 cpu_handle_guest_debug(cpu);
1669 break;
1670 case EXCP_HALTED:
1671 /* during start-up the vCPU is reset and the thread is
1672 * kicked several times. If we don't ensure we go back
1673 * to sleep in the halted state we won't cleanly
1674 * start-up when the vCPU is enabled.
1675 *
1676 * cpu->halted should ensure we sleep in wait_io_event
1677 */
1678 g_assert(cpu->halted);
1679 break;
08e73c48
PK
1680 case EXCP_ATOMIC:
1681 qemu_mutex_unlock_iothread();
1682 cpu_exec_step_atomic(cpu);
1683 qemu_mutex_lock_iothread();
37257942
AB
1684 default:
1685 /* Ignore everything else? */
1686 break;
1687 }
1688 }
1689
d73415a3 1690 qatomic_mb_set(&cpu->exit_request, 0);
db08b687 1691 qemu_wait_io_event(cpu);
9b0605f9 1692 } while (!cpu->unplug || cpu_can_run(cpu));
37257942 1693
9b0605f9
PB
1694 qemu_tcg_destroy_vcpu(cpu);
1695 cpu->created = false;
1696 qemu_cond_signal(&qemu_cpu_cond);
1697 qemu_mutex_unlock_iothread();
1698 rcu_unregister_thread();
37257942
AB
1699 return NULL;
1700}
1701
2ff09a40 1702static void qemu_cpu_kick_thread(CPUState *cpu)
cc015e9a
PB
1703{
1704#ifndef _WIN32
1705 int err;
1706
e0c38211
PB
1707 if (cpu->thread_kicked) {
1708 return;
9102deda 1709 }
e0c38211 1710 cpu->thread_kicked = true;
814e612e 1711 err = pthread_kill(cpu->thread->thread, SIG_IPI);
d455ebc4 1712 if (err && err != ESRCH) {
cc015e9a
PB
1713 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1714 exit(1);
1715 }
1716#else /* _WIN32 */
b0cb0a66 1717 if (!qemu_cpu_is_self(cpu)) {
19306806
JTV
1718 if (whpx_enabled()) {
1719 whpx_vcpu_kick(cpu);
1720 } else if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
b0cb0a66
VP
1721 fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1722 __func__, GetLastError());
1723 exit(1);
1724 }
1725 }
e0c38211
PB
1726#endif
1727}
ed9164a3 1728
c08d7424 1729void qemu_cpu_kick(CPUState *cpu)
296af7c9 1730{
f5c121b8 1731 qemu_cond_broadcast(cpu->halt_cond);
e0c38211 1732 if (tcg_enabled()) {
e8f22f76
AB
1733 if (qemu_tcg_mttcg_enabled()) {
1734 cpu_exit(cpu);
1735 } else {
1736 qemu_cpu_kick_rr_cpus();
1737 }
e0c38211 1738 } else {
b0cb0a66
VP
1739 if (hax_enabled()) {
1740 /*
1741 * FIXME: race condition with the exit_request check in
1742 * hax_vcpu_hax_exec
1743 */
1744 cpu->exit_request = 1;
1745 }
e0c38211
PB
1746 qemu_cpu_kick_thread(cpu);
1747 }
296af7c9
BS
1748}
1749
46d62fac 1750void qemu_cpu_kick_self(void)
296af7c9 1751{
4917cf44 1752 assert(current_cpu);
9102deda 1753 qemu_cpu_kick_thread(current_cpu);
296af7c9
BS
1754}
1755
60e82579 1756bool qemu_cpu_is_self(CPUState *cpu)
296af7c9 1757{
814e612e 1758 return qemu_thread_is_self(cpu->thread);
296af7c9
BS
1759}
1760
79e2b9ae 1761bool qemu_in_vcpu_thread(void)
aa723c23 1762{
4917cf44 1763 return current_cpu && qemu_cpu_is_self(current_cpu);
aa723c23
JQ
1764}
1765
afbe7053
PB
1766static __thread bool iothread_locked = false;
1767
1768bool qemu_mutex_iothread_locked(void)
1769{
1770 return iothread_locked;
1771}
1772
cb764d06
EC
1773/*
1774 * The BQL is taken from so many places that it is worth profiling the
1775 * callers directly, instead of funneling them all through a single function.
1776 */
1777void qemu_mutex_lock_iothread_impl(const char *file, int line)
296af7c9 1778{
d73415a3 1779 QemuMutexLockFunc bql_lock = qatomic_read(&qemu_bql_mutex_lock_func);
cb764d06 1780
8d04fb55 1781 g_assert(!qemu_mutex_iothread_locked());
cb764d06 1782 bql_lock(&qemu_global_mutex, file, line);
afbe7053 1783 iothread_locked = true;
296af7c9
BS
1784}
1785
1786void qemu_mutex_unlock_iothread(void)
1787{
8d04fb55 1788 g_assert(qemu_mutex_iothread_locked());
afbe7053 1789 iothread_locked = false;
296af7c9
BS
1790 qemu_mutex_unlock(&qemu_global_mutex);
1791}
1792
19e067e0
AP
1793void qemu_cond_wait_iothread(QemuCond *cond)
1794{
1795 qemu_cond_wait(cond, &qemu_global_mutex);
1796}
1797
b0c3cf94
CF
1798void qemu_cond_timedwait_iothread(QemuCond *cond, int ms)
1799{
1800 qemu_cond_timedwait(cond, &qemu_global_mutex, ms);
1801}
1802
e8faee06 1803static bool all_vcpus_paused(void)
296af7c9 1804{
bdc44640 1805 CPUState *cpu;
296af7c9 1806
bdc44640 1807 CPU_FOREACH(cpu) {
182735ef 1808 if (!cpu->stopped) {
e8faee06 1809 return false;
0ab07c62 1810 }
296af7c9
BS
1811 }
1812
e8faee06 1813 return true;
296af7c9
BS
1814}
1815
1816void pause_all_vcpus(void)
1817{
bdc44640 1818 CPUState *cpu;
296af7c9 1819
40daca54 1820 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
bdc44640 1821 CPU_FOREACH(cpu) {
ebd05fea
DH
1822 if (qemu_cpu_is_self(cpu)) {
1823 qemu_cpu_stop(cpu, true);
1824 } else {
1825 cpu->stop = true;
1826 qemu_cpu_kick(cpu);
1827 }
d798e974
JK
1828 }
1829
d759c951
AB
1830 /* We need to drop the replay_lock so any vCPU threads woken up
1831 * can finish their replay tasks
1832 */
1833 replay_mutex_unlock();
1834
296af7c9 1835 while (!all_vcpus_paused()) {
be7d6c57 1836 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
bdc44640 1837 CPU_FOREACH(cpu) {
182735ef 1838 qemu_cpu_kick(cpu);
296af7c9
BS
1839 }
1840 }
d759c951
AB
1841
1842 qemu_mutex_unlock_iothread();
1843 replay_mutex_lock();
1844 qemu_mutex_lock_iothread();
296af7c9
BS
1845}
1846
2993683b
IM
1847void cpu_resume(CPUState *cpu)
1848{
1849 cpu->stop = false;
1850 cpu->stopped = false;
1851 qemu_cpu_kick(cpu);
1852}
1853
296af7c9
BS
1854void resume_all_vcpus(void)
1855{
bdc44640 1856 CPUState *cpu;
296af7c9 1857
f962cac4
LM
1858 if (!runstate_is_running()) {
1859 return;
1860 }
1861
40daca54 1862 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
bdc44640 1863 CPU_FOREACH(cpu) {
182735ef 1864 cpu_resume(cpu);
296af7c9
BS
1865 }
1866}
1867
dbadee4f 1868void cpu_remove_sync(CPUState *cpu)
4c055ab5
GZ
1869{
1870 cpu->stop = true;
1871 cpu->unplug = true;
1872 qemu_cpu_kick(cpu);
dbadee4f
PB
1873 qemu_mutex_unlock_iothread();
1874 qemu_thread_join(cpu->thread);
1875 qemu_mutex_lock_iothread();
2c579042
BR
1876}
1877
4900116e
DDAG
1878/* For temporary buffers for forming a name */
1879#define VCPU_THREAD_NAME_SIZE 16
1880
e5ab30a2 1881static void qemu_tcg_init_vcpu(CPUState *cpu)
296af7c9 1882{
4900116e 1883 char thread_name[VCPU_THREAD_NAME_SIZE];
37257942
AB
1884 static QemuCond *single_tcg_halt_cond;
1885 static QemuThread *single_tcg_cpu_thread;
e8feb96f
EC
1886 static int tcg_region_inited;
1887
f28d0dfd 1888 assert(tcg_enabled());
e8feb96f
EC
1889 /*
1890 * Initialize TCG regions--once. Now is a good time, because:
1891 * (1) TCG's init context, prologue and target globals have been set up.
1892 * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1893 * -accel flag is processed, so the check doesn't work then).
1894 */
1895 if (!tcg_region_inited) {
1896 tcg_region_inited = 1;
1897 tcg_region_init();
4ca3d09c
RH
1898 /*
1899 * If MTTCG, and we will create multiple cpus,
1900 * then we will have cpus running in parallel.
1901 */
1902 if (qemu_tcg_mttcg_enabled()) {
1903 MachineState *ms = MACHINE(qdev_get_machine());
1904 if (ms->smp.max_cpus > 1) {
1905 parallel_cpus = true;
1906 }
1907 }
e8feb96f 1908 }
4900116e 1909
37257942 1910 if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
814e612e 1911 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1912 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1913 qemu_cond_init(cpu->halt_cond);
37257942
AB
1914
1915 if (qemu_tcg_mttcg_enabled()) {
1916 /* create a thread per vCPU with TCG (MTTCG) */
37257942 1917 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
4900116e 1918 cpu->cpu_index);
37257942
AB
1919
1920 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1921 cpu, QEMU_THREAD_JOINABLE);
1922
1923 } else {
1924 /* share a single thread for all cpus with TCG */
1925 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1926 qemu_thread_create(cpu->thread, thread_name,
1927 qemu_tcg_rr_cpu_thread_fn,
1928 cpu, QEMU_THREAD_JOINABLE);
1929
1930 single_tcg_halt_cond = cpu->halt_cond;
1931 single_tcg_cpu_thread = cpu->thread;
1932 }
1ecf47bf 1933#ifdef _WIN32
814e612e 1934 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1ecf47bf 1935#endif
296af7c9 1936 } else {
37257942
AB
1937 /* For non-MTTCG cases we share the thread */
1938 cpu->thread = single_tcg_cpu_thread;
1939 cpu->halt_cond = single_tcg_halt_cond;
a342173a
DH
1940 cpu->thread_id = first_cpu->thread_id;
1941 cpu->can_do_io = 1;
1942 cpu->created = true;
296af7c9
BS
1943 }
1944}
1945
b0cb0a66
VP
1946static void qemu_hax_start_vcpu(CPUState *cpu)
1947{
1948 char thread_name[VCPU_THREAD_NAME_SIZE];
1949
1950 cpu->thread = g_malloc0(sizeof(QemuThread));
1951 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1952 qemu_cond_init(cpu->halt_cond);
1953
1954 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1955 cpu->cpu_index);
1956 qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1957 cpu, QEMU_THREAD_JOINABLE);
1958#ifdef _WIN32
1959 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1960#endif
b0cb0a66
VP
1961}
1962
48a106bd 1963static void qemu_kvm_start_vcpu(CPUState *cpu)
296af7c9 1964{
4900116e
DDAG
1965 char thread_name[VCPU_THREAD_NAME_SIZE];
1966
814e612e 1967 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1968 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1969 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1970 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1971 cpu->cpu_index);
1972 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1973 cpu, QEMU_THREAD_JOINABLE);
296af7c9
BS
1974}
1975
c97d6d2c
SAGDR
1976static void qemu_hvf_start_vcpu(CPUState *cpu)
1977{
1978 char thread_name[VCPU_THREAD_NAME_SIZE];
1979
1980 /* HVF currently does not support TCG, and only runs in
1981 * unrestricted-guest mode. */
1982 assert(hvf_enabled());
1983
1984 cpu->thread = g_malloc0(sizeof(QemuThread));
1985 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1986 qemu_cond_init(cpu->halt_cond);
1987
1988 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
1989 cpu->cpu_index);
1990 qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
1991 cpu, QEMU_THREAD_JOINABLE);
c97d6d2c
SAGDR
1992}
1993
19306806
JTV
1994static void qemu_whpx_start_vcpu(CPUState *cpu)
1995{
1996 char thread_name[VCPU_THREAD_NAME_SIZE];
1997
1998 cpu->thread = g_malloc0(sizeof(QemuThread));
1999 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2000 qemu_cond_init(cpu->halt_cond);
2001 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
2002 cpu->cpu_index);
2003 qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
2004 cpu, QEMU_THREAD_JOINABLE);
2005#ifdef _WIN32
2006 cpu->hThread = qemu_thread_get_handle(cpu->thread);
2007#endif
19306806
JTV
2008}
2009
10a9021d 2010static void qemu_dummy_start_vcpu(CPUState *cpu)
c7f0f3b1 2011{
4900116e
DDAG
2012 char thread_name[VCPU_THREAD_NAME_SIZE];
2013
814e612e 2014 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
2015 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
2016 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
2017 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
2018 cpu->cpu_index);
2019 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
c7f0f3b1 2020 QEMU_THREAD_JOINABLE);
c7f0f3b1
AL
2021}
2022
c643bed9 2023void qemu_init_vcpu(CPUState *cpu)
296af7c9 2024{
5cc8767d
LX
2025 MachineState *ms = MACHINE(qdev_get_machine());
2026
2027 cpu->nr_cores = ms->smp.cores;
2028 cpu->nr_threads = ms->smp.threads;
f324e766 2029 cpu->stopped = true;
9c09a251 2030 cpu->random_seed = qemu_guest_random_seed_thread_part1();
56943e8c
PM
2031
2032 if (!cpu->as) {
2033 /* If the target cpu hasn't set up any address spaces itself,
2034 * give it the default one.
2035 */
12ebc9a7 2036 cpu->num_ases = 1;
80ceb07a 2037 cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
56943e8c
PM
2038 }
2039
0ab07c62 2040 if (kvm_enabled()) {
48a106bd 2041 qemu_kvm_start_vcpu(cpu);
b0cb0a66
VP
2042 } else if (hax_enabled()) {
2043 qemu_hax_start_vcpu(cpu);
c97d6d2c
SAGDR
2044 } else if (hvf_enabled()) {
2045 qemu_hvf_start_vcpu(cpu);
c7f0f3b1 2046 } else if (tcg_enabled()) {
e5ab30a2 2047 qemu_tcg_init_vcpu(cpu);
19306806
JTV
2048 } else if (whpx_enabled()) {
2049 qemu_whpx_start_vcpu(cpu);
c7f0f3b1 2050 } else {
10a9021d 2051 qemu_dummy_start_vcpu(cpu);
0ab07c62 2052 }
81e96311
DH
2053
2054 while (!cpu->created) {
2055 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
2056 }
296af7c9
BS
2057}
2058
b4a3d965 2059void cpu_stop_current(void)
296af7c9 2060{
4917cf44 2061 if (current_cpu) {
0ec7e677
PM
2062 current_cpu->stop = true;
2063 cpu_exit(current_cpu);
b4a3d965 2064 }
296af7c9
BS
2065}
2066
56983463 2067int vm_stop(RunState state)
296af7c9 2068{
aa723c23 2069 if (qemu_in_vcpu_thread()) {
74892d24 2070 qemu_system_vmstop_request_prepare();
1dfb4dd9 2071 qemu_system_vmstop_request(state);
296af7c9
BS
2072 /*
2073 * FIXME: should not return to device code in case
2074 * vm_stop() has been requested.
2075 */
b4a3d965 2076 cpu_stop_current();
56983463 2077 return 0;
296af7c9 2078 }
56983463 2079
4486e89c 2080 return do_vm_stop(state, true);
296af7c9
BS
2081}
2082
2d76e823
CI
2083/**
2084 * Prepare for (re)starting the VM.
2085 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
2086 * running or in case of an error condition), 0 otherwise.
2087 */
2088int vm_prepare_start(void)
2089{
2090 RunState requested;
2d76e823
CI
2091
2092 qemu_vmstop_requested(&requested);
2093 if (runstate_is_running() && requested == RUN_STATE__MAX) {
2094 return -1;
2095 }
2096
2097 /* Ensure that a STOP/RESUME pair of events is emitted if a
2098 * vmstop request was pending. The BLOCK_IO_ERROR event, for
2099 * example, according to documentation is always followed by
2100 * the STOP event.
2101 */
2102 if (runstate_is_running()) {
3ab72385
PX
2103 qapi_event_send_stop();
2104 qapi_event_send_resume();
f056158d 2105 return -1;
2d76e823
CI
2106 }
2107
2108 /* We are sending this now, but the CPUs will be resumed shortly later */
3ab72385 2109 qapi_event_send_resume();
f056158d 2110
f056158d
MA
2111 cpu_enable_ticks();
2112 runstate_set(RUN_STATE_RUNNING);
2113 vm_state_notify(1, RUN_STATE_RUNNING);
2114 return 0;
2d76e823
CI
2115}
2116
/*
 * Start (or restart) the VM: resume all vCPUs, but only when
 * vm_prepare_start() reports (by returning 0) that they should be.
 */
void vm_start(void)
{
    if (vm_prepare_start() == 0) {
        resume_all_vcpus();
    }
}
2123
8a9236f1
LC
2124/* does a state transition even if the VM is already stopped,
2125 current state is forgotten forever */
56983463 2126int vm_stop_force_state(RunState state)
8a9236f1
LC
2127{
2128 if (runstate_is_running()) {
56983463 2129 return vm_stop(state);
8a9236f1
LC
2130 } else {
2131 runstate_set(state);
b2780d32
WC
2132
2133 bdrv_drain_all();
594a45ce
KW
2134 /* Make sure to return an error if the flush in a previous vm_stop()
2135 * failed. */
22af08ea 2136 return bdrv_flush_all();
8a9236f1
LC
2137 }
2138}
2139
/*
 * Print the list of supported CPUs by invoking cpu_list(), when that
 * macro is defined.  @optarg is not used.
 */
void list_cpus(const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#ifdef cpu_list
    cpu_list();
#endif
}
de0b36b6 2147
0cfd6a9a
LC
2148void qmp_memsave(int64_t addr, int64_t size, const char *filename,
2149 bool has_cpu, int64_t cpu_index, Error **errp)
2150{
2151 FILE *f;
2152 uint32_t l;
55e5c285 2153 CPUState *cpu;
0cfd6a9a 2154 uint8_t buf[1024];
0dc9daf0 2155 int64_t orig_addr = addr, orig_size = size;
0cfd6a9a
LC
2156
2157 if (!has_cpu) {
2158 cpu_index = 0;
2159 }
2160
151d1322
AF
2161 cpu = qemu_get_cpu(cpu_index);
2162 if (cpu == NULL) {
c6bd8c70
MA
2163 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
2164 "a CPU number");
0cfd6a9a
LC
2165 return;
2166 }
2167
2168 f = fopen(filename, "wb");
2169 if (!f) {
618da851 2170 error_setg_file_open(errp, errno, filename);
0cfd6a9a
LC
2171 return;
2172 }
2173
2174 while (size != 0) {
2175 l = sizeof(buf);
2176 if (l > size)
2177 l = size;
2f4d0f59 2178 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
0dc9daf0
BP
2179 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
2180 " specified", orig_addr, orig_size);
2f4d0f59
AK
2181 goto exit;
2182 }
0cfd6a9a 2183 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 2184 error_setg(errp, QERR_IO_ERROR);
0cfd6a9a
LC
2185 goto exit;
2186 }
2187 addr += l;
2188 size -= l;
2189 }
2190
2191exit:
2192 fclose(f);
2193}
6d3962bf
LC
2194
2195void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
2196 Error **errp)
2197{
2198 FILE *f;
2199 uint32_t l;
2200 uint8_t buf[1024];
2201
2202 f = fopen(filename, "wb");
2203 if (!f) {
618da851 2204 error_setg_file_open(errp, errno, filename);
6d3962bf
LC
2205 return;
2206 }
2207
2208 while (size != 0) {
2209 l = sizeof(buf);
2210 if (l > size)
2211 l = size;
eb6282f2 2212 cpu_physical_memory_read(addr, buf, l);
6d3962bf 2213 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 2214 error_setg(errp, QERR_IO_ERROR);
6d3962bf
LC
2215 goto exit;
2216 }
2217 addr += l;
2218 size -= l;
2219 }
2220
2221exit:
2222 fclose(f);
2223}
ab49ab5c
LC
2224
2225void qmp_inject_nmi(Error **errp)
2226{
9cb805fd 2227 nmi_monitor_handle(monitor_get_cpu_index(), errp);
ab49ab5c 2228}
27498bef 2229
76c86615 2230void dump_drift_info(void)
27498bef
ST
2231{
2232 if (!use_icount) {
2233 return;
2234 }
2235
76c86615 2236 qemu_printf("Host - Guest clock %"PRIi64" ms\n",
27498bef
ST
2237 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2238 if (icount_align_option) {
76c86615
MA
2239 qemu_printf("Max guest delay %"PRIi64" ms\n",
2240 -max_delay / SCALE_MS);
2241 qemu_printf("Max guest advance %"PRIi64" ms\n",
2242 max_advance / SCALE_MS);
27498bef 2243 } else {
76c86615
MA
2244 qemu_printf("Max guest delay NA\n");
2245 qemu_printf("Max guest advance NA\n");
27498bef
ST
2246 }
2247}
This page took 0.987731 seconds and 4 git commands to generate.