/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/config-file.h"
#include "migration/vmstate.h"
#include "monitor/monitor.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-misc.h"
#include "qapi/qapi-events-run-state.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "qemu/qemu-print.h"
#include "sysemu/tcg.h"
#include "sysemu/block-backend.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/hw_accel.h"
#include "sysemu/kvm.h"
#include "sysemu/hax.h"
#include "sysemu/hvf.h"
#include "sysemu/whpx.h"
#include "exec/exec-all.h"

#include "qemu/thread.h"
#include "qemu/plugin.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/option.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "qemu/guest-random.h"
#include "tcg/tcg.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"
#include "sysemu/runstate.h"
#include "hw/boards.h"
#include "hw/hw.h"

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

static QemuMutex qemu_global_mutex;

int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
static unsigned int throttle_percentage;

#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

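/*
 * A vCPU thread may go to sleep only when it has no queued work and is
 * either stopped or halted with nothing pending; when KVM handles halt
 * in the kernel the thread is never considered idle here.
 */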
static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

static bool icount_sleep = true;
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
#define MAX_ICOUNT_SHIFT 10

typedef struct TimersState {
    /* Protected by BQL.  */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* Protect fields that can be respectively read outside the
     * BQL, and written from multiple threads.
     */
    QemuSeqLock vm_clock_seqlock;
    QemuSpin vm_clock_lock;

    int16_t cpu_ticks_enabled;

    /* Conversion factor from emulated instructions to virtual clock ticks.  */
    int16_t icount_time_shift;

    /* Compensate for varying guest execution speed.  */
    int64_t qemu_icount_bias;

    int64_t vm_clock_warp_start;
    int64_t cpu_clock_offset;

    /* Only written by TCG thread */
    int64_t qemu_icount;

    /* for adjusting icount */
    QEMUTimer *icount_rt_timer;
    QEMUTimer *icount_vm_timer;
    QEMUTimer *icount_warp_timer;
} TimersState;

static TimersState timers_state;
bool mttcg_enabled;

/* The current number of executed instructions is based on what we
 * originally budgeted minus the current state of the decrementing
 * icount counters in extra/u16.low.
 */
static int64_t cpu_get_icount_executed(CPUState *cpu)
{
    return (cpu->icount_budget -
            (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra));
}

/*
 * Update the global shared timers_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
static void cpu_update_icount_locked(CPUState *cpu)
{
    int64_t executed = cpu_get_icount_executed(cpu);
    cpu->icount_budget -= executed;

    atomic_set_i64(&timers_state.qemu_icount,
                   timers_state.qemu_icount + executed);
}

/*
 * Update the global shared timers_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
void cpu_update_icount(CPUState *cpu)
{
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    cpu_update_icount_locked(cpu);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

static int64_t cpu_get_icount_raw_locked(void)
{
    CPUState *cpu = current_cpu;

    if (cpu && cpu->running) {
        if (!cpu->can_do_io) {
            error_report("Bad icount read");
            exit(1);
        }
        /* Take into account what has run */
        cpu_update_icount_locked(cpu);
    }
    /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
    return atomic_read_i64(&timers_state.qemu_icount);
}

static int64_t cpu_get_icount_locked(void)
{
    int64_t icount = cpu_get_icount_raw_locked();
    return atomic_read_i64(&timers_state.qemu_icount_bias) +
           cpu_icount_to_ns(icount);
}

int64_t cpu_get_icount_raw(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_raw_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

/* Return the virtual CPU time, based on the instruction counter.  */
int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << atomic_read(&timers_state.icount_time_shift);
}

static int64_t cpu_get_ticks_locked(void)
{
    int64_t ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_host_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Non-increasing ticks may happen if the host uses software suspend.  */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}

/* return the time elapsed in VM between vm_start and vm_stop.  Unless
 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
 * counter.
 */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    qemu_spin_lock(&timers_state.vm_clock_lock);
    ticks = cpu_get_ticks_locked();
    qemu_spin_unlock(&timers_state.vm_clock_lock);
    return ticks;
}

static int64_t cpu_get_clock_locked(void)
{
    int64_t time;

    time = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        time += get_clock();
    }

    return time;
}

/* Return the monotonic time elapsed in VM, i.e.,
 * the time between vm_start and vm_stop
 */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}

/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

/* disable cpu_get_ticks() : the clock is stopped.  You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop.  */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)

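/*
 * Periodically compare virtual time against real time and nudge
 * icount_time_shift by one in the appropriate direction, recomputing
 * qemu_icount_bias so the virtual clock does not jump when the shift
 * changes.
 */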
static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex.  */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing.  */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && timers_state.icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        atomic_set(&timers_state.icount_time_shift,
                   timers_state.icount_time_shift - 1);
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up.  */
        atomic_set(&timers_state.icount_time_shift,
                   timers_state.icount_time_shift + 1);
    }
    last_delta = delta;
    atomic_set_i64(&timers_state.qemu_icount_bias,
                   cur_icount - (timers_state.qemu_icount
                                 << timers_state.icount_time_shift));
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);
}

static void icount_adjust_rt(void *opaque)
{
    timer_mod(timers_state.icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(timers_state.icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
    icount_adjust();
}

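/* Convert a nanosecond deadline into an instruction budget, rounding up. */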
static int64_t qemu_icount_round(int64_t count)
{
    int shift = atomic_read(&timers_state.icount_time_shift);
    return (count + (1 << shift) - 1) >> shift;
}

static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = timers_state.vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        return;
    }

    seqlock_write_lock(&timers_state.vm_clock_seqlock,
                       &timers_state.vm_clock_lock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
                                            cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - timers_state.vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        atomic_set_i64(&timers_state.qemu_icount_bias,
                       timers_state.qemu_icount_bias + warp_delta);
    }
    timers_state.vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                         &timers_state.vm_clock_lock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}

void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
                                                      QEMU_TIMER_ATTR_ALL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_lock(&timers_state.vm_clock_seqlock,
                           &timers_state.vm_clock_lock);
        atomic_set_i64(&timers_state.qemu_icount_bias,
                       timers_state.qemu_icount_bias + warp);
        seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                             &timers_state.vm_clock_lock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}

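/*
 * Called when all vCPUs may be idle: warp QEMU_CLOCK_VIRTUAL forward to
 * the next timer deadline, either immediately (sleep=off) or once enough
 * real time has elapsed (sleep=on, via icount_warp_timer).
 */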
void qemu_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    if (!use_icount) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    if (replay_mode != REPLAY_MODE_PLAY) {
        if (!all_cpu_threads_idle()) {
            return;
        }

        if (qtest_enabled()) {
            /* When testing, qtest commands advance icount.  */
            return;
        }

        replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
    } else {
        /* warp clock deterministically in record/replay mode */
        if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
            /* vCPU is sleeping and warp can't be started.
               It is probably a race condition: notification sent
               to vCPU was processed in advance and vCPU went to sleep.
               Therefore we have to wake it up for doing something.  */
            if (replay_has_checkpoint()) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            }
            return;
        }
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
                                          ~QEMU_TIMER_ATTR_EXTERNAL);
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
            warn_report("icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            atomic_set_i64(&timers_state.qemu_icount_bias,
                           timers_state.qemu_icount_bias + deadline);
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time, (related to the time left until the next event) has
             * passed.  The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This keeps the warps from being visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_lock(&timers_state.vm_clock_seqlock,
                               &timers_state.vm_clock_lock);
            if (timers_state.vm_clock_warp_start == -1
                || timers_state.vm_clock_warp_start > clock) {
                timers_state.vm_clock_warp_start = clock;
            }
            seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                 &timers_state.vm_clock_lock);
            timer_mod_anticipate(timers_state.icount_warp_timer,
                                 clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void qemu_account_warp_timer(void)
{
    if (!use_icount || !icount_sleep) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        return;
    }

    timer_del(timers_state.icount_warp_timer);
    icount_warp_rt();
}

static bool icount_state_needed(void *opaque)
{
    return use_icount;
}

static bool warp_timer_state_needed(void *opaque)
{
    TimersState *s = opaque;
    return s->icount_warp_timer != NULL;
}

static bool adjust_timers_state_needed(void *opaque)
{
    TimersState *s = opaque;
    return s->icount_rt_timer != NULL;
}

/*
 * Subsection for warp timer migration is optional, because the timer
 * may not be created
 */
static const VMStateDescription icount_vmstate_warp_timer = {
    .name = "timer/icount/warp_timer",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = warp_timer_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(vm_clock_warp_start, TimersState),
        VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription icount_vmstate_adjust_timers = {
    .name = "timer/icount/timers",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = adjust_timers_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
        VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_warp_timer,
        &icount_vmstate_adjust_timers,
        NULL
    }
};

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_UNUSED(8),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};

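/*
 * Async work item scheduled on each vCPU by the throttle timer: sleep
 * off this vCPU's share of the throttling timeslice.
 */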
static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
{
    double pct;
    double throttle_ratio;
    int64_t sleeptime_ns, endtime_ns;

    if (!cpu_throttle_get_percentage()) {
        return;
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    throttle_ratio = pct / (1 - pct);
    /* Add 1ns to fix double's rounding error (like 0.9999999...) */
    sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);
    endtime_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + sleeptime_ns;
    while (sleeptime_ns > 0 && !cpu->stop) {
        if (sleeptime_ns > SCALE_MS) {
            qemu_cond_timedwait(cpu->halt_cond, &qemu_global_mutex,
                                sleeptime_ns / SCALE_MS);
        } else {
            qemu_mutex_unlock_iothread();
            g_usleep(sleeptime_ns / SCALE_US);
            qemu_mutex_lock_iothread();
        }
        sleeptime_ns = endtime_ns - qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    }
    atomic_set(&cpu->throttle_thread_scheduled, 0);
}

static void cpu_throttle_timer_tick(void *opaque)
{
    CPUState *cpu;
    double pct;

    /* Stop the timer if needed */
    if (!cpu_throttle_get_percentage()) {
        return;
    }
    CPU_FOREACH(cpu) {
        if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
            async_run_on_cpu(cpu, cpu_throttle_thread,
                             RUN_ON_CPU_NULL);
        }
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                              CPU_THROTTLE_TIMESLICE_NS / (1-pct));
}

void cpu_throttle_set(int new_throttle_pct)
{
    /* Ensure throttle percentage is within valid range */
    new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
    new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);

    atomic_set(&throttle_percentage, new_throttle_pct);

    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                              CPU_THROTTLE_TIMESLICE_NS);
}

void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}

bool cpu_throttle_active(void)
{
    return (cpu_throttle_get_percentage() != 0);
}

int cpu_throttle_get_percentage(void)
{
    return atomic_read(&throttle_percentage);
}

void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock);
    qemu_spin_init(&timers_state.vm_clock_lock);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                  cpu_throttle_timer_tick, NULL);
}

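/*
 * Parse the -icount option group: validate the shift/sleep/align
 * combination, then set up the warp timer and, for shift=auto, the
 * periodic speed-adjustment timers.  On failure, set an error in *errp
 * before any icount state has been touched.
 */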
void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option = qemu_opt_get(opts, "shift");
    bool sleep = qemu_opt_get_bool(opts, "sleep", true);
    bool align = qemu_opt_get_bool(opts, "align", false);
    long time_shift = -1;
    char *rem_str = NULL;

    if (!option && qemu_opt_get(opts, "align")) {
        error_setg(errp, "Please specify shift option when using align");
        return;
    }

    if (align && !sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
        return;
    }

    if (strcmp(option, "auto") != 0) {
        errno = 0;
        time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
            return;
        }
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
        return;
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
        return;
    }

    icount_sleep = sleep;
    if (icount_sleep) {
        timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                                      icount_timer_cb, NULL);
    }

    icount_align_option = align;

    if (time_shift >= 0) {
        timers_state.icount_time_shift = time_shift;
        use_icount = 1;
        return;
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway.  */
    timers_state.icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers.  */
    timers_state.vm_clock_warp_start = -1;
    timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                                icount_adjust_rt, NULL);
    timer_mod(timers_state.icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                                icount_adjust_vm, NULL);
    timer_mod(timers_state.icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              NANOSECONDS_PER_SECOND / 10);
}

/***********************************************************/
/* TCG vCPU kick timer
 *
 * The kick timer is responsible for moving single threaded vCPU
 * emulation on to the next vCPU.  If more than one vCPU is running a
 * timer event will force a cpu->exit so the next vCPU can get
 * scheduled.
 *
 * The timer is removed if all vCPUs are idle and restarted again once
 * idleness is complete.
 */

static QEMUTimer *tcg_kick_vcpu_timer;
static CPUState *tcg_current_rr_cpu;

#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)

static inline int64_t qemu_tcg_next_kick(void)
{
    return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
}

/* Kick the currently round-robin scheduled vCPU to next */
static void qemu_cpu_kick_rr_next_cpu(void)
{
    CPUState *cpu;
    do {
        cpu = atomic_mb_read(&tcg_current_rr_cpu);
        if (cpu) {
            cpu_exit(cpu);
        }
    } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
}

/* Kick all RR vCPUs */
static void qemu_cpu_kick_rr_cpus(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_exit(cpu);
    };
}

static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
{
}

void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
{
    if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
        qemu_notify_event();
        return;
    }

    if (qemu_in_vcpu_thread()) {
        /* A CPU is currently running; kick it back out to the
         * tcg_cpu_exec() loop so it will recalculate its
         * icount deadline immediately.
         */
        qemu_cpu_kick(current_cpu);
    } else if (first_cpu) {
        /* qemu_cpu_kick is not enough to kick a halted CPU out of
         * qemu_tcg_wait_io_event.  async_run_on_cpu, instead,
         * causes cpu_thread_is_idle to return false.  This way,
         * handle_icount_deadline can run.
         * If we have no CPUs at all for some reason, we don't
         * need to do anything.
         */
        async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
    }
}

static void kick_tcg_thread(void *opaque)
{
    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    qemu_cpu_kick_rr_next_cpu();
}

static void start_tcg_kick_timer(void)
{
    assert(!mttcg_enabled);
    if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                           kick_tcg_thread, NULL);
    }
    if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    }
}

static void stop_tcg_kick_timer(void)
{
    assert(!mttcg_enabled);
    if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
        timer_del(tcg_kick_vcpu_timer);
    }
}

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
        /* TODO: move to cpu_synchronize_state() */
        if (hvf_enabled()) {
            hvf_cpu_synchronize_state(cpu);
        }
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
        /* TODO: move to cpu_synchronize_post_reset() */
        if (hvf_enabled()) {
            hvf_cpu_synchronize_post_reset(cpu);
        }
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
        /* TODO: move to cpu_synchronize_post_init() */
        if (hvf_enabled()) {
            hvf_cpu_synchronize_post_init(cpu);
        }
    }
}

void cpu_synchronize_all_pre_loadvm(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_pre_loadvm(cpu);
    }
}

static int do_vm_stop(RunState state, bool send_stop)
{
    int ret = 0;

    if (runstate_is_running()) {
        runstate_set(state);
        cpu_disable_ticks();
        pause_all_vcpus();
        vm_state_notify(0, state);
        if (send_stop) {
            qapi_event_send_stop();
        }
    }

    bdrv_drain_all();
    ret = bdrv_flush_all();

    return ret;
}

/* Special vm_stop() variant for terminating the process.  Historically clients
 * did not expect a QMP STOP event and so we need to retain compatibility.
 */
int vm_shutdown(void)
{
    return do_vm_stop(RUN_STATE_SHUTDOWN, false);
}

static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        pthread_sigmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}

static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
{
    if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
        sigbus_reraise();
    }

    if (current_cpu) {
        /* Called asynchronously in VCPU thread.  */
        if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    } else {
        /* Called synchronously (via signalfd) in main thread.  */
        if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}
#else /* !CONFIG_LINUX */
static void qemu_init_sigbus(void)
{
}
#endif /* !CONFIG_LINUX */

static QemuThread io_thread;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
{
    do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
}

static void qemu_kvm_destroy_vcpu(CPUState *cpu)
{
    if (kvm_destroy_vcpu(cpu) < 0) {
        error_report("kvm_destroy_vcpu failed");
        exit(EXIT_FAILURE);
    }
}

static void qemu_tcg_destroy_vcpu(CPUState *cpu)
{
}

static void qemu_cpu_stop(CPUState *cpu, bool exit)
{
    g_assert(qemu_cpu_is_self(cpu));
    cpu->stop = false;
    cpu->stopped = true;
    if (exit) {
        cpu_exit(cpu);
    }
    qemu_cond_broadcast(&qemu_pause_cond);
}

static void qemu_wait_io_event_common(CPUState *cpu)
{
    atomic_mb_set(&cpu->thread_kicked, false);
    if (cpu->stop) {
        qemu_cpu_stop(cpu, false);
    }
    process_queued_cpu_work(cpu);
}

static void qemu_tcg_rr_wait_io_event(void)
{
    CPUState *cpu;

    while (all_cpu_threads_idle()) {
        stop_tcg_kick_timer();
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
    }

    start_tcg_kick_timer();

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
}

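/*
 * Block the calling vCPU thread until it has work to do again,
 * notifying the plugin subsystem around the idle period.
 */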
static void qemu_wait_io_event(CPUState *cpu)
{
    bool slept = false;

    while (cpu_thread_is_idle(cpu)) {
        if (!slept) {
            slept = true;
            qemu_plugin_vcpu_idle_cb(cpu);
        }
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }
    if (slept) {
        qemu_plugin_vcpu_resume_cb(cpu);
    }

#ifdef _WIN32
    /* Eat dummy APC queued by qemu_cpu_kick_thread.  */
    if (!tcg_enabled()) {
        SleepEx(0, TRUE);
    }
#endif
    qemu_wait_io_event_common(cpu);
}

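/* The KVM-specific vCPU thread function: one host thread per vCPU. */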
static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        error_report("kvm_init_vcpu failed: %s", strerror(-r));
        exit(1);
    }

    kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    do {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    qemu_kvm_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
}

static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    error_report("qtest is not supported under Windows");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    do {
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        qemu_wait_io_event(cpu);
    } while (!cpu->unplug);

    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
#endif
}

static int64_t tcg_get_icount_limit(void)
{
    int64_t deadline;

    if (replay_mode != REPLAY_MODE_PLAY) {
        /*
         * Include all the timers, because they may need attention.
         * Overly long CPU execution may create an unnecessary delay in the UI.
         */
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
                                              QEMU_TIMER_ATTR_ALL);
        /* Check realtime timers, because they help with input processing */
        deadline = qemu_soonest_timeout(deadline,
                                        qemu_clock_deadline_ns_all(QEMU_CLOCK_REALTIME,
                                                                   QEMU_TIMER_ATTR_ALL));

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        return qemu_icount_round(deadline);
    } else {
        return replay_get_instructions();
    }
}

static void handle_icount_deadline(void)
{
    assert(qemu_in_vcpu_thread());
    if (use_icount) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
                                                      QEMU_TIMER_ATTR_ALL);

        if (deadline == 0) {
            /* Wake up other AioContexts.  */
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        }
    }
}

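/* Hand the vCPU its instruction budget for the coming execution slice. */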
static void prepare_icount_for_run(CPUState *cpu)
{
    if (use_icount) {
        int insns_left;

        /* These should always be cleared by process_icount_data after
         * each vCPU execution.  However u16.high can be raised
         * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
         */
        g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
        g_assert(cpu->icount_extra == 0);

        cpu->icount_budget = tcg_get_icount_limit();
        insns_left = MIN(0xffff, cpu->icount_budget);
        cpu_neg(cpu)->icount_decr.u16.low = insns_left;
        cpu->icount_extra = cpu->icount_budget - insns_left;

        replay_mutex_lock();
    }
}

static void process_icount_data(CPUState *cpu)
{
    if (use_icount) {
        /* Account for executed instructions */
        cpu_update_icount(cpu);

        /* Reset the counters */
        cpu_neg(cpu)->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        cpu->icount_budget = 0;

        replay_account_executed_instructions();

        replay_mutex_unlock();
    }
}

static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

    assert(tcg_enabled());
#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    cpu_exec_start(cpu);
    ret = cpu_exec(cpu);
    cpu_exec_end(cpu);
#ifdef CONFIG_PROFILER
    atomic_set(&tcg_ctx->prof.cpu_exec_time,
               tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
#endif
    return ret;
}

/* Destroy any remaining vCPUs which have been unplugged and have
 * finished running
 */
static void deal_with_unplugged_cpus(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->unplug && !cpu_can_run(cpu)) {
            qemu_tcg_destroy_vcpu(cpu);
            cpu->created = false;
            qemu_cond_signal(&qemu_cpu_cond);
            break;
        }
    }
}

/* Single-threaded TCG
 *
 * In the single-threaded case each vCPU is simulated in turn.  If
 * there is more than a single vCPU we create a simple timer to kick
 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
 * This is done explicitly rather than relying on side-effects
 * elsewhere.
 */

static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    assert(tcg_enabled());
    rcu_register_thread();
    tcg_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->can_do_io = 1;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            current_cpu = cpu;
            qemu_wait_io_event_common(cpu);
        }
    }

    start_tcg_kick_timer();

    cpu = first_cpu;

    /* process any pending work */
    cpu->exit_request = 1;

    while (1) {
        qemu_mutex_unlock_iothread();
        replay_mutex_lock();
        qemu_mutex_lock_iothread();
        /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
        qemu_account_warp_timer();

        /* Run the timers here.  This is much more efficient than
         * waking up the I/O thread and waiting for completion.
         */
        handle_icount_deadline();

        replay_mutex_unlock();

        if (!cpu) {
            cpu = first_cpu;
        }

        while (cpu && !cpu->queued_work_first && !cpu->exit_request) {

            atomic_mb_set(&tcg_current_rr_cpu, cpu);
            current_cpu = cpu;

            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

            if (cpu_can_run(cpu)) {
                int r;

                qemu_mutex_unlock_iothread();
                prepare_icount_for_run(cpu);

                r = tcg_cpu_exec(cpu);

                process_icount_data(cpu);
                qemu_mutex_lock_iothread();

                if (r == EXCP_DEBUG) {
                    cpu_handle_guest_debug(cpu);
                    break;
                } else if (r == EXCP_ATOMIC) {
                    qemu_mutex_unlock_iothread();
                    cpu_exec_step_atomic(cpu);
                    qemu_mutex_lock_iothread();
                    break;
                }
            } else if (cpu->stop) {
                if (cpu->unplug) {
                    cpu = CPU_NEXT(cpu);
                }
                break;
            }

            cpu = CPU_NEXT(cpu);
        } /* while (cpu && !cpu->exit_request).. */

        /* Does not need atomic_mb_set because a spurious wakeup is okay.  */
        atomic_set(&tcg_current_rr_cpu, NULL);

        if (cpu && cpu->exit_request) {
            atomic_mb_set(&cpu->exit_request, 0);
        }

        if (use_icount && all_cpu_threads_idle()) {
            /*
             * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
             * in the main_loop, wake it up in order to start the warp timer.
             */
            qemu_notify_event();
        }

        qemu_tcg_rr_wait_io_event();
        deal_with_unplugged_cpus();
    }

    rcu_unregister_thread();
    return NULL;
}

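/* The HAX-specific vCPU thread function (Intel HAXM accelerator). */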
static void *qemu_hax_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();
    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    current_cpu = cpu;

    hax_init_vcpu(cpu);
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    do {
        if (cpu_can_run(cpu)) {
            r = hax_smp_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }

        qemu_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));
    rcu_unregister_thread();
    return NULL;
}

/* The HVF-specific vCPU thread function.  This one should only run when the host
 * CPU supports the VMX "unrestricted guest" feature. */
static void *qemu_hvf_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    int r;

    assert(hvf_enabled());

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    hvf_init_vcpu(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    do {
        if (cpu_can_run(cpu)) {
            r = hvf_vcpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    hvf_vcpu_destroy(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
}

static void *qemu_whpx_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    current_cpu = cpu;

    r = whpx_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    do {
        if (cpu_can_run(cpu)) {
            r = whpx_vcpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        while (cpu_thread_is_idle(cpu)) {
            qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
        }
        qemu_wait_io_event_common(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    whpx_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
}

#ifdef _WIN32
static void CALLBACK dummy_apc_func(ULONG_PTR unused)
{
}
#endif

/* Multi-threaded TCG
 *
 * In the multi-threaded case each vCPU has its own thread.  The TLS
 * variable current_cpu can be used deep in the code to find the
 * current CPUState for a given thread.
 */

static void *qemu_tcg_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    assert(tcg_enabled());
    g_assert(!use_icount);

    rcu_register_thread();
    tcg_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    cpu->thread_id = qemu_get_thread_id();
    cpu->created = true;
    cpu->can_do_io = 1;
    current_cpu = cpu;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_guest_random_seed_thread_part2(cpu->random_seed);

    /* process any pending work */
    cpu->exit_request = 1;

    do {
        if (cpu_can_run(cpu)) {
            int r;
            qemu_mutex_unlock_iothread();
            r = tcg_cpu_exec(cpu);
            qemu_mutex_lock_iothread();
            switch (r) {
            case EXCP_DEBUG:
                cpu_handle_guest_debug(cpu);
                break;
            case EXCP_HALTED:
                /* during start-up the vCPU is reset and the thread is
                 * kicked several times.  If we don't ensure we go back
                 * to sleep in the halted state we won't cleanly
                 * start up when the vCPU is enabled.
                 *
                 * cpu->halted should ensure we sleep in wait_io_event
                 */
                g_assert(cpu->halted);
                break;
            case EXCP_ATOMIC:
                qemu_mutex_unlock_iothread();
                cpu_exec_step_atomic(cpu);
                qemu_mutex_lock_iothread();
            default:
                /* Ignore everything else? */
                break;
            }
        }

        atomic_mb_set(&cpu->exit_request, 0);
        qemu_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    qemu_tcg_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
}

static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
    int err;

    if (cpu->thread_kicked) {
        return;
    }
    cpu->thread_kicked = true;
    err = pthread_kill(cpu->thread->thread, SIG_IPI);
    if (err && err != ESRCH) {
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
        exit(1);
    }
#else /* _WIN32 */
    if (!qemu_cpu_is_self(cpu)) {
        if (whpx_enabled()) {
            whpx_vcpu_kick(cpu);
        } else if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
            fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
                    __func__, GetLastError());
            exit(1);
        }
    }
#endif
}

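/*
 * Wake a vCPU: broadcast its halt condition, then kick it out of guest
 * execution in an accelerator-specific way.
 */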
c08d7424 1787void qemu_cpu_kick(CPUState *cpu)
296af7c9 1788{
f5c121b8 1789 qemu_cond_broadcast(cpu->halt_cond);
e0c38211 1790 if (tcg_enabled()) {
e8f22f76
AB
1791 if (qemu_tcg_mttcg_enabled()) {
1792 cpu_exit(cpu);
1793 } else {
1794 qemu_cpu_kick_rr_cpus();
1795 }
e0c38211 1796 } else {
b0cb0a66
VP
1797 if (hax_enabled()) {
1798 /*
1799 * FIXME: race condition with the exit_request check in
1800 * hax_vcpu_hax_exec
1801 */
1802 cpu->exit_request = 1;
1803 }
e0c38211
PB
1804 qemu_cpu_kick_thread(cpu);
1805 }
296af7c9
BS
1806}
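
/*
 * Illustrative sketch, not part of cpus.c: the usual pattern for code
 * in the I/O thread that needs a vCPU to notice freshly changed state
 * is to raise a flag under the BQL and then kick the vCPU so it drops
 * out of its halt wait or execution loop.  request_vcpu_stop() is a
 * hypothetical example mirroring what pause_all_vcpus() does below.
 */
static void request_vcpu_stop(CPUState *cpu)
{
    g_assert(qemu_mutex_iothread_locked());
    cpu->stop = true;       /* checked by the vCPU thread functions */
    qemu_cpu_kick(cpu);     /* wake the thread so it sees the flag */
}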

void qemu_cpu_kick_self(void)
{
    assert(current_cpu);
    qemu_cpu_kick_thread(current_cpu);
}

bool qemu_cpu_is_self(CPUState *cpu)
{
    return qemu_thread_is_self(cpu->thread);
}

bool qemu_in_vcpu_thread(void)
{
    return current_cpu && qemu_cpu_is_self(current_cpu);
}

static __thread bool iothread_locked = false;

bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}

/*
 * The BQL is taken from so many places that it is worth profiling the
 * callers directly, instead of funneling them all through a single function.
 */
void qemu_mutex_lock_iothread_impl(const char *file, int line)
{
    QemuMutexLockFunc bql_lock = atomic_read(&qemu_bql_mutex_lock_func);

    g_assert(!qemu_mutex_iothread_locked());
    bql_lock(&qemu_global_mutex, file, line);
    iothread_locked = true;
}

void qemu_mutex_unlock_iothread(void)
{
    g_assert(qemu_mutex_iothread_locked());
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}
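
/*
 * Illustrative sketch, not part of cpus.c: a helper that may be called
 * with or without the BQL can use qemu_mutex_iothread_locked() to take
 * the lock only when it is not already held.  do_bql_work() is
 * hypothetical.
 */
static void do_bql_work(void)
{
    bool release = false;

    if (!qemu_mutex_iothread_locked()) {
        qemu_mutex_lock_iothread();
        release = true;
    }
    /* ... touch state protected by the BQL ... */
    if (release) {
        qemu_mutex_unlock_iothread();
    }
}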

void qemu_cond_wait_iothread(QemuCond *cond)
{
    qemu_cond_wait(cond, &qemu_global_mutex);
}

static bool all_vcpus_paused(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu->stopped) {
            return false;
        }
    }

    return true;
}

void pause_all_vcpus(void)
{
    CPUState *cpu;

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
    CPU_FOREACH(cpu) {
        if (qemu_cpu_is_self(cpu)) {
            qemu_cpu_stop(cpu, true);
        } else {
            cpu->stop = true;
            qemu_cpu_kick(cpu);
        }
    }

    /* We need to drop the replay_lock so any vCPU threads woken up
     * can finish their replay tasks
     */
    replay_mutex_unlock();

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
        CPU_FOREACH(cpu) {
            qemu_cpu_kick(cpu);
        }
    }

    qemu_mutex_unlock_iothread();
    replay_mutex_lock();
    qemu_mutex_lock_iothread();
}

void cpu_resume(CPUState *cpu)
{
    cpu->stop = false;
    cpu->stopped = false;
    qemu_cpu_kick(cpu);
}

void resume_all_vcpus(void)
{
    CPUState *cpu;

    if (!runstate_is_running()) {
        return;
    }

    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
    CPU_FOREACH(cpu) {
        cpu_resume(cpu);
    }
}
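
/*
 * Illustrative sketch, not part of cpus.c: callers that must mutate
 * guest-visible state with no vCPU running bracket the update between
 * pause_all_vcpus() and resume_all_vcpus().  The helper below is
 * hypothetical.
 */
static void update_state_with_vcpus_stopped(void)
{
    pause_all_vcpus();      /* returns once every vCPU is stopped */
    /* ... modify state that vCPUs must not observe mid-change ... */
    resume_all_vcpus();
}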

void cpu_remove_sync(CPUState *cpu)
{
    cpu->stop = true;
    cpu->unplug = true;
    qemu_cpu_kick(cpu);
    qemu_mutex_unlock_iothread();
    qemu_thread_join(cpu->thread);
    qemu_mutex_lock_iothread();
}
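
/*
 * Illustrative sketch, not part of cpus.c: a CPU hot-unplug path would
 * typically call cpu_remove_sync() and only then release per-vCPU
 * resources, since the call returns after the vCPU thread has exited.
 * finalize_unplugged_vcpu() is hypothetical.
 */
static void finalize_unplugged_vcpu(CPUState *cpu)
{
    cpu_remove_sync(cpu);   /* blocks until qemu_thread_join() returns */
    /* ... release per-vCPU resources here, once the thread is gone ... */
}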

/* For temporary buffers for forming a name */
#define VCPU_THREAD_NAME_SIZE 16

static void qemu_tcg_init_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];
    static QemuCond *single_tcg_halt_cond;
    static QemuThread *single_tcg_cpu_thread;
    static int tcg_region_inited;

    assert(tcg_enabled());
    /*
     * Initialize TCG regions--once. Now is a good time, because:
     * (1) TCG's init context, prologue and target globals have been set up.
     * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
     *     -accel flag is processed, so the check doesn't work then).
     */
    if (!tcg_region_inited) {
        tcg_region_inited = 1;
        tcg_region_init();
    }

    if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);

        if (qemu_tcg_mttcg_enabled()) {
            /* create a thread per vCPU with TCG (MTTCG) */
            parallel_cpus = true;
            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
                     cpu->cpu_index);

            qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                               cpu, QEMU_THREAD_JOINABLE);

        } else {
            /* share a single thread for all cpus with TCG */
            snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
            qemu_thread_create(cpu->thread, thread_name,
                               qemu_tcg_rr_cpu_thread_fn,
                               cpu, QEMU_THREAD_JOINABLE);

            single_tcg_halt_cond = cpu->halt_cond;
            single_tcg_cpu_thread = cpu->thread;
        }
#ifdef _WIN32
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
    } else {
        /* For non-MTTCG cases we share the thread */
        cpu->thread = single_tcg_cpu_thread;
        cpu->halt_cond = single_tcg_halt_cond;
        cpu->thread_id = first_cpu->thread_id;
        cpu->can_do_io = 1;
        cpu->created = true;
    }
}

static void qemu_hax_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);

    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
    cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
}

static void qemu_kvm_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
}

static void qemu_hvf_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    /* HVF currently does not support TCG, and only runs in
     * unrestricted-guest mode. */
    assert(hvf_enabled());

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);

    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
}

static void qemu_whpx_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
    cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
}

static void qemu_dummy_start_vcpu(CPUState *cpu)
{
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
             cpu->cpu_index);
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
}

void qemu_init_vcpu(CPUState *cpu)
{
    MachineState *ms = MACHINE(qdev_get_machine());

    cpu->nr_cores = ms->smp.cores;
    cpu->nr_threads = ms->smp.threads;
    cpu->stopped = true;
    cpu->random_seed = qemu_guest_random_seed_thread_part1();

    if (!cpu->as) {
        /* If the target cpu hasn't set up any address spaces itself,
         * give it the default one.
         */
        cpu->num_ases = 1;
        cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
    }

    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (hax_enabled()) {
        qemu_hax_start_vcpu(cpu);
    } else if (hvf_enabled()) {
        qemu_hvf_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
    } else if (whpx_enabled()) {
        qemu_whpx_start_vcpu(cpu);
    } else {
        qemu_dummy_start_vcpu(cpu);
    }

    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
    }
}

void cpu_stop_current(void)
{
    if (current_cpu) {
        current_cpu->stop = true;
        cpu_exit(current_cpu);
    }
}

int vm_stop(RunState state)
{
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
        /*
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.
         */
        cpu_stop_current();
        return 0;
    }

    return do_vm_stop(state, true);
}

/**
 * Prepare for (re)starting the VM.
 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
 * running or in case of an error condition), 0 otherwise.
 */
int vm_prepare_start(void)
{
    RunState requested;

    qemu_vmstop_requested(&requested);
    if (runstate_is_running() && requested == RUN_STATE__MAX) {
        return -1;
    }

    /* Ensure that a STOP/RESUME pair of events is emitted if a
     * vmstop request was pending. The BLOCK_IO_ERROR event, for
     * example, is documented as always being followed by the STOP
     * event.
     */
    if (runstate_is_running()) {
        qapi_event_send_stop();
        qapi_event_send_resume();
        return -1;
    }

    /* We are sending this now, but the CPUs will be resumed shortly later */
    qapi_event_send_resume();

    cpu_enable_ticks();
    runstate_set(RUN_STATE_RUNNING);
    vm_state_notify(1, RUN_STATE_RUNNING);
    return 0;
}

void vm_start(void)
{
    if (!vm_prepare_start()) {
        resume_all_vcpus();
    }
}
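
/*
 * Illustrative sketch, not part of cpus.c: a management operation that
 * needs the machine quiesced and then running again can pair vm_stop()
 * with vm_start().  checkpoint_machine() is hypothetical.
 */
static void checkpoint_machine(void)
{
    if (vm_stop(RUN_STATE_PAUSED) == 0) {
        /* ... inspect or save machine state while stopped ... */
        vm_start();     /* emits RESUME and restarts the vCPUs */
    }
}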

/* Does a state transition even if the VM is already stopped;
 * the current state is forgotten forever. */
int vm_stop_force_state(RunState state)
{
    if (runstate_is_running()) {
        return vm_stop(state);
    } else {
        runstate_set(state);

        bdrv_drain_all();
        /* Make sure to return an error if the flush in a previous vm_stop()
         * failed. */
        return bdrv_flush_all();
    }
}

void list_cpus(const char *optarg)
{
    /* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
    cpu_list();
#endif
}

void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
{
    FILE *f;
    uint32_t l;
    CPUState *cpu;
    uint8_t buf[1024];
    int64_t orig_addr = addr, orig_size = size;

    if (!has_cpu) {
        cpu_index = 0;
    }

    cpu = qemu_get_cpu(cpu_index);
    if (cpu == NULL) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
                   "a CPU number");
        return;
    }

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
                             " specified", orig_addr, orig_size);
            goto exit;
        }
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}
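
/*
 * Illustrative usage, assuming a QMP monitor is attached; the filename
 * below is an example value.  The command implemented above is
 * typically driven as
 *   { "execute": "memsave",
 *     "arguments": { "val": 4096, "size": 8192,
 *                    "filename": "/tmp/guest-mem.bin" } }
 * where "val" is the starting virtual address in the selected vCPU's
 * address space.
 */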

void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
                  Error **errp)
{
    FILE *f;
    uint32_t l;
    uint8_t buf[1024];

    f = fopen(filename, "wb");
    if (!f) {
        error_setg_file_open(errp, errno, filename);
        return;
    }

    while (size != 0) {
        l = sizeof(buf);
        if (l > size) {
            l = size;
        }
        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_setg(errp, QERR_IO_ERROR);
            goto exit;
        }
        addr += l;
        size -= l;
    }

exit:
    fclose(f);
}

void qmp_inject_nmi(Error **errp)
{
    nmi_monitor_handle(monitor_get_cpu_index(), errp);
}

void dump_drift_info(void)
{
    if (!use_icount) {
        return;
    }

    qemu_printf("Host - Guest clock %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount()) / SCALE_MS);
    if (icount_align_option) {
        qemu_printf("Max guest delay %"PRIi64" ms\n",
                    -max_delay / SCALE_MS);
        qemu_printf("Max guest advance %"PRIi64" ms\n",
                    max_advance / SCALE_MS);
    } else {
        qemu_printf("Max guest delay NA\n");
        qemu_printf("Max guest advance NA\n");
    }
}
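
/*
 * Worked example for the drift figure above, assuming SCALE_MS is the
 * usual nanoseconds-per-millisecond factor (1000000): with
 * cpu_get_clock() == 5000000000 ns and cpu_get_icount() == 4988000000 ns,
 * the printed value is (5000000000 - 4988000000) / 1000000 = 12 ms,
 * i.e. the guest clock lags the host clock by 12 ms.
 */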