]> Git Repo - qemu.git/blame - cpus.c
cpu: Reclaim vCPU objects
[qemu.git] / cpus.c
CommitLineData
296af7c9
BS
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25/* Needed early for CONFIG_BSD etc. */
7b31bbc2 26#include "qemu/osdep.h"
33c11879
PB
27#include "qemu-common.h"
28#include "cpu.h"
83c9089e 29#include "monitor/monitor.h"
a4e15de9 30#include "qapi/qmp/qerror.h"
d49b6836 31#include "qemu/error-report.h"
9c17d615 32#include "sysemu/sysemu.h"
da31d594 33#include "sysemu/block-backend.h"
022c62cb 34#include "exec/gdbstub.h"
9c17d615
PB
35#include "sysemu/dma.h"
36#include "sysemu/kvm.h"
de0b36b6 37#include "qmp-commands.h"
63c91552 38#include "exec/exec-all.h"
296af7c9 39
1de7afc9 40#include "qemu/thread.h"
9c17d615
PB
41#include "sysemu/cpus.h"
42#include "sysemu/qtest.h"
1de7afc9
PB
43#include "qemu/main-loop.h"
44#include "qemu/bitmap.h"
cb365646 45#include "qemu/seqlock.h"
a4e15de9 46#include "qapi-event.h"
9cb805fd 47#include "hw/nmi.h"
8b427044 48#include "sysemu/replay.h"
0ff0fc19
JK
49
50#ifndef _WIN32
1de7afc9 51#include "qemu/compatfd.h"
0ff0fc19 52#endif
296af7c9 53
6d9cb73c
JK
54#ifdef CONFIG_LINUX
55
56#include <sys/prctl.h>
57
c0532a76
MT
58#ifndef PR_MCE_KILL
59#define PR_MCE_KILL 33
60#endif
61
6d9cb73c
JK
62#ifndef PR_MCE_KILL_SET
63#define PR_MCE_KILL_SET 1
64#endif
65
66#ifndef PR_MCE_KILL_EARLY
67#define PR_MCE_KILL_EARLY 1
68#endif
69
70#endif /* CONFIG_LINUX */
71
182735ef 72static CPUState *next_cpu;
27498bef
ST
73int64_t max_delay;
74int64_t max_advance;
296af7c9 75
2adcc85d
JH
76/* vcpu throttling controls */
77static QEMUTimer *throttle_timer;
78static unsigned int throttle_percentage;
79
80#define CPU_THROTTLE_PCT_MIN 1
81#define CPU_THROTTLE_PCT_MAX 99
82#define CPU_THROTTLE_TIMESLICE_NS 10000000
83
321bc0b2
TC
84bool cpu_is_stopped(CPUState *cpu)
85{
86 return cpu->stopped || !runstate_is_running();
87}
88
a98ae1d8 89static bool cpu_thread_is_idle(CPUState *cpu)
ac873f1e 90{
c64ca814 91 if (cpu->stop || cpu->queued_work_first) {
ac873f1e
PM
92 return false;
93 }
321bc0b2 94 if (cpu_is_stopped(cpu)) {
ac873f1e
PM
95 return true;
96 }
8c2e1b00 97 if (!cpu->halted || cpu_has_work(cpu) ||
215e79c0 98 kvm_halt_in_kernel()) {
ac873f1e
PM
99 return false;
100 }
101 return true;
102}
103
104static bool all_cpu_threads_idle(void)
105{
182735ef 106 CPUState *cpu;
ac873f1e 107
bdc44640 108 CPU_FOREACH(cpu) {
182735ef 109 if (!cpu_thread_is_idle(cpu)) {
ac873f1e
PM
110 return false;
111 }
112 }
113 return true;
114}
115
946fb27c
PB
116/***********************************************************/
117/* guest cycle counter */
118
a3270e19
PB
119/* Protected by TimersState seqlock */
120
5045e9d9 121static bool icount_sleep = true;
71468395 122static int64_t vm_clock_warp_start = -1;
946fb27c
PB
123/* Conversion factor from emulated instructions to virtual clock ticks. */
124static int icount_time_shift;
125/* Arbitrarily pick 1MIPS as the minimum allowable speed. */
126#define MAX_ICOUNT_SHIFT 10
a3270e19 127
946fb27c
PB
128static QEMUTimer *icount_rt_timer;
129static QEMUTimer *icount_vm_timer;
130static QEMUTimer *icount_warp_timer;
946fb27c
PB
131
132typedef struct TimersState {
cb365646 133 /* Protected by BQL. */
946fb27c
PB
134 int64_t cpu_ticks_prev;
135 int64_t cpu_ticks_offset;
cb365646
LPF
136
137 /* cpu_clock_offset can be read out of BQL, so protect it with
138 * this lock.
139 */
140 QemuSeqLock vm_clock_seqlock;
946fb27c
PB
141 int64_t cpu_clock_offset;
142 int32_t cpu_ticks_enabled;
143 int64_t dummy;
c96778bb
FK
144
145 /* Compensate for varying guest execution speed. */
146 int64_t qemu_icount_bias;
147 /* Only written by TCG thread */
148 int64_t qemu_icount;
946fb27c
PB
149} TimersState;
150
d9cd4007 151static TimersState timers_state;
946fb27c 152
2a62914b 153int64_t cpu_get_icount_raw(void)
946fb27c
PB
154{
155 int64_t icount;
4917cf44 156 CPUState *cpu = current_cpu;
946fb27c 157
c96778bb 158 icount = timers_state.qemu_icount;
4917cf44 159 if (cpu) {
414b15c9 160 if (!cpu->can_do_io) {
2a62914b
PD
161 fprintf(stderr, "Bad icount read\n");
162 exit(1);
946fb27c 163 }
28ecfd7a 164 icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
946fb27c 165 }
2a62914b
PD
166 return icount;
167}
168
169/* Return the virtual CPU time, based on the instruction counter. */
170static int64_t cpu_get_icount_locked(void)
171{
172 int64_t icount = cpu_get_icount_raw();
3f031313 173 return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
946fb27c
PB
174}
175
17a15f1b
PB
176int64_t cpu_get_icount(void)
177{
178 int64_t icount;
179 unsigned start;
180
181 do {
182 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
183 icount = cpu_get_icount_locked();
184 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
185
186 return icount;
187}
188
3f031313
FK
189int64_t cpu_icount_to_ns(int64_t icount)
190{
191 return icount << icount_time_shift;
192}
193
946fb27c 194/* return the host CPU cycle counter and handle stop/restart */
cb365646 195/* Caller must hold the BQL */
946fb27c
PB
196int64_t cpu_get_ticks(void)
197{
5f3e3101
PB
198 int64_t ticks;
199
946fb27c
PB
200 if (use_icount) {
201 return cpu_get_icount();
202 }
5f3e3101
PB
203
204 ticks = timers_state.cpu_ticks_offset;
205 if (timers_state.cpu_ticks_enabled) {
4a7428c5 206 ticks += cpu_get_host_ticks();
5f3e3101
PB
207 }
208
209 if (timers_state.cpu_ticks_prev > ticks) {
210 /* Note: non increasing ticks may happen if the host uses
211 software suspend */
212 timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
213 ticks = timers_state.cpu_ticks_prev;
946fb27c 214 }
5f3e3101
PB
215
216 timers_state.cpu_ticks_prev = ticks;
217 return ticks;
946fb27c
PB
218}
219
cb365646 220static int64_t cpu_get_clock_locked(void)
946fb27c 221{
5f3e3101 222 int64_t ticks;
cb365646 223
5f3e3101
PB
224 ticks = timers_state.cpu_clock_offset;
225 if (timers_state.cpu_ticks_enabled) {
226 ticks += get_clock();
946fb27c 227 }
cb365646 228
5f3e3101 229 return ticks;
cb365646
LPF
230}
231
232/* return the host CPU monotonic timer and handle stop/restart */
233int64_t cpu_get_clock(void)
234{
235 int64_t ti;
236 unsigned start;
237
238 do {
239 start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
240 ti = cpu_get_clock_locked();
241 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
242
243 return ti;
946fb27c
PB
244}
245
cb365646
LPF
246/* enable cpu_get_ticks()
247 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
248 */
946fb27c
PB
249void cpu_enable_ticks(void)
250{
cb365646
LPF
251 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
252 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 253 if (!timers_state.cpu_ticks_enabled) {
4a7428c5 254 timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
946fb27c
PB
255 timers_state.cpu_clock_offset -= get_clock();
256 timers_state.cpu_ticks_enabled = 1;
257 }
cb365646 258 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
259}
260
261/* disable cpu_get_ticks() : the clock is stopped. You must not call
cb365646
LPF
262 * cpu_get_ticks() after that.
263 * Caller must hold BQL which server as mutex for vm_clock_seqlock.
264 */
946fb27c
PB
265void cpu_disable_ticks(void)
266{
cb365646
LPF
267 /* Here, the really thing protected by seqlock is cpu_clock_offset. */
268 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 269 if (timers_state.cpu_ticks_enabled) {
4a7428c5 270 timers_state.cpu_ticks_offset += cpu_get_host_ticks();
cb365646 271 timers_state.cpu_clock_offset = cpu_get_clock_locked();
946fb27c
PB
272 timers_state.cpu_ticks_enabled = 0;
273 }
cb365646 274 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
275}
276
277/* Correlation between real and virtual time is always going to be
278 fairly approximate, so ignore small variation.
279 When the guest is idle real and virtual time will be aligned in
280 the IO wait loop. */
73bcb24d 281#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
946fb27c
PB
282
283static void icount_adjust(void)
284{
285 int64_t cur_time;
286 int64_t cur_icount;
287 int64_t delta;
a3270e19
PB
288
289 /* Protected by TimersState mutex. */
946fb27c 290 static int64_t last_delta;
468cc7cf 291
946fb27c
PB
292 /* If the VM is not running, then do nothing. */
293 if (!runstate_is_running()) {
294 return;
295 }
468cc7cf 296
17a15f1b
PB
297 seqlock_write_lock(&timers_state.vm_clock_seqlock);
298 cur_time = cpu_get_clock_locked();
299 cur_icount = cpu_get_icount_locked();
468cc7cf 300
946fb27c
PB
301 delta = cur_icount - cur_time;
302 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
303 if (delta > 0
304 && last_delta + ICOUNT_WOBBLE < delta * 2
305 && icount_time_shift > 0) {
306 /* The guest is getting too far ahead. Slow time down. */
307 icount_time_shift--;
308 }
309 if (delta < 0
310 && last_delta - ICOUNT_WOBBLE > delta * 2
311 && icount_time_shift < MAX_ICOUNT_SHIFT) {
312 /* The guest is getting too far behind. Speed time up. */
313 icount_time_shift++;
314 }
315 last_delta = delta;
c96778bb
FK
316 timers_state.qemu_icount_bias = cur_icount
317 - (timers_state.qemu_icount << icount_time_shift);
17a15f1b 318 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
946fb27c
PB
319}
320
321static void icount_adjust_rt(void *opaque)
322{
40daca54 323 timer_mod(icount_rt_timer,
1979b908 324 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
946fb27c
PB
325 icount_adjust();
326}
327
328static void icount_adjust_vm(void *opaque)
329{
40daca54
AB
330 timer_mod(icount_vm_timer,
331 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
73bcb24d 332 NANOSECONDS_PER_SECOND / 10);
946fb27c
PB
333 icount_adjust();
334}
335
336static int64_t qemu_icount_round(int64_t count)
337{
338 return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
339}
340
efab87cf 341static void icount_warp_rt(void)
946fb27c 342{
ccffff48
AB
343 unsigned seq;
344 int64_t warp_start;
345
17a15f1b
PB
346 /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
347 * changes from -1 to another value, so the race here is okay.
348 */
ccffff48
AB
349 do {
350 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
351 warp_start = vm_clock_warp_start;
352 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
353
354 if (warp_start == -1) {
946fb27c
PB
355 return;
356 }
357
17a15f1b 358 seqlock_write_lock(&timers_state.vm_clock_seqlock);
946fb27c 359 if (runstate_is_running()) {
8eda206e
PD
360 int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
361 cpu_get_clock_locked());
8ed961d9
PB
362 int64_t warp_delta;
363
364 warp_delta = clock - vm_clock_warp_start;
365 if (use_icount == 2) {
946fb27c 366 /*
40daca54 367 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
946fb27c
PB
368 * far ahead of real time.
369 */
17a15f1b 370 int64_t cur_icount = cpu_get_icount_locked();
bf2a7ddb 371 int64_t delta = clock - cur_icount;
8ed961d9 372 warp_delta = MIN(warp_delta, delta);
946fb27c 373 }
c96778bb 374 timers_state.qemu_icount_bias += warp_delta;
946fb27c
PB
375 }
376 vm_clock_warp_start = -1;
17a15f1b 377 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
8ed961d9
PB
378
379 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
380 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
381 }
946fb27c
PB
382}
383
/*
 * Callback installed on icount_warp_timer; simply accounts the warp.
 * @opaque is unused.
 */
static void icount_timer_cb(void *opaque)
{
    (void)opaque;

    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}
391
8156be56
PB
392void qtest_clock_warp(int64_t dest)
393{
40daca54 394 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
efef88b3 395 AioContext *aio_context;
8156be56 396 assert(qtest_enabled());
efef88b3 397 aio_context = qemu_get_aio_context();
8156be56 398 while (clock < dest) {
40daca54 399 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
c9299e2f 400 int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
efef88b3 401
17a15f1b 402 seqlock_write_lock(&timers_state.vm_clock_seqlock);
c96778bb 403 timers_state.qemu_icount_bias += warp;
17a15f1b
PB
404 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
405
40daca54 406 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
efef88b3 407 timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
40daca54 408 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
8156be56 409 }
40daca54 410 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
8156be56
PB
411}
412
e76d1798 413void qemu_start_warp_timer(void)
946fb27c 414{
ce78d18c 415 int64_t clock;
946fb27c
PB
416 int64_t deadline;
417
e76d1798 418 if (!use_icount) {
946fb27c
PB
419 return;
420 }
421
8bd7f71d
PD
422 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
423 * do not fire, so computing the deadline does not make sense.
424 */
425 if (!runstate_is_running()) {
426 return;
427 }
428
429 /* warp clock deterministically in record/replay mode */
e76d1798 430 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
8bd7f71d
PD
431 return;
432 }
433
ce78d18c 434 if (!all_cpu_threads_idle()) {
946fb27c
PB
435 return;
436 }
437
8156be56
PB
438 if (qtest_enabled()) {
439 /* When testing, qtest commands advance icount. */
e76d1798 440 return;
8156be56
PB
441 }
442
ac70aafc 443 /* We want to use the earliest deadline from ALL vm_clocks */
bf2a7ddb 444 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
40daca54 445 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ce78d18c 446 if (deadline < 0) {
d7a0f71d
VC
447 static bool notified;
448 if (!icount_sleep && !notified) {
449 error_report("WARNING: icount sleep disabled and no active timers");
450 notified = true;
451 }
ce78d18c 452 return;
ac70aafc
AB
453 }
454
946fb27c
PB
455 if (deadline > 0) {
456 /*
40daca54 457 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
946fb27c
PB
458 * sleep. Otherwise, the CPU might be waiting for a future timer
459 * interrupt to wake it up, but the interrupt never comes because
460 * the vCPU isn't running any insns and thus doesn't advance the
40daca54 461 * QEMU_CLOCK_VIRTUAL.
946fb27c 462 */
5045e9d9
VC
463 if (!icount_sleep) {
464 /*
465 * We never let VCPUs sleep in no sleep icount mode.
466 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
467 * to the next QEMU_CLOCK_VIRTUAL event and notify it.
468 * It is useful when we want a deterministic execution time,
469 * isolated from host latencies.
470 */
471 seqlock_write_lock(&timers_state.vm_clock_seqlock);
472 timers_state.qemu_icount_bias += deadline;
473 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
474 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
475 } else {
476 /*
477 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
478 * "real" time, (related to the time left until the next event) has
479 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
480 * This avoids that the warps are visible externally; for example,
481 * you will not be sending network packets continuously instead of
482 * every 100ms.
483 */
484 seqlock_write_lock(&timers_state.vm_clock_seqlock);
485 if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
486 vm_clock_warp_start = clock;
487 }
488 seqlock_write_unlock(&timers_state.vm_clock_seqlock);
489 timer_mod_anticipate(icount_warp_timer, clock + deadline);
ce78d18c 490 }
ac70aafc 491 } else if (deadline == 0) {
40daca54 492 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
946fb27c
PB
493 }
494}
495
e76d1798
PD
496static void qemu_account_warp_timer(void)
497{
498 if (!use_icount || !icount_sleep) {
499 return;
500 }
501
502 /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
503 * do not fire, so computing the deadline does not make sense.
504 */
505 if (!runstate_is_running()) {
506 return;
507 }
508
509 /* warp clock deterministically in record/replay mode */
510 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
511 return;
512 }
513
514 timer_del(icount_warp_timer);
515 icount_warp_rt();
516}
517
d09eae37
FK
518static bool icount_state_needed(void *opaque)
519{
520 return use_icount;
521}
522
523/*
524 * This is a subsection for icount migration.
525 */
526static const VMStateDescription icount_vmstate_timers = {
527 .name = "timer/icount",
528 .version_id = 1,
529 .minimum_version_id = 1,
5cd8cada 530 .needed = icount_state_needed,
d09eae37
FK
531 .fields = (VMStateField[]) {
532 VMSTATE_INT64(qemu_icount_bias, TimersState),
533 VMSTATE_INT64(qemu_icount, TimersState),
534 VMSTATE_END_OF_LIST()
535 }
536};
537
946fb27c
PB
538static const VMStateDescription vmstate_timers = {
539 .name = "timer",
540 .version_id = 2,
541 .minimum_version_id = 1,
35d08458 542 .fields = (VMStateField[]) {
946fb27c
PB
543 VMSTATE_INT64(cpu_ticks_offset, TimersState),
544 VMSTATE_INT64(dummy, TimersState),
545 VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
546 VMSTATE_END_OF_LIST()
d09eae37 547 },
5cd8cada
JQ
548 .subsections = (const VMStateDescription*[]) {
549 &icount_vmstate_timers,
550 NULL
946fb27c
PB
551 }
552};
553
2adcc85d
JH
554static void cpu_throttle_thread(void *opaque)
555{
556 CPUState *cpu = opaque;
557 double pct;
558 double throttle_ratio;
559 long sleeptime_ns;
560
561 if (!cpu_throttle_get_percentage()) {
562 return;
563 }
564
565 pct = (double)cpu_throttle_get_percentage()/100;
566 throttle_ratio = pct / (1 - pct);
567 sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
568
569 qemu_mutex_unlock_iothread();
570 atomic_set(&cpu->throttle_thread_scheduled, 0);
571 g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
572 qemu_mutex_lock_iothread();
573}
574
575static void cpu_throttle_timer_tick(void *opaque)
576{
577 CPUState *cpu;
578 double pct;
579
580 /* Stop the timer if needed */
581 if (!cpu_throttle_get_percentage()) {
582 return;
583 }
584 CPU_FOREACH(cpu) {
585 if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
586 async_run_on_cpu(cpu, cpu_throttle_thread, cpu);
587 }
588 }
589
590 pct = (double)cpu_throttle_get_percentage()/100;
591 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
592 CPU_THROTTLE_TIMESLICE_NS / (1-pct));
593}
594
595void cpu_throttle_set(int new_throttle_pct)
596{
597 /* Ensure throttle percentage is within valid range */
598 new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
599 new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
600
601 atomic_set(&throttle_percentage, new_throttle_pct);
602
603 timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
604 CPU_THROTTLE_TIMESLICE_NS);
605}
606
607void cpu_throttle_stop(void)
608{
609 atomic_set(&throttle_percentage, 0);
610}
611
/* Return true while a non-zero throttle percentage is set. */
bool cpu_throttle_active(void)
{
    int pct = cpu_throttle_get_percentage();

    return pct != 0;
}
616
617int cpu_throttle_get_percentage(void)
618{
619 return atomic_read(&throttle_percentage);
620}
621
4603ea01
PD
622void cpu_ticks_init(void)
623{
624 seqlock_init(&timers_state.vm_clock_seqlock, NULL);
625 vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
2adcc85d
JH
626 throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
627 cpu_throttle_timer_tick, NULL);
4603ea01
PD
628}
629
1ad9580b 630void configure_icount(QemuOpts *opts, Error **errp)
946fb27c 631{
1ad9580b 632 const char *option;
a8bfac37 633 char *rem_str = NULL;
1ad9580b 634
1ad9580b 635 option = qemu_opt_get(opts, "shift");
946fb27c 636 if (!option) {
a8bfac37
ST
637 if (qemu_opt_get(opts, "align") != NULL) {
638 error_setg(errp, "Please specify shift option when using align");
639 }
946fb27c
PB
640 return;
641 }
f1f4b57e
VC
642
643 icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
5045e9d9
VC
644 if (icount_sleep) {
645 icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
e76d1798 646 icount_timer_cb, NULL);
5045e9d9 647 }
f1f4b57e 648
a8bfac37 649 icount_align_option = qemu_opt_get_bool(opts, "align", false);
f1f4b57e
VC
650
651 if (icount_align_option && !icount_sleep) {
778d9f9b 652 error_setg(errp, "align=on and sleep=off are incompatible");
f1f4b57e 653 }
946fb27c 654 if (strcmp(option, "auto") != 0) {
a8bfac37
ST
655 errno = 0;
656 icount_time_shift = strtol(option, &rem_str, 0);
657 if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
658 error_setg(errp, "icount: Invalid shift value");
659 }
946fb27c
PB
660 use_icount = 1;
661 return;
a8bfac37
ST
662 } else if (icount_align_option) {
663 error_setg(errp, "shift=auto and align=on are incompatible");
f1f4b57e 664 } else if (!icount_sleep) {
778d9f9b 665 error_setg(errp, "shift=auto and sleep=off are incompatible");
946fb27c
PB
666 }
667
668 use_icount = 2;
669
670 /* 125MIPS seems a reasonable initial guess at the guest speed.
671 It will be corrected fairly quickly anyway. */
672 icount_time_shift = 3;
673
674 /* Have both realtime and virtual time triggers for speed adjustment.
675 The realtime trigger catches emulated time passing too slowly,
676 the virtual time trigger catches emulated time passing too fast.
677 Realtime triggers occur even when idle, so use them less frequently
678 than VM triggers. */
bf2a7ddb
PD
679 icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
680 icount_adjust_rt, NULL);
40daca54 681 timer_mod(icount_rt_timer,
bf2a7ddb 682 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
40daca54
AB
683 icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
684 icount_adjust_vm, NULL);
685 timer_mod(icount_vm_timer,
686 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
73bcb24d 687 NANOSECONDS_PER_SECOND / 10);
946fb27c
PB
688}
689
296af7c9
BS
690/***********************************************************/
691void hw_error(const char *fmt, ...)
692{
693 va_list ap;
55e5c285 694 CPUState *cpu;
296af7c9
BS
695
696 va_start(ap, fmt);
697 fprintf(stderr, "qemu: hardware error: ");
698 vfprintf(stderr, fmt, ap);
699 fprintf(stderr, "\n");
bdc44640 700 CPU_FOREACH(cpu) {
55e5c285 701 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
878096ee 702 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
296af7c9
BS
703 }
704 va_end(ap);
705 abort();
706}
707
708void cpu_synchronize_all_states(void)
709{
182735ef 710 CPUState *cpu;
296af7c9 711
bdc44640 712 CPU_FOREACH(cpu) {
182735ef 713 cpu_synchronize_state(cpu);
296af7c9
BS
714 }
715}
716
717void cpu_synchronize_all_post_reset(void)
718{
182735ef 719 CPUState *cpu;
296af7c9 720
bdc44640 721 CPU_FOREACH(cpu) {
182735ef 722 cpu_synchronize_post_reset(cpu);
296af7c9
BS
723 }
724}
725
726void cpu_synchronize_all_post_init(void)
727{
182735ef 728 CPUState *cpu;
296af7c9 729
bdc44640 730 CPU_FOREACH(cpu) {
182735ef 731 cpu_synchronize_post_init(cpu);
296af7c9
BS
732 }
733}
734
56983463 735static int do_vm_stop(RunState state)
296af7c9 736{
56983463
KW
737 int ret = 0;
738
1354869c 739 if (runstate_is_running()) {
296af7c9 740 cpu_disable_ticks();
296af7c9 741 pause_all_vcpus();
f5bbfba1 742 runstate_set(state);
1dfb4dd9 743 vm_state_notify(0, state);
a4e15de9 744 qapi_event_send_stop(&error_abort);
296af7c9 745 }
56983463 746
594a45ce 747 bdrv_drain_all();
da31d594 748 ret = blk_flush_all();
594a45ce 749
56983463 750 return ret;
296af7c9
BS
751}
752
a1fcaa73 753static bool cpu_can_run(CPUState *cpu)
296af7c9 754{
4fdeee7c 755 if (cpu->stop) {
a1fcaa73 756 return false;
0ab07c62 757 }
321bc0b2 758 if (cpu_is_stopped(cpu)) {
a1fcaa73 759 return false;
0ab07c62 760 }
a1fcaa73 761 return true;
296af7c9
BS
762}
763
91325046 764static void cpu_handle_guest_debug(CPUState *cpu)
83f338f7 765{
64f6b346 766 gdb_set_stop_cpu(cpu);
8cf71710 767 qemu_system_debug_request();
f324e766 768 cpu->stopped = true;
3c638d06
JK
769}
770
6d9cb73c
JK
771#ifdef CONFIG_LINUX
772static void sigbus_reraise(void)
773{
774 sigset_t set;
775 struct sigaction action;
776
777 memset(&action, 0, sizeof(action));
778 action.sa_handler = SIG_DFL;
779 if (!sigaction(SIGBUS, &action, NULL)) {
780 raise(SIGBUS);
781 sigemptyset(&set);
782 sigaddset(&set, SIGBUS);
a2d1761d 783 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
6d9cb73c
JK
784 }
785 perror("Failed to re-raise SIGBUS!\n");
786 abort();
787}
788
789static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
790 void *ctx)
791{
792 if (kvm_on_sigbus(siginfo->ssi_code,
793 (void *)(intptr_t)siginfo->ssi_addr)) {
794 sigbus_reraise();
795 }
796}
797
798static void qemu_init_sigbus(void)
799{
800 struct sigaction action;
801
802 memset(&action, 0, sizeof(action));
803 action.sa_flags = SA_SIGINFO;
804 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
805 sigaction(SIGBUS, &action, NULL);
806
807 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
808}
809
290adf38 810static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
811{
812 struct timespec ts = { 0, 0 };
813 siginfo_t siginfo;
814 sigset_t waitset;
815 sigset_t chkset;
816 int r;
817
818 sigemptyset(&waitset);
819 sigaddset(&waitset, SIG_IPI);
820 sigaddset(&waitset, SIGBUS);
821
822 do {
823 r = sigtimedwait(&waitset, &siginfo, &ts);
824 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
825 perror("sigtimedwait");
826 exit(1);
827 }
828
829 switch (r) {
830 case SIGBUS:
290adf38 831 if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
1ab3c6c0
JK
832 sigbus_reraise();
833 }
834 break;
835 default:
836 break;
837 }
838
839 r = sigpending(&chkset);
840 if (r == -1) {
841 perror("sigpending");
842 exit(1);
843 }
844 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
1ab3c6c0
JK
845}
846
6d9cb73c
JK
847#else /* !CONFIG_LINUX */
848
/* Non-Linux build: there is no SIGBUS setup to perform. */
static void qemu_init_sigbus(void)
{
    /* intentionally empty */
}
1ab3c6c0 852
290adf38 853static void qemu_kvm_eat_signals(CPUState *cpu)
1ab3c6c0
JK
854{
855}
6d9cb73c
JK
856#endif /* !CONFIG_LINUX */
857
296af7c9 858#ifndef _WIN32
55f8d6ac
JK
/* No-op signal handler; installed for SIG_IPI by qemu_kvm_init_cpu_signals(). */
static void dummy_signal(int sig)
{
    (void)sig;
}
55f8d6ac 862
13618e05 863static void qemu_kvm_init_cpu_signals(CPUState *cpu)
714bd040
PB
864{
865 int r;
866 sigset_t set;
867 struct sigaction sigact;
868
869 memset(&sigact, 0, sizeof(sigact));
870 sigact.sa_handler = dummy_signal;
871 sigaction(SIG_IPI, &sigact, NULL);
872
714bd040
PB
873 pthread_sigmask(SIG_BLOCK, NULL, &set);
874 sigdelset(&set, SIG_IPI);
714bd040 875 sigdelset(&set, SIGBUS);
491d6e80 876 r = kvm_set_signal_mask(cpu, &set);
714bd040
PB
877 if (r) {
878 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
879 exit(1);
880 }
881}
882
55f8d6ac 883#else /* _WIN32 */
13618e05 884static void qemu_kvm_init_cpu_signals(CPUState *cpu)
ff48eb5f 885{
714bd040
PB
886 abort();
887}
714bd040 888#endif /* _WIN32 */
ff48eb5f 889
b2532d88 890static QemuMutex qemu_global_mutex;
46daff13 891static QemuCond qemu_io_proceeded_cond;
6b49809c 892static unsigned iothread_requesting_mutex;
296af7c9
BS
893
894static QemuThread io_thread;
895
296af7c9
BS
896/* cpu creation */
897static QemuCond qemu_cpu_cond;
898/* system init */
296af7c9 899static QemuCond qemu_pause_cond;
e82bcec2 900static QemuCond qemu_work_cond;
296af7c9 901
d3b12f5d 902void qemu_init_cpu_loop(void)
296af7c9 903{
6d9cb73c 904 qemu_init_sigbus();
ed94592b 905 qemu_cond_init(&qemu_cpu_cond);
ed94592b
AL
906 qemu_cond_init(&qemu_pause_cond);
907 qemu_cond_init(&qemu_work_cond);
46daff13 908 qemu_cond_init(&qemu_io_proceeded_cond);
296af7c9 909 qemu_mutex_init(&qemu_global_mutex);
296af7c9 910
b7680cb6 911 qemu_thread_get_self(&io_thread);
296af7c9
BS
912}
913
f100f0b3 914void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
e82bcec2
MT
915{
916 struct qemu_work_item wi;
917
60e82579 918 if (qemu_cpu_is_self(cpu)) {
e82bcec2
MT
919 func(data);
920 return;
921 }
922
923 wi.func = func;
924 wi.data = data;
3c02270d 925 wi.free = false;
376692b9
PB
926
927 qemu_mutex_lock(&cpu->work_mutex);
c64ca814
AF
928 if (cpu->queued_work_first == NULL) {
929 cpu->queued_work_first = &wi;
0ab07c62 930 } else {
c64ca814 931 cpu->queued_work_last->next = &wi;
0ab07c62 932 }
c64ca814 933 cpu->queued_work_last = &wi;
e82bcec2
MT
934 wi.next = NULL;
935 wi.done = false;
376692b9 936 qemu_mutex_unlock(&cpu->work_mutex);
e82bcec2 937
c08d7424 938 qemu_cpu_kick(cpu);
376692b9 939 while (!atomic_mb_read(&wi.done)) {
4917cf44 940 CPUState *self_cpu = current_cpu;
e82bcec2
MT
941
942 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
4917cf44 943 current_cpu = self_cpu;
e82bcec2
MT
944 }
945}
946
3c02270d
CV
947void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
948{
949 struct qemu_work_item *wi;
950
951 if (qemu_cpu_is_self(cpu)) {
952 func(data);
953 return;
954 }
955
956 wi = g_malloc0(sizeof(struct qemu_work_item));
957 wi->func = func;
958 wi->data = data;
959 wi->free = true;
376692b9
PB
960
961 qemu_mutex_lock(&cpu->work_mutex);
3c02270d
CV
962 if (cpu->queued_work_first == NULL) {
963 cpu->queued_work_first = wi;
964 } else {
965 cpu->queued_work_last->next = wi;
966 }
967 cpu->queued_work_last = wi;
968 wi->next = NULL;
969 wi->done = false;
376692b9 970 qemu_mutex_unlock(&cpu->work_mutex);
3c02270d
CV
971
972 qemu_cpu_kick(cpu);
973}
974
4c055ab5
GZ
975static void qemu_kvm_destroy_vcpu(CPUState *cpu)
976{
977 if (kvm_destroy_vcpu(cpu) < 0) {
978 error_report("kvm_destroy_vcpu failed");
979 exit(EXIT_FAILURE);
980 }
981}
982
983static void qemu_tcg_destroy_vcpu(CPUState *cpu)
984{
985}
986
6d45b109 987static void flush_queued_work(CPUState *cpu)
e82bcec2
MT
988{
989 struct qemu_work_item *wi;
990
c64ca814 991 if (cpu->queued_work_first == NULL) {
e82bcec2 992 return;
0ab07c62 993 }
e82bcec2 994
376692b9
PB
995 qemu_mutex_lock(&cpu->work_mutex);
996 while (cpu->queued_work_first != NULL) {
997 wi = cpu->queued_work_first;
c64ca814 998 cpu->queued_work_first = wi->next;
376692b9
PB
999 if (!cpu->queued_work_first) {
1000 cpu->queued_work_last = NULL;
1001 }
1002 qemu_mutex_unlock(&cpu->work_mutex);
e82bcec2 1003 wi->func(wi->data);
376692b9 1004 qemu_mutex_lock(&cpu->work_mutex);
3c02270d
CV
1005 if (wi->free) {
1006 g_free(wi);
376692b9
PB
1007 } else {
1008 atomic_mb_set(&wi->done, true);
3c02270d 1009 }
e82bcec2 1010 }
376692b9 1011 qemu_mutex_unlock(&cpu->work_mutex);
e82bcec2
MT
1012 qemu_cond_broadcast(&qemu_work_cond);
1013}
1014
509a0d78 1015static void qemu_wait_io_event_common(CPUState *cpu)
296af7c9 1016{
4fdeee7c
AF
1017 if (cpu->stop) {
1018 cpu->stop = false;
f324e766 1019 cpu->stopped = true;
96bce683 1020 qemu_cond_broadcast(&qemu_pause_cond);
296af7c9 1021 }
6d45b109 1022 flush_queued_work(cpu);
216fc9a4 1023 cpu->thread_kicked = false;
296af7c9
BS
1024}
1025
d5f8d613 1026static void qemu_tcg_wait_io_event(CPUState *cpu)
296af7c9 1027{
16400322 1028 while (all_cpu_threads_idle()) {
d5f8d613 1029 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 1030 }
296af7c9 1031
46daff13
PB
1032 while (iothread_requesting_mutex) {
1033 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
1034 }
6cabe1f3 1035
bdc44640 1036 CPU_FOREACH(cpu) {
182735ef 1037 qemu_wait_io_event_common(cpu);
6cabe1f3 1038 }
296af7c9
BS
1039}
1040
fd529e8f 1041static void qemu_kvm_wait_io_event(CPUState *cpu)
296af7c9 1042{
a98ae1d8 1043 while (cpu_thread_is_idle(cpu)) {
f5c121b8 1044 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
16400322 1045 }
296af7c9 1046
290adf38 1047 qemu_kvm_eat_signals(cpu);
509a0d78 1048 qemu_wait_io_event_common(cpu);
296af7c9
BS
1049}
1050
7e97cd88 1051static void *qemu_kvm_cpu_thread_fn(void *arg)
296af7c9 1052{
48a106bd 1053 CPUState *cpu = arg;
84b4915d 1054 int r;
296af7c9 1055
ab28bd23
PB
1056 rcu_register_thread();
1057
2e7f7a3c 1058 qemu_mutex_lock_iothread();
814e612e 1059 qemu_thread_get_self(cpu->thread);
9f09e18a 1060 cpu->thread_id = qemu_get_thread_id();
626cf8f4 1061 cpu->can_do_io = 1;
4917cf44 1062 current_cpu = cpu;
296af7c9 1063
504134d2 1064 r = kvm_init_vcpu(cpu);
84b4915d
JK
1065 if (r < 0) {
1066 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
1067 exit(1);
1068 }
296af7c9 1069
13618e05 1070 qemu_kvm_init_cpu_signals(cpu);
296af7c9
BS
1071
1072 /* signal CPU creation */
61a46217 1073 cpu->created = true;
296af7c9
BS
1074 qemu_cond_signal(&qemu_cpu_cond);
1075
4c055ab5 1076 do {
a1fcaa73 1077 if (cpu_can_run(cpu)) {
1458c363 1078 r = kvm_cpu_exec(cpu);
83f338f7 1079 if (r == EXCP_DEBUG) {
91325046 1080 cpu_handle_guest_debug(cpu);
83f338f7 1081 }
0ab07c62 1082 }
fd529e8f 1083 qemu_kvm_wait_io_event(cpu);
4c055ab5 1084 } while (!cpu->unplug || cpu_can_run(cpu));
296af7c9 1085
4c055ab5
GZ
1086 qemu_kvm_destroy_vcpu(cpu);
1087 qemu_mutex_unlock_iothread();
296af7c9
BS
1088 return NULL;
1089}
1090
c7f0f3b1
AL
1091static void *qemu_dummy_cpu_thread_fn(void *arg)
1092{
1093#ifdef _WIN32
1094 fprintf(stderr, "qtest is not supported under Windows\n");
1095 exit(1);
1096#else
10a9021d 1097 CPUState *cpu = arg;
c7f0f3b1
AL
1098 sigset_t waitset;
1099 int r;
1100
ab28bd23
PB
1101 rcu_register_thread();
1102
c7f0f3b1 1103 qemu_mutex_lock_iothread();
814e612e 1104 qemu_thread_get_self(cpu->thread);
9f09e18a 1105 cpu->thread_id = qemu_get_thread_id();
626cf8f4 1106 cpu->can_do_io = 1;
c7f0f3b1
AL
1107
1108 sigemptyset(&waitset);
1109 sigaddset(&waitset, SIG_IPI);
1110
1111 /* signal CPU creation */
61a46217 1112 cpu->created = true;
c7f0f3b1
AL
1113 qemu_cond_signal(&qemu_cpu_cond);
1114
4917cf44 1115 current_cpu = cpu;
c7f0f3b1 1116 while (1) {
4917cf44 1117 current_cpu = NULL;
c7f0f3b1
AL
1118 qemu_mutex_unlock_iothread();
1119 do {
1120 int sig;
1121 r = sigwait(&waitset, &sig);
1122 } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1123 if (r == -1) {
1124 perror("sigwait");
1125 exit(1);
1126 }
1127 qemu_mutex_lock_iothread();
4917cf44 1128 current_cpu = cpu;
509a0d78 1129 qemu_wait_io_event_common(cpu);
c7f0f3b1
AL
1130 }
1131
1132 return NULL;
1133#endif
1134}
1135
bdb7ca67
JK
1136static void tcg_exec_all(void);
1137
7e97cd88 1138static void *qemu_tcg_cpu_thread_fn(void *arg)
296af7c9 1139{
c3586ba7 1140 CPUState *cpu = arg;
4c055ab5 1141 CPUState *remove_cpu = NULL;
296af7c9 1142
ab28bd23
PB
1143 rcu_register_thread();
1144
2e7f7a3c 1145 qemu_mutex_lock_iothread();
814e612e 1146 qemu_thread_get_self(cpu->thread);
296af7c9 1147
38fcbd3f
AF
1148 CPU_FOREACH(cpu) {
1149 cpu->thread_id = qemu_get_thread_id();
1150 cpu->created = true;
626cf8f4 1151 cpu->can_do_io = 1;
38fcbd3f 1152 }
296af7c9
BS
1153 qemu_cond_signal(&qemu_cpu_cond);
1154
fa7d1867 1155 /* wait for initial kick-off after machine start */
c28e399c 1156 while (first_cpu->stopped) {
d5f8d613 1157 qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
8e564b4e
JK
1158
1159 /* process any pending work */
bdc44640 1160 CPU_FOREACH(cpu) {
182735ef 1161 qemu_wait_io_event_common(cpu);
8e564b4e 1162 }
0ab07c62 1163 }
296af7c9 1164
21618b3e 1165 /* process any pending work */
aed807c8 1166 atomic_mb_set(&exit_request, 1);
21618b3e 1167
296af7c9 1168 while (1) {
bdb7ca67 1169 tcg_exec_all();
ac70aafc
AB
1170
1171 if (use_icount) {
40daca54 1172 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
ac70aafc
AB
1173
1174 if (deadline == 0) {
40daca54 1175 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
ac70aafc 1176 }
3b2319a3 1177 }
d5f8d613 1178 qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
4c055ab5
GZ
1179 CPU_FOREACH(cpu) {
1180 if (cpu->unplug && !cpu_can_run(cpu)) {
1181 remove_cpu = cpu;
1182 break;
1183 }
1184 }
1185 if (remove_cpu) {
1186 qemu_tcg_destroy_vcpu(remove_cpu);
1187 remove_cpu = NULL;
1188 }
296af7c9
BS
1189 }
1190
1191 return NULL;
1192}
1193
2ff09a40 1194static void qemu_cpu_kick_thread(CPUState *cpu)
cc015e9a
PB
1195{
1196#ifndef _WIN32
1197 int err;
1198
e0c38211
PB
1199 if (cpu->thread_kicked) {
1200 return;
9102deda 1201 }
e0c38211 1202 cpu->thread_kicked = true;
814e612e 1203 err = pthread_kill(cpu->thread->thread, SIG_IPI);
cc015e9a
PB
1204 if (err) {
1205 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1206 exit(1);
1207 }
1208#else /* _WIN32 */
e0c38211
PB
1209 abort();
1210#endif
1211}
ed9164a3 1212
e0c38211
PB
1213static void qemu_cpu_kick_no_halt(void)
1214{
1215 CPUState *cpu;
1216 /* Ensure whatever caused the exit has reached the CPU threads before
1217 * writing exit_request.
1218 */
1219 atomic_mb_set(&exit_request, 1);
1220 cpu = atomic_mb_read(&tcg_current_cpu);
1221 if (cpu) {
1222 cpu_exit(cpu);
cc015e9a 1223 }
cc015e9a
PB
1224}
1225
c08d7424 1226void qemu_cpu_kick(CPUState *cpu)
296af7c9 1227{
f5c121b8 1228 qemu_cond_broadcast(cpu->halt_cond);
e0c38211
PB
1229 if (tcg_enabled()) {
1230 qemu_cpu_kick_no_halt();
1231 } else {
1232 qemu_cpu_kick_thread(cpu);
1233 }
296af7c9
BS
1234}
1235
46d62fac 1236void qemu_cpu_kick_self(void)
296af7c9 1237{
4917cf44 1238 assert(current_cpu);
9102deda 1239 qemu_cpu_kick_thread(current_cpu);
296af7c9
BS
1240}
1241
60e82579 1242bool qemu_cpu_is_self(CPUState *cpu)
296af7c9 1243{
814e612e 1244 return qemu_thread_is_self(cpu->thread);
296af7c9
BS
1245}
1246
79e2b9ae 1247bool qemu_in_vcpu_thread(void)
aa723c23 1248{
4917cf44 1249 return current_cpu && qemu_cpu_is_self(current_cpu);
aa723c23
JQ
1250}
1251
afbe7053
PB
/*
 * Per-thread flag, set by qemu_mutex_lock_iothread() and cleared by
 * qemu_mutex_unlock_iothread().  Starts out false (zero-initialised).
 */
static __thread bool iothread_locked;

/*
 * Return true if the calling thread took the iothread lock through
 * qemu_mutex_lock_iothread() and has not released it yet.
 */
bool qemu_mutex_iothread_locked(void)
{
    return iothread_locked;
}
1258
296af7c9
BS
1259void qemu_mutex_lock_iothread(void)
1260{
21618b3e 1261 atomic_inc(&iothread_requesting_mutex);
2e7f7a3c
PB
1262 /* In the simple case there is no need to bump the VCPU thread out of
1263 * TCG code execution.
1264 */
1265 if (!tcg_enabled() || qemu_in_vcpu_thread() ||
46036b24 1266 !first_cpu || !first_cpu->created) {
296af7c9 1267 qemu_mutex_lock(&qemu_global_mutex);
21618b3e 1268 atomic_dec(&iothread_requesting_mutex);
1a28cac3 1269 } else {
1a28cac3 1270 if (qemu_mutex_trylock(&qemu_global_mutex)) {
e0c38211 1271 qemu_cpu_kick_no_halt();
1a28cac3
MT
1272 qemu_mutex_lock(&qemu_global_mutex);
1273 }
6b49809c 1274 atomic_dec(&iothread_requesting_mutex);
46daff13 1275 qemu_cond_broadcast(&qemu_io_proceeded_cond);
1a28cac3 1276 }
afbe7053 1277 iothread_locked = true;
296af7c9
BS
1278}
1279
1280void qemu_mutex_unlock_iothread(void)
1281{
afbe7053 1282 iothread_locked = false;
296af7c9
BS
1283 qemu_mutex_unlock(&qemu_global_mutex);
1284}
1285
1286static int all_vcpus_paused(void)
1287{
bdc44640 1288 CPUState *cpu;
296af7c9 1289
bdc44640 1290 CPU_FOREACH(cpu) {
182735ef 1291 if (!cpu->stopped) {
296af7c9 1292 return 0;
0ab07c62 1293 }
296af7c9
BS
1294 }
1295
1296 return 1;
1297}
1298
1299void pause_all_vcpus(void)
1300{
bdc44640 1301 CPUState *cpu;
296af7c9 1302
40daca54 1303 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
bdc44640 1304 CPU_FOREACH(cpu) {
182735ef
AF
1305 cpu->stop = true;
1306 qemu_cpu_kick(cpu);
296af7c9
BS
1307 }
1308
aa723c23 1309 if (qemu_in_vcpu_thread()) {
d798e974
JK
1310 cpu_stop_current();
1311 if (!kvm_enabled()) {
bdc44640 1312 CPU_FOREACH(cpu) {
182735ef
AF
1313 cpu->stop = false;
1314 cpu->stopped = true;
d798e974
JK
1315 }
1316 return;
1317 }
1318 }
1319
296af7c9 1320 while (!all_vcpus_paused()) {
be7d6c57 1321 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
bdc44640 1322 CPU_FOREACH(cpu) {
182735ef 1323 qemu_cpu_kick(cpu);
296af7c9
BS
1324 }
1325 }
1326}
1327
2993683b
IM
1328void cpu_resume(CPUState *cpu)
1329{
1330 cpu->stop = false;
1331 cpu->stopped = false;
1332 qemu_cpu_kick(cpu);
1333}
1334
296af7c9
BS
1335void resume_all_vcpus(void)
1336{
bdc44640 1337 CPUState *cpu;
296af7c9 1338
40daca54 1339 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
bdc44640 1340 CPU_FOREACH(cpu) {
182735ef 1341 cpu_resume(cpu);
296af7c9
BS
1342 }
1343}
1344
4c055ab5
GZ
1345void cpu_remove(CPUState *cpu)
1346{
1347 cpu->stop = true;
1348 cpu->unplug = true;
1349 qemu_cpu_kick(cpu);
1350}
1351
4900116e
DDAG
1352/* For temporary buffers for forming a name */
1353#define VCPU_THREAD_NAME_SIZE 16
1354
e5ab30a2 1355static void qemu_tcg_init_vcpu(CPUState *cpu)
296af7c9 1356{
4900116e 1357 char thread_name[VCPU_THREAD_NAME_SIZE];
d5f8d613
FK
1358 static QemuCond *tcg_halt_cond;
1359 static QemuThread *tcg_cpu_thread;
4900116e 1360
296af7c9
BS
1361 /* share a single thread for all cpus with TCG */
1362 if (!tcg_cpu_thread) {
814e612e 1363 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1364 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1365 qemu_cond_init(cpu->halt_cond);
1366 tcg_halt_cond = cpu->halt_cond;
4900116e
DDAG
1367 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1368 cpu->cpu_index);
1369 qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1370 cpu, QEMU_THREAD_JOINABLE);
1ecf47bf 1371#ifdef _WIN32
814e612e 1372 cpu->hThread = qemu_thread_get_handle(cpu->thread);
1ecf47bf 1373#endif
61a46217 1374 while (!cpu->created) {
18a85728 1375 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1376 }
814e612e 1377 tcg_cpu_thread = cpu->thread;
296af7c9 1378 } else {
814e612e 1379 cpu->thread = tcg_cpu_thread;
f5c121b8 1380 cpu->halt_cond = tcg_halt_cond;
296af7c9
BS
1381 }
1382}
1383
48a106bd 1384static void qemu_kvm_start_vcpu(CPUState *cpu)
296af7c9 1385{
4900116e
DDAG
1386 char thread_name[VCPU_THREAD_NAME_SIZE];
1387
814e612e 1388 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1389 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1390 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1391 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1392 cpu->cpu_index);
1393 qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1394 cpu, QEMU_THREAD_JOINABLE);
61a46217 1395 while (!cpu->created) {
18a85728 1396 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
0ab07c62 1397 }
296af7c9
BS
1398}
1399
10a9021d 1400static void qemu_dummy_start_vcpu(CPUState *cpu)
c7f0f3b1 1401{
4900116e
DDAG
1402 char thread_name[VCPU_THREAD_NAME_SIZE];
1403
814e612e 1404 cpu->thread = g_malloc0(sizeof(QemuThread));
f5c121b8
AF
1405 cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1406 qemu_cond_init(cpu->halt_cond);
4900116e
DDAG
1407 snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1408 cpu->cpu_index);
1409 qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
c7f0f3b1 1410 QEMU_THREAD_JOINABLE);
61a46217 1411 while (!cpu->created) {
c7f0f3b1
AL
1412 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1413 }
1414}
1415
c643bed9 1416void qemu_init_vcpu(CPUState *cpu)
296af7c9 1417{
ce3960eb
AF
1418 cpu->nr_cores = smp_cores;
1419 cpu->nr_threads = smp_threads;
f324e766 1420 cpu->stopped = true;
56943e8c
PM
1421
1422 if (!cpu->as) {
1423 /* If the target cpu hasn't set up any address spaces itself,
1424 * give it the default one.
1425 */
6731d864
PC
1426 AddressSpace *as = address_space_init_shareable(cpu->memory,
1427 "cpu-memory");
12ebc9a7 1428 cpu->num_ases = 1;
6731d864 1429 cpu_address_space_init(cpu, as, 0);
56943e8c
PM
1430 }
1431
0ab07c62 1432 if (kvm_enabled()) {
48a106bd 1433 qemu_kvm_start_vcpu(cpu);
c7f0f3b1 1434 } else if (tcg_enabled()) {
e5ab30a2 1435 qemu_tcg_init_vcpu(cpu);
c7f0f3b1 1436 } else {
10a9021d 1437 qemu_dummy_start_vcpu(cpu);
0ab07c62 1438 }
296af7c9
BS
1439}
1440
b4a3d965 1441void cpu_stop_current(void)
296af7c9 1442{
4917cf44
AF
1443 if (current_cpu) {
1444 current_cpu->stop = false;
1445 current_cpu->stopped = true;
1446 cpu_exit(current_cpu);
96bce683 1447 qemu_cond_broadcast(&qemu_pause_cond);
b4a3d965 1448 }
296af7c9
BS
1449}
1450
56983463 1451int vm_stop(RunState state)
296af7c9 1452{
aa723c23 1453 if (qemu_in_vcpu_thread()) {
74892d24 1454 qemu_system_vmstop_request_prepare();
1dfb4dd9 1455 qemu_system_vmstop_request(state);
296af7c9
BS
1456 /*
1457 * FIXME: should not return to device code in case
1458 * vm_stop() has been requested.
1459 */
b4a3d965 1460 cpu_stop_current();
56983463 1461 return 0;
296af7c9 1462 }
56983463
KW
1463
1464 return do_vm_stop(state);
296af7c9
BS
1465}
1466
8a9236f1
LC
1467/* does a state transition even if the VM is already stopped,
1468 current state is forgotten forever */
56983463 1469int vm_stop_force_state(RunState state)
8a9236f1
LC
1470{
1471 if (runstate_is_running()) {
56983463 1472 return vm_stop(state);
8a9236f1
LC
1473 } else {
1474 runstate_set(state);
b2780d32
WC
1475
1476 bdrv_drain_all();
594a45ce
KW
1477 /* Make sure to return an error if the flush in a previous vm_stop()
1478 * failed. */
da31d594 1479 return blk_flush_all();
8a9236f1
LC
1480 }
1481}
1482
8b427044
PD
1483static int64_t tcg_get_icount_limit(void)
1484{
1485 int64_t deadline;
1486
1487 if (replay_mode != REPLAY_MODE_PLAY) {
1488 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1489
1490 /* Maintain prior (possibly buggy) behaviour where if no deadline
1491 * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1492 * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1493 * nanoseconds.
1494 */
1495 if ((deadline < 0) || (deadline > INT32_MAX)) {
1496 deadline = INT32_MAX;
1497 }
1498
1499 return qemu_icount_round(deadline);
1500 } else {
1501 return replay_get_instructions();
1502 }
1503}
1504
3d57f789 1505static int tcg_cpu_exec(CPUState *cpu)
296af7c9
BS
1506{
1507 int ret;
1508#ifdef CONFIG_PROFILER
1509 int64_t ti;
1510#endif
1511
1512#ifdef CONFIG_PROFILER
1513 ti = profile_getclock();
1514#endif
1515 if (use_icount) {
1516 int64_t count;
1517 int decr;
c96778bb
FK
1518 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1519 + cpu->icount_extra);
28ecfd7a 1520 cpu->icount_decr.u16.low = 0;
efee7340 1521 cpu->icount_extra = 0;
8b427044 1522 count = tcg_get_icount_limit();
c96778bb 1523 timers_state.qemu_icount += count;
296af7c9
BS
1524 decr = (count > 0xffff) ? 0xffff : count;
1525 count -= decr;
28ecfd7a 1526 cpu->icount_decr.u16.low = decr;
efee7340 1527 cpu->icount_extra = count;
296af7c9 1528 }
ea3e9847 1529 ret = cpu_exec(cpu);
296af7c9 1530#ifdef CONFIG_PROFILER
89d5cbdd 1531 tcg_time += profile_getclock() - ti;
296af7c9
BS
1532#endif
1533 if (use_icount) {
1534 /* Fold pending instructions back into the
1535 instruction counter, and clear the interrupt flag. */
c96778bb
FK
1536 timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1537 + cpu->icount_extra);
28ecfd7a 1538 cpu->icount_decr.u32 = 0;
efee7340 1539 cpu->icount_extra = 0;
8b427044 1540 replay_account_executed_instructions();
296af7c9
BS
1541 }
1542 return ret;
1543}
1544
bdb7ca67 1545static void tcg_exec_all(void)
296af7c9 1546{
9a36085b
JK
1547 int r;
1548
40daca54 1549 /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
e76d1798 1550 qemu_account_warp_timer();
ab33fcda 1551
0ab07c62 1552 if (next_cpu == NULL) {
296af7c9 1553 next_cpu = first_cpu;
0ab07c62 1554 }
bdc44640 1555 for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
182735ef 1556 CPUState *cpu = next_cpu;
296af7c9 1557
40daca54 1558 qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
ed2803da 1559 (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
296af7c9 1560
a1fcaa73 1561 if (cpu_can_run(cpu)) {
3d57f789 1562 r = tcg_cpu_exec(cpu);
9a36085b 1563 if (r == EXCP_DEBUG) {
91325046 1564 cpu_handle_guest_debug(cpu);
3c638d06
JK
1565 break;
1566 }
f324e766 1567 } else if (cpu->stop || cpu->stopped) {
4c055ab5
GZ
1568 if (cpu->unplug) {
1569 next_cpu = CPU_NEXT(cpu);
1570 }
296af7c9
BS
1571 break;
1572 }
1573 }
aed807c8
PB
1574
1575 /* Pairs with smp_wmb in qemu_cpu_kick. */
1576 atomic_mb_set(&exit_request, 0);
296af7c9
BS
1577}
1578
9a78eead 1579void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
262353cb
BS
1580{
1581 /* XXX: implement xxx_cpu_list for targets that still miss it */
e916cbf8
PM
1582#if defined(cpu_list)
1583 cpu_list(f, cpu_fprintf);
262353cb
BS
1584#endif
1585}
de0b36b6
LC
1586
1587CpuInfoList *qmp_query_cpus(Error **errp)
1588{
1589 CpuInfoList *head = NULL, *cur_item = NULL;
182735ef 1590 CPUState *cpu;
de0b36b6 1591
bdc44640 1592 CPU_FOREACH(cpu) {
de0b36b6 1593 CpuInfoList *info;
182735ef
AF
1594#if defined(TARGET_I386)
1595 X86CPU *x86_cpu = X86_CPU(cpu);
1596 CPUX86State *env = &x86_cpu->env;
1597#elif defined(TARGET_PPC)
1598 PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1599 CPUPPCState *env = &ppc_cpu->env;
1600#elif defined(TARGET_SPARC)
1601 SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1602 CPUSPARCState *env = &sparc_cpu->env;
1603#elif defined(TARGET_MIPS)
1604 MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1605 CPUMIPSState *env = &mips_cpu->env;
48e06fe0
BK
1606#elif defined(TARGET_TRICORE)
1607 TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1608 CPUTriCoreState *env = &tricore_cpu->env;
182735ef 1609#endif
de0b36b6 1610
cb446eca 1611 cpu_synchronize_state(cpu);
de0b36b6
LC
1612
1613 info = g_malloc0(sizeof(*info));
1614 info->value = g_malloc0(sizeof(*info->value));
55e5c285 1615 info->value->CPU = cpu->cpu_index;
182735ef 1616 info->value->current = (cpu == first_cpu);
259186a7 1617 info->value->halted = cpu->halted;
58f88d4b 1618 info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
9f09e18a 1619 info->value->thread_id = cpu->thread_id;
de0b36b6 1620#if defined(TARGET_I386)
86f4b687 1621 info->value->arch = CPU_INFO_ARCH_X86;
544a3731 1622 info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
de0b36b6 1623#elif defined(TARGET_PPC)
86f4b687 1624 info->value->arch = CPU_INFO_ARCH_PPC;
544a3731 1625 info->value->u.ppc.nip = env->nip;
de0b36b6 1626#elif defined(TARGET_SPARC)
86f4b687 1627 info->value->arch = CPU_INFO_ARCH_SPARC;
544a3731
EB
1628 info->value->u.q_sparc.pc = env->pc;
1629 info->value->u.q_sparc.npc = env->npc;
de0b36b6 1630#elif defined(TARGET_MIPS)
86f4b687 1631 info->value->arch = CPU_INFO_ARCH_MIPS;
544a3731 1632 info->value->u.q_mips.PC = env->active_tc.PC;
48e06fe0 1633#elif defined(TARGET_TRICORE)
86f4b687 1634 info->value->arch = CPU_INFO_ARCH_TRICORE;
544a3731 1635 info->value->u.tricore.PC = env->PC;
86f4b687
EB
1636#else
1637 info->value->arch = CPU_INFO_ARCH_OTHER;
de0b36b6
LC
1638#endif
1639
1640 /* XXX: waiting for the qapi to support GSList */
1641 if (!cur_item) {
1642 head = cur_item = info;
1643 } else {
1644 cur_item->next = info;
1645 cur_item = info;
1646 }
1647 }
1648
1649 return head;
1650}
0cfd6a9a
LC
1651
1652void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1653 bool has_cpu, int64_t cpu_index, Error **errp)
1654{
1655 FILE *f;
1656 uint32_t l;
55e5c285 1657 CPUState *cpu;
0cfd6a9a 1658 uint8_t buf[1024];
0dc9daf0 1659 int64_t orig_addr = addr, orig_size = size;
0cfd6a9a
LC
1660
1661 if (!has_cpu) {
1662 cpu_index = 0;
1663 }
1664
151d1322
AF
1665 cpu = qemu_get_cpu(cpu_index);
1666 if (cpu == NULL) {
c6bd8c70
MA
1667 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1668 "a CPU number");
0cfd6a9a
LC
1669 return;
1670 }
1671
1672 f = fopen(filename, "wb");
1673 if (!f) {
618da851 1674 error_setg_file_open(errp, errno, filename);
0cfd6a9a
LC
1675 return;
1676 }
1677
1678 while (size != 0) {
1679 l = sizeof(buf);
1680 if (l > size)
1681 l = size;
2f4d0f59 1682 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
0dc9daf0
BP
1683 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1684 " specified", orig_addr, orig_size);
2f4d0f59
AK
1685 goto exit;
1686 }
0cfd6a9a 1687 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 1688 error_setg(errp, QERR_IO_ERROR);
0cfd6a9a
LC
1689 goto exit;
1690 }
1691 addr += l;
1692 size -= l;
1693 }
1694
1695exit:
1696 fclose(f);
1697}
6d3962bf
LC
1698
1699void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1700 Error **errp)
1701{
1702 FILE *f;
1703 uint32_t l;
1704 uint8_t buf[1024];
1705
1706 f = fopen(filename, "wb");
1707 if (!f) {
618da851 1708 error_setg_file_open(errp, errno, filename);
6d3962bf
LC
1709 return;
1710 }
1711
1712 while (size != 0) {
1713 l = sizeof(buf);
1714 if (l > size)
1715 l = size;
eb6282f2 1716 cpu_physical_memory_read(addr, buf, l);
6d3962bf 1717 if (fwrite(buf, 1, l, f) != l) {
c6bd8c70 1718 error_setg(errp, QERR_IO_ERROR);
6d3962bf
LC
1719 goto exit;
1720 }
1721 addr += l;
1722 size -= l;
1723 }
1724
1725exit:
1726 fclose(f);
1727}
ab49ab5c
LC
1728
1729void qmp_inject_nmi(Error **errp)
1730{
9cb805fd 1731 nmi_monitor_handle(monitor_get_cpu_index(), errp);
ab49ab5c 1732}
27498bef
ST
1733
1734void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1735{
1736 if (!use_icount) {
1737 return;
1738 }
1739
1740 cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
1741 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1742 if (icount_align_option) {
1743 cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
1744 cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
1745 } else {
1746 cpu_fprintf(f, "Max guest delay NA\n");
1747 cpu_fprintf(f, "Max guest advance NA\n");
1748 }
1749}
This page took 0.688039 seconds and 4 git commands to generate.