 * Copyright (c) 2003-2008 Fabrice Bellard
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
/* Needed early for CONFIG_BSD etc. */
#include "config-host.h"

#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "sysemu/sysemu.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/kvm.h"
#include "qmp-commands.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "qapi-event.h"

#include "qemu/compatfd.h"

#include <sys/prctl.h>

#define PR_MCE_KILL 33

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1

#endif /* CONFIG_LINUX */
static CPUState *next_cpu;

bool cpu_is_stopped(CPUState *cpu)
    return cpu->stopped || !runstate_is_running();

static bool cpu_thread_is_idle(CPUState *cpu)
    if (cpu->stop || cpu->queued_work_first) {
    if (cpu_is_stopped(cpu)) {
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {

static bool all_cpu_threads_idle(void)
        if (!cpu_thread_is_idle(cpu)) {

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

static bool icount_sleep = true;
static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks. */
static int icount_time_shift;
/* Arbitrarily pick 1 MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10
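/* With the maximum shift of 10, each emulated instruction accounts for
 * 2^10 ns of virtual time, i.e. roughly the 1 MIPS minimum noted above. */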
static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;

typedef struct TimersState {
    /* Protected by BQL. */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read outside the BQL, so protect it with
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;

    /* Compensate for varying guest execution speed. */
    int64_t qemu_icount_bias;
    /* Only written by the TCG thread */

static TimersState timers_state;
int64_t cpu_get_icount_raw(void)
    CPUState *cpu = current_cpu;

    icount = timers_state.qemu_icount;
        if (!cpu_can_do_io(cpu)) {
            fprintf(stderr, "Bad icount read\n");
        icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);

/* Return the virtual CPU time, based on the instruction counter. */
static int64_t cpu_get_icount_locked(void)
    int64_t icount = cpu_get_icount_raw();
    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);

int64_t cpu_get_icount(void)
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
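    /* The read loop above retries whenever seqlock_read_retry() reports that
     * a writer updated timers_state while we were sampling it. */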
int64_t cpu_icount_to_ns(int64_t icount)
    return icount << icount_time_shift;

/* Return the host CPU cycle counter and handle stop/restart. */
/* Caller must hold the BQL */
int64_t cpu_get_ticks(void)
        return cpu_get_icount();

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_real_ticks();

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non-increasing ticks may happen if the host uses
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;

    timers_state.cpu_ticks_prev = ticks;
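    /* If the raw tick counter went backwards, the difference was folded into
     * cpu_ticks_offset above, so callers always observe a monotonic value. */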
static int64_t cpu_get_clock_locked(void)
    ticks = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += get_clock();

/* Return the host CPU monotonic clock and handle stop/restart. */
int64_t cpu_get_clock(void)
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

/* enable cpu_get_ticks()
 * Caller must hold the BQL, which serves as the mutex for vm_clock_seqlock.
void cpu_enable_ticks(void)
    /* Here, the thing really protected by the seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);

/* disable cpu_get_ticks(): the clock is stopped. You must not call
 * cpu_get_ticks() after that.
 * Caller must hold the BQL, which serves as the mutex for vm_clock_seqlock.
void cpu_disable_ticks(void)
    /* Here, the thing really protected by the seqlock is cpu_clock_offset. */
    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_real_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variations.
   When the guest is idle, real and virtual time will be aligned in
#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
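/* One tenth of a second of drift (in host tick units) is tolerated before
 * icount_adjust reacts by changing icount_time_shift. */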
static void icount_adjust(void)

    /* Protected by TimersState mutex. */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing. */
    if (!runstate_is_running()) {

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead. Slow time down. */
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind. Speed time up. */
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);
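    /* The bias is recomputed above so that the virtual clock does not jump
     * when icount_time_shift changes. */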
static void icount_adjust_rt(void *opaque)
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);

static void icount_adjust_vm(void *opaque)
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() / 10);

static int64_t qemu_icount_round(int64_t count)
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
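/* qemu_icount_round() rounds a nanosecond deadline up to a whole number of
 * instructions at the current icount_time_shift. */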
static void icount_warp_rt(void *opaque)
    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
    if (atomic_read(&vm_clock_warp_start) == -1) {

    seqlock_write_lock(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = cpu_get_clock_locked();

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        timers_state.qemu_icount_bias += warp_delta;
    vm_clock_warp_start = -1;
    seqlock_write_unlock(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);

void qtest_clock_warp(int64_t dest)
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_lock(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_unlock(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
void qemu_clock_warp(QEMUClockType type)
     * There are too many global variables to make the "warp" behavior
     * applicable to other clocks. But a clock argument removes the
     * need for if statements all over the place.
    if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {

     * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
     * This ensures that the deadline for the timer is computed correctly
     * This also makes sure that the insn counter is synchronized before
     * the CPU starts running, in case the CPU is woken by an event other
     * than the earliest QEMU_CLOCK_VIRTUAL timer.
    icount_warp_rt(NULL);
    timer_del(icount_warp_timer);
    if (!all_cpu_threads_idle()) {

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount. */

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        static bool notified;
        if (!icount_sleep && !notified) {
            error_report("WARNING: icount sleep disabled and no active timers");

         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep. Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
             * We never let VCPUs sleep in no-sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
            seqlock_write_lock(&timers_state.vm_clock_seqlock);
            timers_state.qemu_icount_bias += deadline;
            seqlock_write_unlock(&timers_state.vm_clock_seqlock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time (related to the time left until the next event) has
             * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This prevents the warps from being visible externally; for example,
             * you will not be sending network packets continuously instead of
            seqlock_write_lock(&timers_state.vm_clock_seqlock);
            if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
                vm_clock_warp_start = clock;
            seqlock_write_unlock(&timers_state.vm_clock_seqlock);
            timer_mod_anticipate(icount_warp_timer, clock + deadline);
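            /* timer_mod_anticipate() only ever moves the warp timer earlier,
             * so a later deadline cannot postpone an already scheduled warp. */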
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);

static bool icount_state_needed(void *opaque)

 * This is a subsection for icount migration.
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()

static const VMStateDescription vmstate_timers = {
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    .subsections = (VMStateSubsection[]) {
            .vmsd = &icount_vmstate_timers,
            .needed = icount_state_needed,

void cpu_ticks_init(void)
    seqlock_init(&timers_state.vm_clock_seqlock, NULL);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
void configure_icount(QemuOpts *opts, Error **errp)
    char *rem_str = NULL;

    option = qemu_opt_get(opts, "shift");
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");

    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
        icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                         icount_warp_rt, NULL);

    icount_align_option = qemu_opt_get_bool(opts, "align", false);

    if (icount_align_option && !icount_sleep) {
        error_setg(errp, "align=on and sleep=no are incompatible");
    if (strcmp(option, "auto") != 0) {
        icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=no are incompatible");

    /* 125 MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway. */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                   icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
              get_ticks_per_sec() / 10);
/***********************************************************/
void hw_error(const char *fmt, ...)
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);

void cpu_synchronize_all_states(void)
        cpu_synchronize_state(cpu);

void cpu_synchronize_all_post_reset(void)
        cpu_synchronize_post_reset(cpu);

void cpu_synchronize_all_post_init(void)
        cpu_synchronize_post_init(cpu);

void cpu_clean_all_dirty(void)
        cpu_clean_state(cpu);
static int do_vm_stop(RunState state)
    if (runstate_is_running()) {
        vm_state_notify(0, state);
        qapi_event_send_stop(&error_abort);

    ret = bdrv_flush_all();

static bool cpu_can_run(CPUState *cpu)
    if (cpu_is_stopped(cpu)) {

static void cpu_handle_guest_debug(CPUState *cpu)
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();

static void cpu_signal(int sig)
        cpu_exit(current_cpu);

static void sigbus_reraise(void)
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        sigaddset(&set, SIGBUS);
        sigprocmask(SIG_UNBLOCK, &set, NULL);
    perror("Failed to re-raise SIGBUS!\n");

static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
    if (kvm_on_sigbus(siginfo->ssi_code,
                      (void *)(intptr_t)siginfo->ssi_addr)) {

static void qemu_init_sigbus(void)
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
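    /* Request "early kill" machine-check handling, so a hardware memory error
     * raises SIGBUS with the faulting address as soon as it is detected and
     * can be forwarded to the guest. */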
static void qemu_kvm_eat_signals(CPUState *cpu)
    struct timespec ts = { 0, 0 };
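    /* The zero timeout makes the sigtimedwait() below a non-blocking poll for
     * pending SIG_IPI or SIGBUS signals. */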
    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);
    sigaddset(&waitset, SIGBUS);

        r = sigtimedwait(&waitset, &siginfo, &ts);
        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
            perror("sigtimedwait");

            if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {

        r = sigpending(&chkset);
            perror("sigpending");
    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));

#else /* !CONFIG_LINUX */

static void qemu_init_sigbus(void)

static void qemu_kvm_eat_signals(CPUState *cpu)

#endif /* !CONFIG_LINUX */

static void dummy_signal(int sig)

static void qemu_kvm_init_cpu_signals(CPUState *cpu)
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = dummy_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    pthread_sigmask(SIG_BLOCK, NULL, &set);
    sigdelset(&set, SIG_IPI);
    sigdelset(&set, SIGBUS);
    r = kvm_set_signal_mask(cpu, &set);
        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));

static void qemu_tcg_init_cpu_signals(void)
    struct sigaction sigact;

    memset(&sigact, 0, sizeof(sigact));
    sigact.sa_handler = cpu_signal;
    sigaction(SIG_IPI, &sigact, NULL);

    sigaddset(&set, SIG_IPI);
    pthread_sigmask(SIG_UNBLOCK, &set, NULL);

static void qemu_kvm_init_cpu_signals(CPUState *cpu)

static void qemu_tcg_init_cpu_signals(void)

static QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static unsigned iothread_requesting_mutex;

static QemuThread io_thread;

static QemuThread *tcg_cpu_thread;
static QemuCond *tcg_halt_cond;

static QemuCond qemu_cpu_cond;
static QemuCond qemu_pause_cond;
static QemuCond qemu_work_cond;

void qemu_init_cpu_loop(void)
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_cond_init(&qemu_work_cond);
    qemu_cond_init(&qemu_io_proceeded_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);

void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
    struct qemu_work_item wi;

    if (qemu_cpu_is_self(cpu)) {

    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = &wi;
        cpu->queued_work_last->next = &wi;
    cpu->queued_work_last = &wi;

        CPUState *self_cpu = current_cpu;

        qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
        current_cpu = self_cpu;
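    /* The work item lives on this thread's stack; we keep waiting on
     * qemu_work_cond until the vCPU thread has executed func for us. */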
void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
    struct qemu_work_item *wi;

    if (qemu_cpu_is_self(cpu)) {

    wi = g_malloc0(sizeof(struct qemu_work_item));

    if (cpu->queued_work_first == NULL) {
        cpu->queued_work_first = wi;
        cpu->queued_work_last->next = wi;
    cpu->queued_work_last = wi;

static void flush_queued_work(CPUState *cpu)
    struct qemu_work_item *wi;

    if (cpu->queued_work_first == NULL) {

    while ((wi = cpu->queued_work_first)) {
        cpu->queued_work_first = wi->next;
    cpu->queued_work_last = NULL;
    qemu_cond_broadcast(&qemu_work_cond);

static void qemu_wait_io_event_common(CPUState *cpu)
        qemu_cond_signal(&qemu_pause_cond);
    flush_queued_work(cpu);
    cpu->thread_kicked = false;

static void qemu_tcg_wait_io_event(void)
    while (all_cpu_threads_idle()) {
        /* Start accounting real time to the virtual clock if the CPUs
        qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);

    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);

        qemu_wait_io_event_common(cpu);

static void qemu_kvm_wait_io_event(CPUState *cpu)
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);

    qemu_kvm_eat_signals(cpu);
    qemu_wait_io_event_common(cpu);
static void *qemu_kvm_cpu_thread_fn(void *arg)
    qemu_mutex_lock(&qemu_global_mutex);
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();

    r = kvm_init_vcpu(cpu);
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));

    qemu_kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    qemu_cond_signal(&qemu_cpu_cond);

        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
        qemu_kvm_wait_io_event(cpu);

static void *qemu_dummy_cpu_thread_fn(void *arg)
    fprintf(stderr, "qtest is not supported under Windows\n");

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

        qemu_mutex_unlock_iothread();
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        qemu_mutex_lock_iothread();
        qemu_wait_io_event_common(cpu);

static void tcg_exec_all(void);

static void *qemu_tcg_cpu_thread_fn(void *arg)
    CPUState *cpu = arg;

    qemu_tcg_init_cpu_signals();
    qemu_thread_get_self(cpu->thread);

    qemu_mutex_lock(&qemu_global_mutex);
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);

        /* process any pending work */
            qemu_wait_io_event_common(cpu);

    /* process any pending work */

            int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

            if (deadline == 0) {
                qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        qemu_tcg_wait_io_event();
static void qemu_cpu_kick_thread(CPUState *cpu)
    err = pthread_kill(cpu->thread->thread, SIG_IPI);
        fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));

    if (!qemu_cpu_is_self(cpu)) {
        if (SuspendThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,

        /* On multi-core systems, we are not sure that the thread is actually
         * suspended until we can get the context.
        tcgContext.ContextFlags = CONTEXT_CONTROL;
        while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {

        if (ResumeThread(cpu->hThread) == (DWORD)-1) {
            fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,

void qemu_cpu_kick(CPUState *cpu)
    qemu_cond_broadcast(cpu->halt_cond);
    if (!tcg_enabled() && !cpu->thread_kicked) {
        qemu_cpu_kick_thread(cpu);
        cpu->thread_kicked = true;

void qemu_cpu_kick_self(void)
    assert(current_cpu);

    if (!current_cpu->thread_kicked) {
        qemu_cpu_kick_thread(current_cpu);
        current_cpu->thread_kicked = true;

bool qemu_cpu_is_self(CPUState *cpu)
    return qemu_thread_is_self(cpu->thread);

bool qemu_in_vcpu_thread(void)
    return current_cpu && qemu_cpu_is_self(current_cpu);

void qemu_mutex_lock_iothread(void)
    atomic_inc(&iothread_requesting_mutex);
    if (!tcg_enabled() || !first_cpu || !first_cpu->thread) {
        qemu_mutex_lock(&qemu_global_mutex);
        atomic_dec(&iothread_requesting_mutex);
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_thread(first_cpu);
            qemu_mutex_lock(&qemu_global_mutex);
        atomic_dec(&iothread_requesting_mutex);
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
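        /* Kicking first_cpu above forces the TCG thread out of its execution
         * loop, so it releases the global mutex promptly when the trylock
         * fails. */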
void qemu_mutex_unlock_iothread(void)
    qemu_mutex_unlock(&qemu_global_mutex);

static int all_vcpus_paused(void)
        if (!cpu->stopped) {

void pause_all_vcpus(void)
    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);

    if (qemu_in_vcpu_thread()) {
        if (!kvm_enabled()) {
                cpu->stopped = true;

    while (!all_vcpus_paused()) {
        qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);

void cpu_resume(CPUState *cpu)
    cpu->stopped = false;

void resume_all_vcpus(void)
    qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);

/* Size of the temporary buffers used to form a vCPU thread name */
#define VCPU_THREAD_NAME_SIZE 16

static void qemu_tcg_init_vcpu(CPUState *cpu)
    char thread_name[VCPU_THREAD_NAME_SIZE];

    tcg_cpu_address_space_init(cpu, cpu->as);

    /* share a single thread for all cpus with TCG */
    if (!tcg_cpu_thread) {
        cpu->thread = g_malloc0(sizeof(QemuThread));
        cpu->halt_cond = g_malloc0(sizeof(QemuCond));
        qemu_cond_init(cpu->halt_cond);
        tcg_halt_cond = cpu->halt_cond;
        snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
        qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
                           cpu, QEMU_THREAD_JOINABLE);
        cpu->hThread = qemu_thread_get_handle(cpu->thread);
        while (!cpu->created) {
            qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
        tcg_cpu_thread = cpu->thread;
        cpu->thread = tcg_cpu_thread;
        cpu->halt_cond = tcg_halt_cond;
static void qemu_kvm_start_vcpu(CPUState *cpu)
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
    qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
                       cpu, QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);

static void qemu_dummy_start_vcpu(CPUState *cpu)
    char thread_name[VCPU_THREAD_NAME_SIZE];

    cpu->thread = g_malloc0(sizeof(QemuThread));
    cpu->halt_cond = g_malloc0(sizeof(QemuCond));
    qemu_cond_init(cpu->halt_cond);
    snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
    qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
                       QEMU_THREAD_JOINABLE);
    while (!cpu->created) {
        qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);

void qemu_init_vcpu(CPUState *cpu)
    cpu->nr_cores = smp_cores;
    cpu->nr_threads = smp_threads;
    cpu->stopped = true;
    if (kvm_enabled()) {
        qemu_kvm_start_vcpu(cpu);
    } else if (tcg_enabled()) {
        qemu_tcg_init_vcpu(cpu);
        qemu_dummy_start_vcpu(cpu);

void cpu_stop_current(void)
        current_cpu->stop = false;
        current_cpu->stopped = true;
        cpu_exit(current_cpu);
        qemu_cond_signal(&qemu_pause_cond);

int vm_stop(RunState state)
    if (qemu_in_vcpu_thread()) {
        qemu_system_vmstop_request_prepare();
        qemu_system_vmstop_request(state);
         * FIXME: should not return to device code in case
         * vm_stop() has been requested.

    return do_vm_stop(state);

/* Does a state transition even if the VM is already stopped;
   the current state is forgotten forever. */
int vm_stop_force_state(RunState state)
    if (runstate_is_running()) {
        return vm_stop(state);
        runstate_set(state);
        /* Make sure to return an error if the flush in a previous vm_stop()
        return bdrv_flush_all();
static int tcg_cpu_exec(CPUArchState *env)
    CPUState *cpu = ENV_GET_CPU(env);
#ifdef CONFIG_PROFILER
#ifdef CONFIG_PROFILER
    ti = profile_getclock();
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                     + cpu->icount_extra);
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;

        count = qemu_icount_round(deadline);
        timers_state.qemu_icount += count;
        decr = (count > 0xffff) ? 0xffff : count;
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
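        /* Only the low 16 bits of the instruction budget fit in the icount
         * decrementer; the remainder is parked in icount_extra and consumed
         * once the decrementer runs out. */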
    ret = cpu_exec(env);
#ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
        /* Fold pending instructions back into the
           instruction counter, and clear the interrupt flag. */
        timers_state.qemu_icount -= (cpu->icount_decr.u16.low
                                     + cpu->icount_extra);
        cpu->icount_decr.u32 = 0;
        cpu->icount_extra = 0;

static void tcg_exec_all(void)
    /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
    qemu_clock_warp(QEMU_CLOCK_VIRTUAL);

    if (next_cpu == NULL) {
        next_cpu = first_cpu;
    for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
        CPUState *cpu = next_cpu;
        CPUArchState *env = cpu->env_ptr;

        qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                          (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

        if (cpu_can_run(cpu)) {
            r = tcg_cpu_exec(env);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
        } else if (cpu->stop || cpu->stopped) {

void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
    /* XXX: implement xxx_cpu_list for targets that still lack it */
#if defined(cpu_list)
    cpu_list(f, cpu_fprintf);
CpuInfoList *qmp_query_cpus(Error **errp)
    CpuInfoList *head = NULL, *cur_item = NULL;

#if defined(TARGET_I386)
        X86CPU *x86_cpu = X86_CPU(cpu);
        CPUX86State *env = &x86_cpu->env;
#elif defined(TARGET_PPC)
        PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
        CPUPPCState *env = &ppc_cpu->env;
#elif defined(TARGET_SPARC)
        SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
        CPUSPARCState *env = &sparc_cpu->env;
#elif defined(TARGET_MIPS)
        MIPSCPU *mips_cpu = MIPS_CPU(cpu);
        CPUMIPSState *env = &mips_cpu->env;
#elif defined(TARGET_TRICORE)
        TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
        CPUTriCoreState *env = &tricore_cpu->env;

        cpu_synchronize_state(cpu);

        info = g_malloc0(sizeof(*info));
        info->value = g_malloc0(sizeof(*info->value));
        info->value->CPU = cpu->cpu_index;
        info->value->current = (cpu == first_cpu);
        info->value->halted = cpu->halted;
        info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
        info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
        info->value->has_pc = true;
        info->value->pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
        info->value->has_nip = true;
        info->value->nip = env->nip;
#elif defined(TARGET_SPARC)
        info->value->has_pc = true;
        info->value->pc = env->pc;
        info->value->has_npc = true;
        info->value->npc = env->npc;
#elif defined(TARGET_MIPS)
        info->value->has_PC = true;
        info->value->PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
        info->value->has_PC = true;
        info->value->PC = env->PC;

        /* XXX: waiting for the qapi to support GSList */
            head = cur_item = info;
            cur_item->next = info;
void qmp_memsave(int64_t addr, int64_t size, const char *filename,
                 bool has_cpu, int64_t cpu_index, Error **errp)
    int64_t orig_addr = addr, orig_size = size;

    cpu = qemu_get_cpu(cpu_index);
        error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",

    f = fopen(filename, "wb");
        error_setg_file_open(errp, errno, filename);

        if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
            error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
                       " specified", orig_addr, orig_size);
        if (fwrite(buf, 1, l, f) != l) {
            error_set(errp, QERR_IO_ERROR);

void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
    f = fopen(filename, "wb");
        error_setg_file_open(errp, errno, filename);

        cpu_physical_memory_read(addr, buf, l);
        if (fwrite(buf, 1, l, f) != l) {
            error_set(errp, QERR_IO_ERROR);

void qmp_inject_nmi(Error **errp)
#if defined(TARGET_I386)
        X86CPU *cpu = X86_CPU(cs);

        if (!cpu->apic_state) {
            cpu_interrupt(cs, CPU_INTERRUPT_NMI);
            apic_deliver_nmi(cpu->apic_state);

    nmi_monitor_handle(monitor_get_cpu_index(), errp);

void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
    cpu_fprintf(f, "Host - Guest clock %"PRIi64" ms\n",
                (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
    if (icount_align_option) {
        cpu_fprintf(f, "Max guest delay %"PRIi64" ms\n", -max_delay/SCALE_MS);
        cpu_fprintf(f, "Max guest advance %"PRIi64" ms\n", max_advance/SCALE_MS);
        cpu_fprintf(f, "Max guest delay NA\n");
        cpu_fprintf(f, "Max guest advance NA\n");