4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
34 #include "qemu-thread.h"
/* Signal used to kick vcpu threads: qemu_cpu_kick_thread() delivers it with
 * pthread_kill(), and the KVM/TCG signal setup code installs its handler.
 * NOTE(review): two alternative definitions are listed; the preprocessor
 * condition that selects between them is not visible in this listing. */
42 #define SIG_IPI (SIGRTMIN+4)
44 #define SIG_IPI SIGUSR1
49 #include <sys/prctl.h>
/* Fallback values for the prctl() machine-check options that
 * qemu_init_sigbus() passes to prctl().  PR_MCE_KILL_SET and
 * PR_MCE_KILL_EARLY are only defined when missing; NOTE(review): the guard
 * around PR_MCE_KILL itself is not visible in this listing. */
52 #define PR_MCE_KILL 33
55 #ifndef PR_MCE_KILL_SET
56 #define PR_MCE_KILL_SET 1
59 #ifndef PR_MCE_KILL_EARLY
60 #define PR_MCE_KILL_EARLY 1
63 #endif /* CONFIG_LINUX */
/* Resume point for cpu_exec_all(): reset to first_cpu when NULL and advanced
 * along ->next_cpu, so an interrupted pass continues from this CPU. */
65 static CPUState *next_cpu;
67 /***********************************************************/
/*
 * Report a hardware emulation error.
 *
 * Prints "qemu: hardware error: " followed by the printf-style message
 * (@fmt plus varargs) and a newline to stderr, then dumps the state of every
 * CPU on the first_cpu list, each preceded by a "CPU #<index>:" line.
 */
68 void hw_error(const char *fmt, ...)
74 fprintf(stderr, "qemu: hardware error: ");
75 vfprintf(stderr, fmt, ap);
76 fprintf(stderr, "\n");
77 for(env = first_cpu; env != NULL; env = env->next_cpu) {
78 fprintf(stderr, "CPU #%d:\n", env->cpu_index);
/* NOTE(review): the two calls below are presumably target-selected
 * alternatives (FPU dump vs. flags 0); the #if is not visible here. */
80 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU);
82 cpu_dump_state(env, stderr, fprintf, 0);
/* Call cpu_synchronize_state() on every CPU in the first_cpu list. */
89 void cpu_synchronize_all_states(void)
93 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
94 cpu_synchronize_state(cpu);
/* Call cpu_synchronize_post_reset() on every CPU in the first_cpu list. */
98 void cpu_synchronize_all_post_reset(void)
102 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
103 cpu_synchronize_post_reset(cpu);
/* Call cpu_synchronize_post_init() on every CPU in the first_cpu list. */
107 void cpu_synchronize_all_post_init(void)
111 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
112 cpu_synchronize_post_init(cpu);
/* Return non-zero when @env is not executing guest code: either the VM is
 * not in the running state or env->stopped is set. */
116 int cpu_is_stopped(CPUState *env)
118 return !runstate_is_running() || env->stopped;
/* Stop-side handling of a change to run state @state.
 * Acts only while runstate_is_running(); the visible steps call
 * vm_state_notify(0, state) and emit a QEVENT_STOP monitor event.
 * NOTE(review): further statements inside the branch are not visible in
 * this listing. */
121 static void do_vm_stop(RunState state)
123 if (runstate_is_running()) {
127 vm_state_notify(0, state);
130 monitor_protocol_event(QEVENT_STOP, NULL);
/* Tell whether @env may execute guest code right now; callers run the CPU
 * only when the result is non-zero.
 * Visible check: env->stopped set, or the VM not in the running state.
 * NOTE(review): the return statements and any earlier checks are on lines
 * not visible in this listing. */
134 static int cpu_can_run(CPUState *env)
139 if (env->stopped || !runstate_is_running()) {
/* Decide whether the thread driving @env has nothing to do.
 * Conditions are examined in this order:
 *   1. a pending stop request (env->stop) or queued work items;
 *   2. the CPU being stopped or the VM not running;
 *   3. the CPU not being halted, having work, or KVM running with an
 *      in-kernel irqchip.
 * NOTE(review): the value returned by each branch is not visible in this
 * listing. */
145 static bool cpu_thread_is_idle(CPUState *env)
147 if (env->stop || env->queued_work_first) {
150 if (env->stopped || !runstate_is_running()) {
153 if (!env->halted || qemu_cpu_has_work(env) ||
154 (kvm_enabled() && kvm_irqchip_in_kernel())) {
/* Walk the first_cpu list and test each CPU with cpu_thread_is_idle().
 * NOTE(review): presumably returns false at the first non-idle CPU and true
 * otherwise; the return statements are not visible in this listing. */
160 bool all_cpu_threads_idle(void)
164 for (env = first_cpu; env != NULL; env = env->next_cpu) {
165 if (!cpu_thread_is_idle(env)) {
/* React to a debug exit from guest execution (callers invoke this when the
 * exec call returns EXCP_DEBUG): hand @env to gdb_set_stop_cpu() and raise
 * a request through qemu_system_debug_request(). */
172 static void cpu_handle_guest_debug(CPUState *env)
174 gdb_set_stop_cpu(env);
175 qemu_system_debug_request();
/* Signal handler installed for SIG_IPI by qemu_tcg_init_cpu_signals().
 * If a CPU is currently recorded in cpu_single_env, call cpu_exit() on it;
 * @sig is not used by the visible code. */
179 static void cpu_signal(int sig)
181 if (cpu_single_env) {
182 cpu_exit(cpu_single_env);
/* Put SIGBUS back to its default disposition and unblock it for the calling
 * thread; the failure path reports through perror().
 * NOTE(review): the statements that actually raise the signal are not
 * visible in this listing. */
188 static void sigbus_reraise(void)
191 struct sigaction action;
193 memset(&action, 0, sizeof(action));
194 action.sa_handler = SIG_DFL;
195 if (!sigaction(SIGBUS, &action, NULL)) {
198 sigaddset(&set, SIGBUS);
199 sigprocmask(SIG_UNBLOCK, &set, NULL);
/* perror() appends ": <error text>\n" itself, so the prefix must not end
 * in a newline. */
201 perror("Failed to re-raise SIGBUS!");
/* SIGBUS handler.  Receives the signal details as a qemu_signalfd_siginfo
 * and forwards its ssi_code and ssi_addr (converted to a pointer) to
 * kvm_on_sigbus(); a non-zero result enters a branch whose body is not
 * visible in this listing. */
205 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
208 if (kvm_on_sigbus(siginfo->ssi_code,
209 (void *)(intptr_t)siginfo->ssi_addr)) {
/* Install sigbus_handler as the SA_SIGINFO handler for SIGBUS and call
 * prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0). */
214 static void qemu_init_sigbus(void)
216 struct sigaction action;
218 memset(&action, 0, sizeof(action));
219 action.sa_flags = SA_SIGINFO;
/* sigbus_handler takes a qemu_signalfd_siginfo pointer rather than a
 * siginfo_t pointer, hence the function-pointer cast. */
220 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
221 sigaction(SIGBUS, &action, NULL);
223 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
/* Consume pending SIG_IPI and SIGBUS signals without blocking.
 * Each round polls the two signals with sigtimedwait() and a zero timeout,
 * passes signal details to kvm_on_sigbus_vcpu() for @env, then checks
 * sigpending(); rounds repeat while either signal is still pending.
 * NOTE(review): the dispatch on the received signal number and the error
 * exits are on lines not visible in this listing. */
226 static void qemu_kvm_eat_signals(CPUState *env)
228 struct timespec ts = { 0, 0 };
234 sigemptyset(&waitset);
235 sigaddset(&waitset, SIG_IPI);
236 sigaddset(&waitset, SIGBUS);
239 r = sigtimedwait(&waitset, &siginfo, &ts);
/* EAGAIN (nothing pending) and EINTR are not treated as errors. */
240 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
241 perror("sigtimedwait");
247 if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr)) {
255 r = sigpending(&chkset);
257 perror("sigpending");
260 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
263 #else /* !CONFIG_LINUX */
/* Variant of qemu_init_sigbus() for the "!CONFIG_LINUX" branch.
 * NOTE(review): no body statements are visible in this listing. */
265 static void qemu_init_sigbus(void)
/* Variant of qemu_kvm_eat_signals() for the "!CONFIG_LINUX" branch.
 * NOTE(review): no body statements are visible in this listing. */
269 static void qemu_kvm_eat_signals(CPUState *env)
272 #endif /* !CONFIG_LINUX */
/* Descriptor written by qemu_event_increment(); stays -1 until
 * qemu_event_init() stores fds[1] here. */
275 static int io_thread_fd = -1;
/* Post a notification on io_thread_fd by writing the 64-bit value 1,
 * retrying while the write is interrupted (EINTR).  A failure other than
 * EAGAIN is reported on stderr.
 * NOTE(review): the branch taken when io_thread_fd is still -1 is not
 * visible in this listing. */
277 static void qemu_event_increment(void)
279 /* Write 8 bytes to be compatible with eventfd. */
280 static const uint64_t val = 1;
283 if (io_thread_fd == -1) {
287 ret = write(io_thread_fd, &val, sizeof(val));
288 } while (ret < 0 && errno == EINTR);
290 /* EAGAIN is fine, a read must be pending. */
291 if (ret < 0 && errno != EAGAIN) {
292 fprintf(stderr, "qemu_event_increment: write() failed: %s\n",
/* Read handler for the notification descriptor, which is passed as an
 * integer packed into @opaque.  Keeps reading into a local buffer while a
 * read is interrupted (EINTR) or fills the whole buffer, i.e. until a short
 * read or another error occurs. */
298 static void qemu_event_read(void *opaque)
300 int fd = (intptr_t)opaque;
304 /* Drain the notify pipe. For eventfd, only 8 bytes will be read. */
306 len = read(fd, buffer, sizeof(buffer));
307 } while ((len == -1 && errno == EINTR) || len == sizeof(buffer));
/* Create the main-loop notification descriptors.
 * Obtains a pair from qemu_eventfd(), makes both ends non-blocking,
 * registers qemu_event_read() as the read handler of fds[0] (the fd itself
 * is passed as the opaque value) and stores fds[1] in io_thread_fd.
 * NOTE(review): the checks on `err` and the return statements are not
 * visible in this listing. */
310 static int qemu_event_init(void)
315 err = qemu_eventfd(fds);
319 err = fcntl_setfl(fds[0], O_NONBLOCK);
323 err = fcntl_setfl(fds[1], O_NONBLOCK);
327 qemu_set_fd_handler2(fds[0], NULL, qemu_event_read, NULL,
328 (void *)(intptr_t)fds[0]);
330 io_thread_fd = fds[1];
/* Signal handler installed for SIG_IPI by qemu_kvm_init_cpu_signals().
 * NOTE(review): no body statements are visible in this listing. */
339 static void dummy_signal(int sig)
343 /* If we have signalfd, we mask out the signals we want to handle and then
344 * use signalfd to listen for them. We rely on whatever the current signal
345 * handler is to dispatch the signals when we receive them.
/* Read handler for the signalfd descriptor (passed as an integer packed
 * into @opaque).  Reads one qemu_signalfd_siginfo record, retrying on
 * EINTR, queries the current disposition of that signal with sigaction()
 * and calls the installed handler directly: sa_sigaction when SA_SIGINFO
 * is set, otherwise sa_handler. */
347 static void sigfd_handler(void *opaque)
349 int fd = (intptr_t)opaque;
350 struct qemu_signalfd_siginfo info;
351 struct sigaction action;
356 len = read(fd, &info, sizeof(info));
357 } while (len == -1 && errno == EINTR);
359 if (len == -1 && errno == EAGAIN) {
363 if (len != sizeof(info)) {
/* NOTE(review): this diagnostic goes to stdout and relies on the glibc
 * "%m" extension; errno may be stale when the read was merely short. */
364 printf("read from sigfd returned %zd: %m\n", len);
368 sigaction(info.ssi_signo, NULL, &action);
369 if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction) {
/* The signalfd record is handed over in place of a siginfo_t. */
370 action.sa_sigaction(info.ssi_signo,
371 (siginfo_t *)&info, NULL);
372 } else if (action.sa_handler) {
373 action.sa_handler(info.ssi_signo);
/* Set up signal delivery for the calling thread.
 * Blocks SIG_IPI, then blocks SIGIO, SIGALRM and SIGBUS and creates a
 * non-blocking signalfd from `set`, with sigfd_handler() registered as its
 * read handler (the fd itself is the opaque value).
 * NOTE(review): the (re)initialisation of `set` between the two
 * pthread_sigmask() calls and the return statements are not visible in
 * this listing. */
378 static int qemu_signal_init(void)
384 * SIG_IPI must be blocked in the main thread and must not be caught
385 * by sigwait() in the signal thread. Otherwise, the cpu thread will
386 * not catch it reliably.
389 sigaddset(&set, SIG_IPI);
390 pthread_sigmask(SIG_BLOCK, &set, NULL);
393 sigaddset(&set, SIGIO);
394 sigaddset(&set, SIGALRM);
395 sigaddset(&set, SIGBUS);
396 pthread_sigmask(SIG_BLOCK, &set, NULL);
398 sigfd = qemu_signalfd(&set);
400 fprintf(stderr, "failed to create signalfd\n");
404 fcntl_setfl(sigfd, O_NONBLOCK);
406 qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
407 (void *)(intptr_t)sigfd);
/* Per-vcpu signal setup for KVM.
 * Installs dummy_signal() as the SIG_IPI handler, then passes the calling
 * thread's blocked-signal set, with SIG_IPI and SIGBUS removed, to
 * kvm_set_signal_mask() for @env.  A failure is reported on stderr using
 * the negated return value as an errno code.
 * The sequence that removes the two signals and sets the mask is issued
 * once; repeating it on the same set would only duplicate the call. */
412 static void qemu_kvm_init_cpu_signals(CPUState *env)
416 struct sigaction sigact;
418 memset(&sigact, 0, sizeof(sigact));
419 sigact.sa_handler = dummy_signal;
420 sigaction(SIG_IPI, &sigact, NULL);
/* A NULL new set makes this a pure query of the current mask. */
422 pthread_sigmask(SIG_BLOCK, NULL, &set);
423 sigdelset(&set, SIG_IPI);
424 sigdelset(&set, SIGBUS);
425 r = kvm_set_signal_mask(env, &set);
427 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
/* Signal setup for the TCG cpu thread: install cpu_signal() as the SIG_IPI
 * handler and unblock SIG_IPI in the calling thread. */
440 static void qemu_tcg_init_cpu_signals(void)
443 struct sigaction sigact;
445 memset(&sigact, 0, sizeof(sigact));
446 sigact.sa_handler = cpu_signal;
447 sigaction(SIG_IPI, &sigact, NULL);
450 sigaddset(&set, SIG_IPI);
451 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
/* Event object created by the Windows qemu_event_init() with CreateEvent()
 * and set by the Windows qemu_event_increment() with SetEvent(). */
456 HANDLE qemu_event_handle;
/* Callback registered for qemu_event_handle via qemu_add_wait_object().
 * NOTE(review): no body statements are visible in this listing. */
458 static void dummy_event_handler(void *opaque)
/* Windows variant: create qemu_event_handle with CreateEvent() (second
 * argument FALSE, i.e. auto-reset; initial state FALSE) and register it
 * with qemu_add_wait_object() using dummy_event_handler().  A CreateEvent
 * failure is reported on stderr together with GetLastError().
 * NOTE(review): the return statements are not visible in this listing. */
462 static int qemu_event_init(void)
464 qemu_event_handle = CreateEvent(NULL, FALSE, FALSE, NULL);
465 if (!qemu_event_handle) {
466 fprintf(stderr, "Failed CreateEvent: %ld\n", GetLastError());
469 qemu_add_wait_object(qemu_event_handle, dummy_event_handler, NULL);
/* Windows variant: set qemu_event_handle with SetEvent() and report a
 * failure on stderr. */
473 static void qemu_event_increment(void)
475 if (!SetEvent(qemu_event_handle)) {
476 fprintf(stderr, "qemu_event_increment: SetEvent failed: %ld\n",
/* Second definition of qemu_signal_init(), alongside the CreateEvent-based
 * event code.  NOTE(review): no body statements are visible in this
 * listing. */
482 static int qemu_signal_init(void)
/* Second definition of qemu_kvm_init_cpu_signals(), alongside the
 * CreateEvent-based event code.  NOTE(review): no body statements are
 * visible in this listing. */
487 static void qemu_kvm_init_cpu_signals(CPUState *env)
/* Second definition of qemu_tcg_init_cpu_signals(), alongside the
 * CreateEvent-based event code.  NOTE(review): no body statements are
 * visible in this listing. */
492 static void qemu_tcg_init_cpu_signals(void)
/* Mutex taken by qemu_mutex_lock_iothread() and by the vcpu thread
 * functions; every condition variable below is waited on with it. */
497 QemuMutex qemu_global_mutex;
/* Broadcast by qemu_mutex_lock_iothread() once it holds the mutex; the TCG
 * thread waits on it while iothread_requesting_mutex is set. */
498 static QemuCond qemu_io_proceeded_cond;
/* Set while qemu_mutex_lock_iothread() is trying to take the mutex on its
 * trylock path. */
499 static bool iothread_requesting_mutex;
/* Identity of the thread that ran qemu_init_main_loop(); vm_stop() compares
 * the caller against it. */
501 static QemuThread io_thread;
/* Thread and halt condition shared by all CPUs under TCG; created for the
 * first CPU in qemu_tcg_init_vcpu() and reused for the others. */
503 static QemuThread *tcg_cpu_thread;
504 static QemuCond *tcg_halt_cond;
/* Signalled by a vcpu thread function during start-up; the vcpu creation
 * functions wait on it until env->created becomes non-zero. */
507 static QemuCond qemu_cpu_cond;
/* Signalled when a CPU stops; pause_all_vcpus() waits on it. */
509 static QemuCond qemu_pause_cond;
/* Broadcast by flush_queued_work(); run_on_cpu() waits on it. */
510 static QemuCond qemu_work_cond;
/* Initialise main-loop signalling and locking.
 * In order: qemu_signal_init(), qemu_event_init(), initialisation of the
 * four condition variables and of qemu_global_mutex, acquisition of that
 * mutex, and recording of the calling thread in io_thread.
 * NOTE(review): the checks on `ret` and the return statements are not
 * visible in this listing. */
512 int qemu_init_main_loop(void)
518 ret = qemu_signal_init();
523 /* Note eventfd must be drained before signalfd handlers run */
524 ret = qemu_event_init();
529 qemu_cond_init(&qemu_cpu_cond);
530 qemu_cond_init(&qemu_pause_cond);
531 qemu_cond_init(&qemu_work_cond);
532 qemu_cond_init(&qemu_io_proceeded_cond);
533 qemu_mutex_init(&qemu_global_mutex);
534 qemu_mutex_lock(&qemu_global_mutex);
536 qemu_thread_get_self(&io_thread);
/* Hook taking no arguments and returning nothing.
 * NOTE(review): no body statements are visible in this listing. */
541 void qemu_main_loop_start(void)
/* Have @func(@data) executed on behalf of @env.
 * When the caller already is @env's thread a separate branch is taken (its
 * body is not visible in this listing).  Otherwise the work item `wi`,
 * which lives on the caller's stack, is appended to @env's work queue and
 * the caller waits on qemu_work_cond, releasing qemu_global_mutex while
 * waiting. */
546 void run_on_cpu(CPUState *env, void (*func)(void *data), void *data)
548 struct qemu_work_item wi;
550 if (qemu_cpu_is_self(env)) {
/* Append to the tail, or start the list if it is empty. */
557 if (!env->queued_work_first) {
558 env->queued_work_first = &wi;
560 env->queued_work_last->next = &wi;
562 env->queued_work_last = &wi;
/* cpu_single_env is saved before and restored after the wait. */
568 CPUState *self_env = cpu_single_env;
570 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
571 cpu_single_env = self_env;
/* Drain @env's work queue.
 * Nothing is flushed when the queue is empty.  Otherwise items are
 * unlinked from the head one at a time; afterwards the tail pointer is
 * cleared and qemu_work_cond is broadcast for the waiters in run_on_cpu().
 * NOTE(review): the statements that run each item are not visible in this
 * listing. */
575 static void flush_queued_work(CPUState *env)
577 struct qemu_work_item *wi;
579 if (!env->queued_work_first) {
583 while ((wi = env->queued_work_first)) {
584 env->queued_work_first = wi->next;
588 env->queued_work_last = NULL;
589 qemu_cond_broadcast(&qemu_work_cond);
/* Housekeeping shared by the TCG and KVM wait paths: signal
 * qemu_pause_cond, flush @env's queued work and clear env->thread_kicked.
 * NOTE(review): the condition guarding the qemu_pause_cond signal is not
 * visible in this listing. */
592 static void qemu_wait_io_event_common(CPUState *env)
597 qemu_cond_signal(&qemu_pause_cond);
599 flush_queued_work(env);
600 env->thread_kicked = false;
/* Wait step of the shared TCG thread.
 * While every CPU thread is idle: warp vm_clock and sleep on tcg_halt_cond.
 * Then, while qemu_mutex_lock_iothread() is requesting the mutex: sleep on
 * qemu_io_proceeded_cond.  Both waits release qemu_global_mutex.  Finally
 * qemu_wait_io_event_common() is run for every CPU in the list. */
603 static void qemu_tcg_wait_io_event(void)
607 while (all_cpu_threads_idle()) {
608 /* Start accounting real time to the virtual clock if the CPUs
610 qemu_clock_warp(vm_clock);
611 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
614 while (iothread_requesting_mutex) {
615 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
618 for (env = first_cpu; env != NULL; env = env->next_cpu) {
619 qemu_wait_io_event_common(env);
/* Wait step of a KVM vcpu thread: sleep on env->halt_cond (releasing
 * qemu_global_mutex) for as long as the thread is idle, then consume
 * pending signals and run the common housekeeping for @env. */
623 static void qemu_kvm_wait_io_event(CPUState *env)
625 while (cpu_thread_is_idle(env)) {
626 qemu_cond_wait(env->halt_cond, &qemu_global_mutex);
629 qemu_kvm_eat_signals(env);
630 qemu_wait_io_event_common(env);
/* Thread function of a KVM vcpu (started by qemu_kvm_start_vcpu() with the
 * CPU as @arg).
 * Start-up: take qemu_global_mutex, record the thread identity and id in
 * env, initialise the vcpu with kvm_init_vcpu() (a failure is reported on
 * stderr), set up the per-vcpu signals and signal qemu_cpu_cond.
 * Execution step: when the CPU may run, call kvm_cpu_exec() and forward an
 * EXCP_DEBUG result to cpu_handle_guest_debug(); then wait for events.
 * NOTE(review): the enclosing loop construct and the store to env->created
 * are not visible in this listing. */
633 static void *qemu_kvm_cpu_thread_fn(void *arg)
638 qemu_mutex_lock(&qemu_global_mutex);
639 qemu_thread_get_self(env->thread);
640 env->thread_id = qemu_get_thread_id();
642 r = kvm_init_vcpu(env);
644 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
648 qemu_kvm_init_cpu_signals(env);
650 /* signal CPU creation */
652 qemu_cond_signal(&qemu_cpu_cond);
655 if (cpu_can_run(env)) {
656 r = kvm_cpu_exec(env);
657 if (r == EXCP_DEBUG) {
658 cpu_handle_guest_debug(env);
661 qemu_kvm_wait_io_event(env);
/* Thread function of the single TCG cpu thread (started by
 * qemu_tcg_init_vcpu()).
 * Start-up: set up SIG_IPI handling, record the thread identity, take
 * qemu_global_mutex, store this thread's id in every CPU of the list and
 * signal qemu_cpu_cond.  The thread then sleeps on tcg_halt_cond while
 * first_cpu is stopped.
 * NOTE(review): the main loop construct, the guest execution call and the
 * body of the icount-deadline branch are not visible in this listing. */
667 static void *qemu_tcg_cpu_thread_fn(void *arg)
671 qemu_tcg_init_cpu_signals();
672 qemu_thread_get_self(env->thread);
674 /* signal CPU creation */
675 qemu_mutex_lock(&qemu_global_mutex);
676 for (env = first_cpu; env != NULL; env = env->next_cpu) {
677 env->thread_id = qemu_get_thread_id();
680 qemu_cond_signal(&qemu_cpu_cond);
682 /* wait for initial kick-off after machine start */
683 while (first_cpu->stopped) {
684 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
689 if (use_icount && qemu_next_icount_deadline() <= 0) {
692 qemu_tcg_wait_io_event();
/* Interrupt the thread that runs @env.
 * Signal-based path: deliver SIG_IPI with pthread_kill() and report a
 * failure on stderr as "qemu:<function>: <error text>".
 * Suspend-based path: unless the caller is that thread itself, suspend it
 * and resume it again.
 * NOTE(review): the preprocessor condition choosing between the two paths
 * is not visible in this listing. */
698 static void qemu_cpu_kick_thread(CPUState *env)
703 err = pthread_kill(env->thread->thread, SIG_IPI);
/* Terminate the diagnostic with a newline so it forms a complete line. */
705 fprintf(stderr, "qemu:%s: %s\n", __func__, strerror(err));
709 if (!qemu_cpu_is_self(env)) {
710 SuspendThread(env->thread->thread);
712 ResumeThread(env->thread->thread);
/* Wake the CPU passed as @_env (a CPUState pointer): broadcast its
 * halt_cond and, when KVM is enabled and no kick is outstanding, interrupt
 * its thread and mark it as kicked. */
717 void qemu_cpu_kick(void *_env)
719 CPUState *env = _env;
721 qemu_cond_broadcast(env->halt_cond);
722 if (kvm_enabled() && !env->thread_kicked) {
723 qemu_cpu_kick_thread(env);
724 env->thread_kicked = true;
/* Kick the CPU recorded in cpu_single_env, which must be non-NULL
 * (asserted): unless a kick is already outstanding, interrupt its thread
 * and mark it as kicked. */
728 void qemu_cpu_kick_self(void)
731 assert(cpu_single_env);
733 if (!cpu_single_env->thread_kicked) {
734 qemu_cpu_kick_thread(cpu_single_env);
735 cpu_single_env->thread_kicked = true;
/* Return the result of qemu_thread_is_self() for the thread of the CPU
 * passed as @_env (a CPUState pointer), i.e. whether the caller is that
 * CPU's thread. */
742 int qemu_cpu_is_self(void *_env)
744 CPUState *env = _env;
746 return qemu_thread_is_self(env->thread);
/* Acquire qemu_global_mutex.
 * Two paths are visible: a plain lock, and a path that sets
 * iothread_requesting_mutex, tries the lock and, if that fails, kicks the
 * thread of first_cpu before blocking on the lock; afterwards the flag is
 * cleared and qemu_io_proceeded_cond is broadcast.
 * NOTE(review): the condition selecting between the two paths is not
 * visible in this listing. */
749 void qemu_mutex_lock_iothread(void)
752 qemu_mutex_lock(&qemu_global_mutex);
754 iothread_requesting_mutex = true;
755 if (qemu_mutex_trylock(&qemu_global_mutex)) {
756 qemu_cpu_kick_thread(first_cpu);
757 qemu_mutex_lock(&qemu_global_mutex);
759 iothread_requesting_mutex = false;
760 qemu_cond_broadcast(&qemu_io_proceeded_cond);
/* Release qemu_global_mutex. */
764 void qemu_mutex_unlock_iothread(void)
766 qemu_mutex_unlock(&qemu_global_mutex);
/* Walk the CPU list from first_cpu checking each CPU's `stopped` flag.
 * NOTE(review): presumably returns 0 at the first CPU that is not stopped
 * and 1 otherwise; the loop construct and the return statements are not
 * visible in this listing. */
769 static int all_vcpus_paused(void)
771 CPUState *penv = first_cpu;
774 if (!penv->stopped) {
777 penv = (CPUState *)penv->next_cpu;
/* Pause every vcpu and wait until all_vcpus_paused() holds, sleeping on
 * qemu_pause_cond (which releases qemu_global_mutex) in between.
 * NOTE(review): two walks over the CPU list are visible, one before and
 * one inside the wait; what they do to each CPU is on lines not visible in
 * this listing. */
783 void pause_all_vcpus(void)
785 CPUState *penv = first_cpu;
790 penv = (CPUState *)penv->next_cpu;
793 while (!all_vcpus_paused()) {
794 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
798 penv = (CPUState *)penv->next_cpu;
/* Walk the CPU list starting at first_cpu.
 * NOTE(review): the per-CPU statements that perform the resume are not
 * visible in this listing. */
803 void resume_all_vcpus(void)
805 CPUState *penv = first_cpu;
811 penv = (CPUState *)penv->next_cpu;
/* Attach the CPU passed as @_env to the TCG cpu thread.
 * For the first CPU (tcg_cpu_thread still NULL) the thread object and halt
 * condition are allocated, the thread is started and the caller waits on
 * qemu_cpu_cond until env->created is set; both objects are remembered in
 * tcg_cpu_thread / tcg_halt_cond.  Later CPUs just share those two. */
815 static void qemu_tcg_init_vcpu(void *_env)
817 CPUState *env = _env;
819 /* share a single thread for all cpus with TCG */
820 if (!tcg_cpu_thread) {
821 env->thread = g_malloc0(sizeof(QemuThread));
822 env->halt_cond = g_malloc0(sizeof(QemuCond));
823 qemu_cond_init(env->halt_cond);
824 tcg_halt_cond = env->halt_cond;
825 qemu_thread_create(env->thread, qemu_tcg_cpu_thread_fn, env);
826 while (env->created == 0) {
827 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
829 tcg_cpu_thread = env->thread;
831 env->thread = tcg_cpu_thread;
832 env->halt_cond = tcg_halt_cond;
/* Give @env its own KVM vcpu thread: allocate the thread object and halt
 * condition, start qemu_kvm_cpu_thread_fn() with @env as argument and wait
 * on qemu_cpu_cond until env->created is set. */
836 static void qemu_kvm_start_vcpu(CPUState *env)
838 env->thread = g_malloc0(sizeof(QemuThread));
839 env->halt_cond = g_malloc0(sizeof(QemuCond));
840 qemu_cond_init(env->halt_cond);
841 qemu_thread_create(env->thread, qemu_kvm_cpu_thread_fn, env);
842 while (env->created == 0) {
843 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
/* Initialise the execution thread of the CPU passed as @_env: copy
 * smp_cores / smp_threads into the CPU and start it through either the KVM
 * or the TCG path.
 * NOTE(review): the condition choosing between the two paths is not
 * visible in this listing. */
847 void qemu_init_vcpu(void *_env)
849 CPUState *env = _env;
851 env->nr_cores = smp_cores;
852 env->nr_threads = smp_threads;
855 qemu_kvm_start_vcpu(env);
857 qemu_tcg_init_vcpu(env);
/* Public wrapper around qemu_event_increment(). */
861 void qemu_notify_event(void)
863 qemu_event_increment();
/* Stop the CPU recorded in cpu_single_env, if any: clear its pending stop
 * request, mark it stopped, call cpu_exit() on it and signal
 * qemu_pause_cond. */
866 void cpu_stop_current(void)
868 if (cpu_single_env) {
869 cpu_single_env->stop = 0;
870 cpu_single_env->stopped = 1;
871 cpu_exit(cpu_single_env);
872 qemu_cond_signal(&qemu_pause_cond);
/* Stop the VM, entering run state @state.  When called from a thread other
 * than io_thread the stop is only requested via
 * qemu_system_vmstop_request().
 * NOTE(review): the remainder of that branch and the path taken on
 * io_thread are not visible in this listing. */
876 void vm_stop(RunState state)
878 if (!qemu_thread_is_self(&io_thread)) {
879 qemu_system_vmstop_request(state);
881 * FIXME: should not return to device code in case
882 * vm_stop() has been requested.
/* Variant of vm_stop() that distinguishes on runstate_is_running().
 * NOTE(review): the statements of both branches are not visible in this
 * listing. */
890 /* does a state transition even if the VM is already stopped,
891 current state is forgotten forever */
892 void vm_stop_force_state(RunState state)
894 if (runstate_is_running()) {
/* Bookkeeping around one TCG execution slice of @env.
 * Before the slice: unexecuted instructions left in the CPU's counters are
 * removed from qemu_icount, a new budget `count` is derived from the next
 * icount deadline and added to qemu_icount, and the budget is loaded into
 * icount_decr.u16.low (at most 0xffff) and icount_extra.
 * After the slice: instructions still pending are subtracted from
 * qemu_icount again and the counters are cleared.
 * With CONFIG_PROFILER the time spent is accumulated in qemu_time.
 * NOTE(review): the guest execution call, the icount guards and the return
 * statement are on lines not visible in this listing. */
901 static int tcg_cpu_exec(CPUState *env)
904 #ifdef CONFIG_PROFILER
908 #ifdef CONFIG_PROFILER
909 ti = profile_getclock();
914 qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
915 env->icount_decr.u16.low = 0;
916 env->icount_extra = 0;
917 count = qemu_icount_round(qemu_next_icount_deadline());
918 qemu_icount += count;
919 decr = (count > 0xffff) ? 0xffff : count;
921 env->icount_decr.u16.low = decr;
922 env->icount_extra = count;
925 #ifdef CONFIG_PROFILER
926 qemu_time += profile_getclock() - ti;
929 /* Fold pending instructions back into the
930 instruction counter, and clear the interrupt flag. */
931 qemu_icount -= (env->icount_decr.u16.low
932 + env->icount_extra);
933 env->icount_decr.u32 = 0;
934 env->icount_extra = 0;
/* Run the CPUs one after another, starting at next_cpu (or first_cpu when
 * next_cpu is NULL) and stopping early once exit_request is set.
 * For each CPU, vm_clock is enabled unless SSTEP_NOTIMER is set in its
 * singlestep flags.  A runnable CPU is executed through KVM (followed by
 * qemu_kvm_eat_signals()) or TCG, and an EXCP_DEBUG result is forwarded to
 * cpu_handle_guest_debug().
 * Returns true when at least one CPU thread is not idle afterwards.
 * NOTE(review): the KVM/TCG selection and the bodies of the debug and
 * stop/stopped branches are not fully visible in this listing. */
939 bool cpu_exec_all(void)
943 /* Account partial waits to the vm_clock. */
944 qemu_clock_warp(vm_clock);
946 if (next_cpu == NULL) {
947 next_cpu = first_cpu;
949 for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
950 CPUState *env = next_cpu;
952 qemu_clock_enable(vm_clock,
953 (env->singlestep_enabled & SSTEP_NOTIMER) == 0);
955 if (cpu_can_run(env)) {
957 r = kvm_cpu_exec(env);
958 qemu_kvm_eat_signals(env);
960 r = tcg_cpu_exec(env);
962 if (r == EXCP_DEBUG) {
963 cpu_handle_guest_debug(env);
966 } else if (env->stop || env->stopped) {
971 return !all_cpu_threads_idle();
/* For every CPU, look through the nb_numa_nodes entries of node_cpumask
 * for a mask that has the bit of the CPU's index set.
 * NOTE(review): the statement executed on a match is not visible in this
 * listing. */
974 void set_numa_modes(void)
979 for (env = first_cpu; env != NULL; env = env->next_cpu) {
980 for (i = 0; i < nb_numa_nodes; i++) {
/* Build the bit as a 64-bit unsigned value: shifting the int constant 1 is
 * undefined for cpu_index >= 31 and cannot reach the upper mask bits. */
981 if (node_cpumask[i] & (1ULL << env->cpu_index)) {
/* Convert the log option string @optarg into a mask with
 * cpu_str_to_log_mask().  The visible code also prints the table of known
 * log items (name and help text, up to the entry with mask 0) to stdout.
 * NOTE(review): the condition guarding that listing (presumably an
 * unrecognised @optarg) and the use of the resulting mask are not visible
 * in this listing. */
988 void set_cpu_log(const char *optarg)
991 const CPULogItem *item;
993 mask = cpu_str_to_log_mask(optarg);
995 printf("Log items (comma separated):\n");
996 for (item = cpu_log_items; item->mask != 0; item++) {
997 printf("%-10s %s\n", item->name, item->help);
/* Pass @optarg on to cpu_set_log_filename(). */
1004 void set_cpu_log_filename(const char *optarg)
1006 cpu_set_log_filename(optarg);
1009 /* Return the virtual CPU time, based on the instruction counter. */
/* The result is qemu_icount_bias + (icount << icount_time_shift), where
 * icount starts from qemu_icount and has the instructions still pending in
 * the current CPU's counters (icount_decr.u16.low + icount_extra)
 * subtracted.  "Bad clock read" is printed to stderr when can_do_io()
 * fails for the current CPU.
 * NOTE(review): the guard around the per-CPU adjustment is not visible in
 * this listing. */
1010 int64_t cpu_get_icount(void)
1013 CPUState *env = cpu_single_env;
1015 icount = qemu_icount;
1017 if (!can_do_io(env)) {
1018 fprintf(stderr, "Bad clock read\n");
1020 icount -= (env->icount_decr.u16.low + env->icount_extra);
1022 return qemu_icount_bias + (icount << icount_time_shift);
/* Print the list of supported CPU models to @f through @cpu_fprintf.
 * Uses the target's cpu_list_id macro (which also receives @optarg) when it
 * is defined, otherwise the older cpu_list macro; if neither is defined no
 * call is made by the visible code. */
1025 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1027 /* XXX: implement xxx_cpu_list for targets that still miss it */
1028 #if defined(cpu_list_id)
1029 cpu_list_id(f, cpu_fprintf, optarg);
1030 #elif defined(cpu_list)
1031 cpu_list(f, cpu_fprintf); /* deprecated */