4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
34 #include "qemu-thread.h"
42 #define SIG_IPI (SIGRTMIN+4)
44 #define SIG_IPI SIGUSR1
49 #include <sys/prctl.h>
52 #define PR_MCE_KILL 33
55 #ifndef PR_MCE_KILL_SET
56 #define PR_MCE_KILL_SET 1
59 #ifndef PR_MCE_KILL_EARLY
60 #define PR_MCE_KILL_EARLY 1
63 #endif /* CONFIG_LINUX */
/* CPU at which cpu_exec_all() resumes its walk of the CPU list; reset to first_cpu there when NULL. */
65 static CPUState *next_cpu;
67 /***********************************************************/
/*
 * Report a hardware error on stderr: the "qemu: hardware error: " prefix,
 * the printf-style message built from fmt, a newline, and then a state dump
 * for every CPU on the first_cpu list.
 * NOTE(review): the va_list setup and whatever follows the dump loop are
 * not visible in this excerpt.
 */
68 void hw_error(const char *fmt, ...)
74 fprintf(stderr, "qemu: hardware error: ");
75 vfprintf(stderr, fmt, ap);
76 fprintf(stderr, "\n");
77 for(env = first_cpu; env != NULL; env = env->next_cpu) {
78 fprintf(stderr, "CPU #%d:\n", env->cpu_index);
/* Two alternative dump calls; presumably selected by a target #ifdef that is not visible here -- confirm. */
80 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU);
82 cpu_dump_state(env, stderr, fprintf, 0);
/* Call cpu_synchronize_state() on every CPU in the first_cpu list. */
89 void cpu_synchronize_all_states(void)
93 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
94 cpu_synchronize_state(cpu);
/* Call cpu_synchronize_post_reset() on every CPU in the first_cpu list. */
98 void cpu_synchronize_all_post_reset(void)
102 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
103 cpu_synchronize_post_reset(cpu);
/* Call cpu_synchronize_post_init() on every CPU in the first_cpu list. */
107 void cpu_synchronize_all_post_init(void)
111 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
112 cpu_synchronize_post_init(cpu);
/* Return non-zero when the VM is not running or this CPU's stopped flag is set. */
116 int cpu_is_stopped(CPUState *env)
118 return !vm_running || env->stopped;
/*
 * VM stop helper.  The visible steps call vm_state_notify(0, state) and
 * emit a QEVENT_STOP monitor event.
 * NOTE(review): the guarding condition and the remaining steps are not
 * visible in this excerpt.
 */
121 static void do_vm_stop(RunState state)
127 vm_state_notify(0, state);
130 monitor_protocol_event(QEVENT_STOP, NULL);
/*
 * Tell whether env may execute guest code.  The visible check tests for a
 * stopped CPU or a VM that is not running; the values returned for each
 * case are outside this excerpt.
 */
134 static int cpu_can_run(CPUState *env)
139 if (env->stopped || !vm_running) {
/*
 * Idle test for the thread driving env.  Three conditions are examined in
 * order; the value returned by each branch is not visible in this excerpt.
 */
145 static bool cpu_thread_is_idle(CPUState *env)
/* A pending stop request or queued work items. */
147 if (env->stop || env->queued_work_first) {
/* CPU already stopped, or the VM is not running. */
150 if (env->stopped || !vm_running) {
/* CPU not halted, CPU has work, or KVM is in use with an in-kernel irqchip. */
153 if (!env->halted || qemu_cpu_has_work(env) ||
154 (kvm_enabled() && kvm_irqchip_in_kernel())) {
/*
 * Walk the CPU list testing cpu_thread_is_idle() on each entry.
 * NOTE(review): presumably returns false on the first non-idle CPU and true
 * otherwise; the return statements are not visible here.
 */
160 bool all_cpu_threads_idle(void)
164 for (env = first_cpu; env != NULL; env = env->next_cpu) {
165 if (!cpu_thread_is_idle(env)) {
/*
 * Handle a guest debug exit for env: pass env to gdb_set_stop_cpu() and
 * then raise qemu_system_debug_request().
 */
172 static void cpu_handle_guest_debug(CPUState *env)
174 gdb_set_stop_cpu(env);
175 qemu_system_debug_request();
/*
 * Signal handler (installed for SIG_IPI by qemu_tcg_init_cpu_signals()):
 * when cpu_single_env is set, call cpu_exit() on that CPU.
 */
179 static void cpu_signal(int sig)
181 if (cpu_single_env) {
182 cpu_exit(cpu_single_env);
/*
 * Restore the default SIGBUS disposition and unblock SIGBUS for the
 * caller.  The diagnostic below is printed when control gets past that.
 * NOTE(review): the statements between the sigaction() test and the
 * sigaddset() call, and anything after perror(), are not visible in this
 * excerpt.
 */
188 static void sigbus_reraise(void)
191 struct sigaction action;
193 memset(&action, 0, sizeof(action));
194 action.sa_handler = SIG_DFL;
195 if (!sigaction(SIGBUS, &action, NULL)) {
198 sigaddset(&set, SIGBUS);
199 sigprocmask(SIG_UNBLOCK, &set, NULL);
/* perror() itself appends ": <error text>\n", so the prefix must not end in a newline. */
201 perror("Failed to re-raise SIGBUS!");
/*
 * SIGBUS handler registered by qemu_init_sigbus().  Forwards the signal
 * code and faulting address from siginfo to kvm_on_sigbus().
 * NOTE(review): the last parameter and the action taken when
 * kvm_on_sigbus() returns non-zero are not visible in this excerpt.
 */
205 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
208 if (kvm_on_sigbus(siginfo->ssi_code,
209 (void *)(intptr_t)siginfo->ssi_addr)) {
/*
 * Install sigbus_handler() as the SA_SIGINFO handler for SIGBUS and set the
 * machine-check kill policy through prctl(PR_MCE_KILL).
 */
214 static void qemu_init_sigbus(void)
216 struct sigaction action;
218 memset(&action, 0, sizeof(action));
219 action.sa_flags = SA_SIGINFO;
/* sigbus_handler() takes a qemu_signalfd_siginfo pointer, hence the cast to the sa_sigaction prototype. */
220 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
221 sigaction(SIGBUS, &action, NULL);
/* PR_MCE_KILL_SET with PR_MCE_KILL_EARLY; the return value is not checked. */
223 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
/*
 * Consume SIG_IPI and SIGBUS signals pending for the calling thread without
 * blocking: sigtimedwait() is given a zero timeout and the loop repeats
 * while sigpending() still reports either signal.
 * NOTE(review): the dispatch on the sigtimedwait() result and the error
 * exits are not visible in this excerpt.
 */
226 static void qemu_kvm_eat_signals(CPUState *env)
/* Zero timeout: poll only. */
228 struct timespec ts = { 0, 0 };
234 sigemptyset(&waitset);
235 sigaddset(&waitset, SIG_IPI);
236 sigaddset(&waitset, SIGBUS);
239 r = sigtimedwait(&waitset, &siginfo, &ts);
/* EAGAIN (nothing pending) and EINTR are not treated as errors. */
240 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
241 perror("sigtimedwait");
/* Presumably reached only for SIGBUS -- confirm against the missing dispatch. */
247 if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr)) {
255 r = sigpending(&chkset);
257 perror("sigpending");
260 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
263 #else /* !CONFIG_LINUX */
/* Variant used when CONFIG_LINUX is not defined; the body is not visible in this excerpt. */
265 static void qemu_init_sigbus(void)
/* Variant used when CONFIG_LINUX is not defined; the body is not visible in this excerpt. */
269 static void qemu_kvm_eat_signals(CPUState *env)
272 #endif /* !CONFIG_LINUX */
/* Descriptor written by qemu_event_increment(); -1 until qemu_event_init() stores fds[1] here. */
275 static int io_thread_fd = -1;
/*
 * Write the 8-byte value 1 to io_thread_fd.  The write is retried on EINTR;
 * EAGAIN is tolerated and any other failure is reported on stderr.
 * NOTE(review): what happens when io_thread_fd is still -1, and after the
 * error message, is not visible in this excerpt.
 */
277 static void qemu_event_increment(void)
279 /* Write 8 bytes to be compatible with eventfd. */
280 static const uint64_t val = 1;
283 if (io_thread_fd == -1) {
287 ret = write(io_thread_fd, &val, sizeof(val));
288 } while (ret < 0 && errno == EINTR);
290 /* EAGAIN is fine, a read must be pending. */
291 if (ret < 0 && errno != EAGAIN) {
292 fprintf(stderr, "qemu_event_increment: write() failed: %s\n",
/*
 * fd read handler registered by qemu_event_init(); the descriptor is passed
 * through opaque.  Keeps reading while read() is interrupted (EINTR) or
 * returns a completely filled buffer.
 */
298 static void qemu_event_read(void *opaque)
300 int fd = (intptr_t)opaque;
304 /* Drain the notify pipe. For eventfd, only 8 bytes will be read. */
306 len = read(fd, buffer, sizeof(buffer));
307 } while ((len == -1 && errno == EINTR) || len == sizeof(buffer));
/*
 * Create the notification descriptor pair with qemu_eventfd(), switch both
 * ends to non-blocking mode, register qemu_event_read() on fds[0] and
 * publish fds[1] as io_thread_fd.
 * NOTE(review): the error checks after each step and the return values are
 * not visible in this excerpt.
 */
310 static int qemu_event_init(void)
315 err = qemu_eventfd(fds);
319 err = fcntl_setfl(fds[0], O_NONBLOCK);
323 err = fcntl_setfl(fds[1], O_NONBLOCK);
/* The read end is handed to the handler through the opaque pointer. */
327 qemu_set_fd_handler2(fds[0], NULL, qemu_event_read, NULL,
328 (void *)(intptr_t)fds[0]);
330 io_thread_fd = fds[1];
/* Handler installed for SIG_IPI by qemu_kvm_init_cpu_signals(); its body is not visible in this excerpt. */
339 static void dummy_signal(int sig)
343 /* If we have signalfd, we mask out the signals we want to handle and then
344 * use signalfd to listen for them. We rely on whatever the current signal
345 * handler is to dispatch the signals when we receive them.
/*
 * fd handler for the signalfd descriptor, which is passed through opaque.
 * Reads a qemu_signalfd_siginfo record (retrying on EINTR), looks up the
 * disposition currently installed for that signal with sigaction() and
 * calls it directly: sa_sigaction when SA_SIGINFO is set, otherwise
 * sa_handler.
 * NOTE(review): the enclosing loop and the exits taken after the EAGAIN and
 * short-read checks are not visible in this excerpt.
 */
347 static void sigfd_handler(void *opaque)
349 int fd = (intptr_t)opaque;
350 struct qemu_signalfd_siginfo info;
351 struct sigaction action;
356 len = read(fd, &info, sizeof(info));
357 } while (len == -1 && errno == EINTR);
359 if (len == -1 && errno == EAGAIN) {
363 if (len != sizeof(info)) {
/* NOTE(review): "%m" is a glibc printf extension, and this diagnostic goes to stdout rather than stderr. */
364 printf("read from sigfd returned %zd: %m\n", len);
/* Query only (NULL new action): fetch the handler currently installed for this signal. */
368 sigaction(info.ssi_signo, NULL, &action);
369 if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction) {
/* The signalfd record is passed in place of a siginfo_t. */
370 action.sa_sigaction(info.ssi_signo,
371 (siginfo_t *)&info, NULL);
372 } else if (action.sa_handler) {
373 action.sa_handler(info.ssi_signo);
/*
 * Set up signal handling for the calling thread: unblock SIGUSR2, block
 * SIG_IPI, block SIGIO, SIGALRM and SIGBUS, then create a non-blocking
 * signalfd from `set` and register sigfd_handler() on it.
 * NOTE(review): the statements that reset `set` between the steps, the
 * check on sigfd and the return values are not visible in this excerpt.
 */
378 static int qemu_signal_init(void)
383 /* SIGUSR2 used by posix-aio-compat.c */
385 sigaddset(&set, SIGUSR2);
386 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
389 * SIG_IPI must be blocked in the main thread and must not be caught
390 * by sigwait() in the signal thread. Otherwise, the cpu thread will
391 * not catch it reliably.
394 sigaddset(&set, SIG_IPI);
395 pthread_sigmask(SIG_BLOCK, &set, NULL);
398 sigaddset(&set, SIGIO);
399 sigaddset(&set, SIGALRM);
400 sigaddset(&set, SIGBUS);
401 pthread_sigmask(SIG_BLOCK, &set, NULL);
403 sigfd = qemu_signalfd(&set);
405 fprintf(stderr, "failed to create signalfd\n");
/* The return value of fcntl_setfl() is not checked here. */
409 fcntl_setfl(sigfd, O_NONBLOCK);
411 qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
412 (void *)(intptr_t)sigfd);
/*
 * Per-VCPU signal setup for KVM: install dummy_signal() for SIG_IPI, then
 * hand KVM a signal mask with SIG_IPI and SIGBUS removed via
 * kvm_set_signal_mask().
 * NOTE(review): the mask setup appears twice; presumably the two copies sit
 * in alternative preprocessor branches that are not visible here -- confirm.
 */
417 static void qemu_kvm_init_cpu_signals(CPUState *env)
421 struct sigaction sigact;
423 memset(&sigact, 0, sizeof(sigact));
424 sigact.sa_handler = dummy_signal;
425 sigaction(SIG_IPI, &sigact, NULL);
/* A NULL new mask only fetches the thread's current mask into `set`. */
427 pthread_sigmask(SIG_BLOCK, NULL, &set);
428 sigdelset(&set, SIG_IPI);
429 sigdelset(&set, SIGBUS);
430 r = kvm_set_signal_mask(env, &set);
/* r is passed negated to strerror(), i.e. it is treated as a negative errno value. */
432 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
436 sigdelset(&set, SIG_IPI);
437 sigdelset(&set, SIGBUS);
438 r = kvm_set_signal_mask(env, &set);
440 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
/*
 * Signal setup for the TCG CPU thread: install cpu_signal() as the SIG_IPI
 * handler and unblock SIG_IPI in the calling thread.
 * NOTE(review): the initialisation of `set` is not visible in this excerpt.
 */
445 static void qemu_tcg_init_cpu_signals(void)
448 struct sigaction sigact;
450 memset(&sigact, 0, sizeof(sigact));
451 sigact.sa_handler = cpu_signal;
452 sigaction(SIG_IPI, &sigact, NULL);
455 sigaddset(&set, SIG_IPI);
456 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
/* Win32 event object created by qemu_event_init() and signalled by qemu_event_increment(). */
461 HANDLE qemu_event_handle;
/* Callback registered for qemu_event_handle through qemu_add_wait_object(); its body is not visible in this excerpt. */
463 static void dummy_event_handler(void *opaque)
/*
 * Win32 variant: create the event object stored in qemu_event_handle and
 * register it as a wait object with dummy_event_handler() as its callback.
 * A CreateEvent() failure is reported on stderr together with
 * GetLastError().
 * NOTE(review): the return statements are not visible in this excerpt.
 */
467 static int qemu_event_init(void)
469 qemu_event_handle = CreateEvent(NULL, FALSE, FALSE, NULL);
470 if (!qemu_event_handle) {
/* GetLastError() returns a DWORD (unsigned long), so print it with %lu. */
471 fprintf(stderr, "Failed CreateEvent: %lu\n", GetLastError());
474 qemu_add_wait_object(qemu_event_handle, dummy_event_handler, NULL);
/*
 * Win32 variant: signal qemu_event_handle with SetEvent() and report a
 * failure on stderr.
 * NOTE(review): the argument list of the fprintf() call continues on lines
 * that are not visible in this excerpt.
 */
478 static void qemu_event_increment(void)
480 if (!SetEvent(qemu_event_handle)) {
481 fprintf(stderr, "qemu_event_increment: SetEvent failed: %ld\n",
/* Second definition of qemu_signal_init(), presumably the Win32 counterpart of the POSIX one -- confirm; body not visible in this excerpt. */
487 static int qemu_signal_init(void)
/* Second definition of qemu_kvm_init_cpu_signals(), presumably the Win32 counterpart -- confirm; body not visible in this excerpt. */
492 static void qemu_kvm_init_cpu_signals(CPUState *env)
/* Second definition of qemu_tcg_init_cpu_signals(), presumably the Win32 counterpart -- confirm; body not visible in this excerpt. */
497 static void qemu_tcg_init_cpu_signals(void)
/* Global lock taken by the main loop and the CPU threads; every condition variable below is waited on with it. */
502 QemuMutex qemu_global_mutex;
/* Broadcast by qemu_mutex_lock_iothread() once it holds the mutex; the TCG thread waits on it. */
503 static QemuCond qemu_io_proceeded_cond;
/* Set while qemu_mutex_lock_iothread() is trying to obtain the mutex from the TCG thread. */
504 static bool iothread_requesting_mutex;
/* Identity of the thread that ran qemu_init_main_loop(); vm_stop() compares against it. */
506 static QemuThread io_thread;
/* Thread and halt condition shared by all CPUs under TCG (see qemu_tcg_init_vcpu()). */
508 static QemuThread *tcg_cpu_thread;
509 static QemuCond *tcg_halt_cond;
/* Signalled by a CPU thread during start-up; the creating code waits on it until env->created is set. */
512 static QemuCond qemu_cpu_cond;
/* Signalled on the CPU stop path; pause_all_vcpus() waits on it. */
514 static QemuCond qemu_pause_cond;
/* Broadcast after queued work has been flushed; run_on_cpu() waits on it. */
515 static QemuCond qemu_work_cond;
/*
 * Initialise the main loop: signal handling, the notification event, the
 * condition variables and the global mutex.  The mutex is taken before the
 * function records the calling thread as io_thread.
 * NOTE(review): the error checks on `ret` and the return statement are not
 * visible in this excerpt.
 */
517 int qemu_init_main_loop(void)
523 ret = qemu_signal_init();
528 /* Note eventfd must be drained before signalfd handlers run */
529 ret = qemu_event_init();
534 qemu_cond_init(&qemu_cpu_cond);
535 qemu_cond_init(&qemu_pause_cond);
536 qemu_cond_init(&qemu_work_cond);
537 qemu_cond_init(&qemu_io_proceeded_cond);
538 qemu_mutex_init(&qemu_global_mutex);
/* The mutex stays held when this function's visible part ends. */
539 qemu_mutex_lock(&qemu_global_mutex);
541 qemu_thread_get_self(&io_thread);
/* Main-loop start hook; its body is not visible in this excerpt. */
546 void qemu_main_loop_start(void)
/*
 * Arrange for func(data) to be handled on behalf of env.  When the caller
 * is already env's thread a separate branch is taken (body not visible).
 * Otherwise a work item living on the caller's stack is appended to env's
 * work queue and the caller waits on qemu_work_cond, restoring
 * cpu_single_env after each wait.
 * NOTE(review): the filling-in of `wi`, the kick of the target CPU and the
 * wait-loop condition are not visible in this excerpt.
 */
551 void run_on_cpu(CPUState *env, void (*func)(void *data), void *data)
553 struct qemu_work_item wi;
555 if (qemu_cpu_is_self(env)) {
/* Append to the singly linked queue: first item, or chained after the current tail. */
562 if (!env->queued_work_first) {
563 env->queued_work_first = &wi;
565 env->queued_work_last->next = &wi;
567 env->queued_work_last = &wi;
/* Saved and restored around the wait below. */
573 CPUState *self_env = cpu_single_env;
575 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
576 cpu_single_env = self_env;
/*
 * Drain env's work queue: unlink items one at a time from the head, clear
 * the tail pointer and broadcast qemu_work_cond to wake waiters.
 * NOTE(review): what is done with each unlinked item is not visible in this
 * excerpt.
 */
580 static void flush_queued_work(CPUState *env)
582 struct qemu_work_item *wi;
/* Nothing queued. */
584 if (!env->queued_work_first) {
588 while ((wi = env->queued_work_first)) {
589 env->queued_work_first = wi->next;
593 env->queued_work_last = NULL;
594 qemu_cond_broadcast(&qemu_work_cond);
/*
 * Housekeeping shared by the TCG and KVM wait paths: signal
 * qemu_pause_cond, flush env's queued work and clear thread_kicked.
 * NOTE(review): the condition guarding the qemu_pause_cond signal is not
 * visible in this excerpt.
 */
597 static void qemu_wait_io_event_common(CPUState *env)
602 qemu_cond_signal(&qemu_pause_cond);
604 flush_queued_work(env);
605 env->thread_kicked = false;
/*
 * Wait point of the TCG CPU thread.  While every CPU thread is idle it
 * warps vm_clock and sleeps on tcg_halt_cond; then, while
 * iothread_requesting_mutex is set, it waits on qemu_io_proceeded_cond;
 * finally it runs qemu_wait_io_event_common() for every CPU.  Both waits
 * are performed with qemu_global_mutex.
 */
608 static void qemu_tcg_wait_io_event(void)
612 while (all_cpu_threads_idle()) {
613 /* Start accounting real time to the virtual clock if the CPUs
615 qemu_clock_warp(vm_clock);
616 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
619 while (iothread_requesting_mutex) {
620 qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
623 for (env = first_cpu; env != NULL; env = env->next_cpu) {
624 qemu_wait_io_event_common(env);
/*
 * Wait point of a KVM VCPU thread: sleep on env->halt_cond while the thread
 * is idle, then consume pending signals and run the common housekeeping.
 */
628 static void qemu_kvm_wait_io_event(CPUState *env)
630 while (cpu_thread_is_idle(env)) {
631 qemu_cond_wait(env->halt_cond, &qemu_global_mutex);
634 qemu_kvm_eat_signals(env);
635 qemu_wait_io_event_common(env);
/*
 * Thread function of a KVM VCPU.  Takes the global mutex, records thread
 * identity in env, initialises the VCPU and its signal setup, announces
 * creation on qemu_cpu_cond and then alternates between running the guest
 * and waiting for I/O events.
 * NOTE(review): the derivation of env from arg, the failure exit after
 * kvm_init_vcpu(), the store to env->created and the loop construct are not
 * visible in this excerpt.
 */
638 static void *qemu_kvm_cpu_thread_fn(void *arg)
643 qemu_mutex_lock(&qemu_global_mutex);
644 qemu_thread_get_self(env->thread);
645 env->thread_id = qemu_get_thread_id();
647 r = kvm_init_vcpu(env);
649 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
653 qemu_kvm_init_cpu_signals(env);
655 /* signal CPU creation */
657 qemu_cond_signal(&qemu_cpu_cond);
660 if (cpu_can_run(env)) {
661 r = kvm_cpu_exec(env);
/* A debug exit is routed to the gdb stub path. */
662 if (r == EXCP_DEBUG) {
663 cpu_handle_guest_debug(env);
666 qemu_kvm_wait_io_event(env);
/*
 * Thread function of the TCG CPU thread.  Sets up SIG_IPI handling, records
 * the thread identity, stamps every CPU on the list with this thread's id
 * under the global mutex, announces creation on qemu_cpu_cond and waits on
 * tcg_halt_cond until first_cpu is no longer stopped.
 * NOTE(review): the derivation of env from arg, the store to env->created
 * and the main execution loop around qemu_tcg_wait_io_event() are not
 * visible in this excerpt.
 */
672 static void *qemu_tcg_cpu_thread_fn(void *arg)
676 qemu_tcg_init_cpu_signals();
677 qemu_thread_get_self(env->thread);
679 /* signal CPU creation */
680 qemu_mutex_lock(&qemu_global_mutex);
681 for (env = first_cpu; env != NULL; env = env->next_cpu) {
682 env->thread_id = qemu_get_thread_id();
685 qemu_cond_signal(&qemu_cpu_cond);
687 /* wait for initial kick-off after machine start */
688 while (first_cpu->stopped) {
689 qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
/* With icount in use and the next deadline already reached; the action taken is not visible here. */
694 if (use_icount && qemu_next_icount_deadline() <= 0) {
697 qemu_tcg_wait_io_event();
/*
 * Interrupt the thread that runs env.  Two variants are visible: one sends
 * SIG_IPI with pthread_kill() and reports a failure on stderr, the other
 * suspends and resumes the thread unless the caller is that thread.
 * NOTE(review): the preprocessor conditions selecting a variant, the test
 * on err and the statements between SuspendThread() and ResumeThread() are
 * not visible in this excerpt.
 */
703 static void qemu_cpu_kick_thread(CPUState *env)
708 err = pthread_kill(env->thread->thread, SIG_IPI);
/* Terminate the diagnostic with a newline so it does not run into later stderr output. */
710 fprintf(stderr, "qemu:%s: %s\n", __func__, strerror(err));
714 if (!qemu_cpu_is_self(env)) {
715 SuspendThread(env->thread->thread);
717 ResumeThread(env->thread->thread);
/*
 * Wake the CPU passed as _env: broadcast its halt condition and, under KVM,
 * kick its thread unless a kick is already outstanding (thread_kicked).
 */
722 void qemu_cpu_kick(void *_env)
724 CPUState *env = _env;
726 qemu_cond_broadcast(env->halt_cond);
727 if (kvm_enabled() && !env->thread_kicked) {
728 qemu_cpu_kick_thread(env);
729 env->thread_kicked = true;
/*
 * Kick the CPU recorded in cpu_single_env (which must be set) unless a kick
 * is already outstanding for it.
 */
733 void qemu_cpu_kick_self(void)
736 assert(cpu_single_env);
738 if (!cpu_single_env->thread_kicked) {
739 qemu_cpu_kick_thread(cpu_single_env);
740 cpu_single_env->thread_kicked = true;
/* Return the result of qemu_thread_is_self() for the thread that belongs to the CPU passed as _env. */
747 int qemu_cpu_is_self(void *_env)
749 CPUState *env = _env;
751 return qemu_thread_is_self(env->thread);
/*
 * Acquire qemu_global_mutex on behalf of the I/O thread.  Two paths are
 * visible: a plain lock, and a path that raises iothread_requesting_mutex,
 * tries the lock and, if that fails, kicks first_cpu's thread before
 * blocking on the mutex; afterwards the flag is cleared and
 * qemu_io_proceeded_cond is broadcast.
 * NOTE(review): the condition choosing between the two paths is not visible
 * in this excerpt.
 */
754 void qemu_mutex_lock_iothread(void)
757 qemu_mutex_lock(&qemu_global_mutex);
759 iothread_requesting_mutex = true;
/* A non-zero trylock result is treated as "mutex busy". */
760 if (qemu_mutex_trylock(&qemu_global_mutex)) {
761 qemu_cpu_kick_thread(first_cpu);
762 qemu_mutex_lock(&qemu_global_mutex);
764 iothread_requesting_mutex = false;
765 qemu_cond_broadcast(&qemu_io_proceeded_cond);
/* Release qemu_global_mutex. */
769 void qemu_mutex_unlock_iothread(void)
771 qemu_mutex_unlock(&qemu_global_mutex);
/*
 * Walk the CPU list from first_cpu testing each CPU's stopped flag.
 * NOTE(review): presumably returns 0 on the first CPU that is not stopped
 * and 1 otherwise; the loop construct and returns are not visible here.
 */
774 static int all_vcpus_paused(void)
776 CPUState *penv = first_cpu;
779 if (!penv->stopped) {
782 penv = (CPUState *)penv->next_cpu;
/*
 * Pause every VCPU: walk the CPU list, then wait on qemu_pause_cond (with
 * qemu_global_mutex) until all_vcpus_paused() holds.
 * NOTE(review): the per-CPU actions performed inside both list walks are
 * not visible in this excerpt.
 */
788 void pause_all_vcpus(void)
790 CPUState *penv = first_cpu;
795 penv = (CPUState *)penv->next_cpu;
798 while (!all_vcpus_paused()) {
799 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
803 penv = (CPUState *)penv->next_cpu;
/*
 * Walk the CPU list starting at first_cpu.
 * NOTE(review): the per-CPU resume actions are not visible in this excerpt.
 */
808 void resume_all_vcpus(void)
810 CPUState *penv = first_cpu;
816 penv = (CPUState *)penv->next_cpu;
/*
 * Attach the CPU passed as _env to the TCG thread.  The first call
 * allocates the thread and halt condition, starts
 * qemu_tcg_cpu_thread_fn() and waits on qemu_cpu_cond until env->created
 * becomes non-zero; later calls reuse the shared thread and condition.
 */
820 static void qemu_tcg_init_vcpu(void *_env)
822 CPUState *env = _env;
824 /* share a single thread for all cpus with TCG */
825 if (!tcg_cpu_thread) {
826 env->thread = g_malloc0(sizeof(QemuThread));
827 env->halt_cond = g_malloc0(sizeof(QemuCond));
828 qemu_cond_init(env->halt_cond);
/* Published before the thread starts: the thread function waits on tcg_halt_cond. */
829 tcg_halt_cond = env->halt_cond;
830 qemu_thread_create(env->thread, qemu_tcg_cpu_thread_fn, env);
831 while (env->created == 0) {
832 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
834 tcg_cpu_thread = env->thread;
/* Subsequent CPUs: share the existing thread and halt condition. */
836 env->thread = tcg_cpu_thread;
837 env->halt_cond = tcg_halt_cond;
/*
 * Give env its own thread and halt condition, start
 * qemu_kvm_cpu_thread_fn() on it and wait on qemu_cpu_cond until
 * env->created becomes non-zero.
 */
841 static void qemu_kvm_start_vcpu(CPUState *env)
843 env->thread = g_malloc0(sizeof(QemuThread));
844 env->halt_cond = g_malloc0(sizeof(QemuCond));
845 qemu_cond_init(env->halt_cond);
846 qemu_thread_create(env->thread, qemu_kvm_cpu_thread_fn, env);
847 while (env->created == 0) {
848 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
/*
 * Initialise the VCPU passed as _env: copy the SMP core/thread counts into
 * it and start it through either the KVM or the TCG path.
 * NOTE(review): the condition selecting the path is not visible in this
 * excerpt.
 */
852 void qemu_init_vcpu(void *_env)
854 CPUState *env = _env;
856 env->nr_cores = smp_cores;
857 env->nr_threads = smp_threads;
860 qemu_kvm_start_vcpu(env);
862 qemu_tcg_init_vcpu(env);
/* Public wrapper around qemu_event_increment(). */
866 void qemu_notify_event(void)
868 qemu_event_increment();
/*
 * Stop the CPU recorded in cpu_single_env, if any: clear its stop request,
 * mark it stopped, call cpu_exit() on it and signal qemu_pause_cond.
 */
871 void cpu_stop_current(void)
873 if (cpu_single_env) {
874 cpu_single_env->stop = 0;
875 cpu_single_env->stopped = 1;
876 cpu_exit(cpu_single_env);
877 qemu_cond_signal(&qemu_pause_cond);
/*
 * Stop the VM with the given run state.  When called from a thread other
 * than io_thread, the stop is posted as a request through
 * qemu_system_vmstop_request().
 * NOTE(review): the rest of the off-thread branch and the path taken on
 * io_thread itself are not visible in this excerpt.
 */
881 void vm_stop(RunState state)
883 if (!qemu_thread_is_self(&io_thread)) {
884 qemu_system_vmstop_request(state);
886 * FIXME: should not return to device code in case
887 * vm_stop() has been requested.
/*
 * Run env under TCG with instruction-count bookkeeping around the
 * execution and optional profiling under CONFIG_PROFILER.
 * NOTE(review): the call that actually executes the CPU, the conditions
 * guarding the two icount sections and the return statement are not
 * visible in this excerpt.
 */
895 static int tcg_cpu_exec(CPUState *env)
898 #ifdef CONFIG_PROFILER
902 #ifdef CONFIG_PROFILER
903 ti = profile_getclock();
/* Remove the budget currently held in env from the global counter and clear it. */
908 qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
909 env->icount_decr.u16.low = 0;
910 env->icount_extra = 0;
/* New budget: the rounded time to the next icount deadline. */
911 count = qemu_icount_round(qemu_next_icount_deadline());
912 qemu_icount += count;
/* The low decrementer is a 16-bit field, so it takes at most 0xffff. */
913 decr = (count > 0xffff) ? 0xffff : count;
915 env->icount_decr.u16.low = decr;
916 env->icount_extra = count;
919 #ifdef CONFIG_PROFILER
920 qemu_time += profile_getclock() - ti;
923 /* Fold pending instructions back into the
924 instruction counter, and clear the interrupt flag. */
925 qemu_icount -= (env->icount_decr.u16.low
926 + env->icount_extra);
927 env->icount_decr.u32 = 0;
928 env->icount_extra = 0;
/*
 * Run the CPUs round-robin, resuming at next_cpu and stopping early when
 * exit_request is set.  Each runnable CPU is executed through KVM or TCG
 * and a debug exit is handed to cpu_handle_guest_debug().  Returns true
 * while at least one CPU thread is not idle.
 * NOTE(review): the condition choosing KVM versus TCG and the statements
 * that leave the loop are not visible in this excerpt.
 */
933 bool cpu_exec_all(void)
937 /* Account partial waits to the vm_clock. */
938 qemu_clock_warp(vm_clock);
/* Start over from the head of the list once the previous walk reached the end. */
940 if (next_cpu == NULL) {
941 next_cpu = first_cpu;
943 for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
944 CPUState *env = next_cpu;
/* vm_clock is enabled only when the CPU's single-step flags lack SSTEP_NOTIMER. */
946 qemu_clock_enable(vm_clock,
947 (env->singlestep_enabled & SSTEP_NOTIMER) == 0);
949 if (cpu_can_run(env)) {
951 r = kvm_cpu_exec(env);
952 qemu_kvm_eat_signals(env);
954 r = tcg_cpu_exec(env);
956 if (r == EXCP_DEBUG) {
957 cpu_handle_guest_debug(env);
960 } else if (env->stop || env->stopped) {
965 return !all_cpu_threads_idle();
/*
 * For every CPU, scan the NUMA nodes and test whether the bit for the
 * CPU's index is set in that node's CPU mask.
 * NOTE(review): the action taken on a match is not visible in this
 * excerpt.
 */
968 void set_numa_modes(void)
973 for (env = first_cpu; env != NULL; env = env->next_cpu) {
974 for (i = 0; i < nb_numa_nodes; i++) {
/* Shift an unsigned 64-bit one: "1 << n" is an int shift and is undefined once n reaches 31. */
975 if (node_cpumask[i] & (1ULL << env->cpu_index)) {
/*
 * Parse optarg into a CPU log mask with cpu_str_to_log_mask().  The
 * visible remainder prints the table of known log items (name and help
 * text) to stdout.
 * NOTE(review): the condition under which the table is printed and what is
 * done with a valid mask are not visible in this excerpt.
 */
982 void set_cpu_log(const char *optarg)
985 const CPULogItem *item;
987 mask = cpu_str_to_log_mask(optarg);
989 printf("Log items (comma separated):\n");
/* The table is terminated by an entry whose mask is 0. */
990 for (item = cpu_log_items; item->mask != 0; item++) {
991 printf("%-10s %s\n", item->name, item->help);
/* Forward optarg to cpu_set_log_filename(). */
998 void set_cpu_log_filename(const char *optarg)
1000 cpu_set_log_filename(optarg);
1003 /* Return the virtual CPU time, based on the instruction counter. */
/*
 * The result is qemu_icount_bias plus the instruction count shifted left
 * by icount_time_shift.  The count starts from qemu_icount and has the
 * budget still held by the current CPU (low decrementer plus icount_extra)
 * subtracted from it.
 * NOTE(review): the guard around the per-CPU adjustment is not visible in
 * this excerpt.
 */
1004 int64_t cpu_get_icount(void)
1007 CPUState *env = cpu_single_env;
1009 icount = qemu_icount;
/* A clock read while the CPU may not do I/O is reported on stderr. */
1011 if (!can_do_io(env)) {
1012 fprintf(stderr, "Bad clock read\n");
1014 icount -= (env->icount_decr.u16.low + env->icount_extra);
1016 return qemu_icount_bias + (icount << icount_time_shift);
1019 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1021 /* XXX: implement xxx_cpu_list for targets that still miss it */
1022 #if defined(cpu_list_id)
1023 cpu_list_id(f, cpu_fprintf, optarg);
1024 #elif defined(cpu_list)
1025 cpu_list(f, cpu_fprintf); /* deprecated */