4 * Copyright (c) 2003-2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* Needed early for CONFIG_BSD etc. */
26 #include "config-host.h"
38 #include <sys/prctl.h>
42 #define SIG_IPI (SIGRTMIN+4)
44 #define SIG_IPI SIGUSR1
48 #define PR_MCE_KILL 33
/* Round-robin cursor: the next CPU that cpu_exec_all() will run.  */
51 static CPUState *next_cpu;
53 /***********************************************************/
/* Report a fatal hardware error: print the formatted message, then dump
 * the register state of every CPU on the global list.  NOTE(review):
 * the va_start/va_end and abort() lines are elided from this extract.  */
54 void hw_error(const char *fmt, ...)
60 fprintf(stderr, "qemu: hardware error: ");
61 vfprintf(stderr, fmt, ap);
62 fprintf(stderr, "\n");
/* Walk the global CPU list and dump each CPU's state.  */
63 for(env = first_cpu; env != NULL; env = env->next_cpu) {
64 fprintf(stderr, "CPU #%d:\n", env->cpu_index);
/* x86 target build: include FPU registers in the dump (the #ifdef
 * selecting between these two calls is elided here).  */
66 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU);
68 cpu_dump_state(env, stderr, fprintf, 0);
/* Pull the latest register state for every CPU into QEMU's CPUState
 * (used before inspecting/migrating state).  */
75 void cpu_synchronize_all_states(void)
79 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
80 cpu_synchronize_state(cpu);
/* Push register state back to the accelerator after a system reset.  */
84 void cpu_synchronize_all_post_reset(void)
88 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
89 cpu_synchronize_post_reset(cpu);
/* Push register state back to the accelerator after machine init.  */
93 void cpu_synchronize_all_post_init(void)
97 for (cpu = first_cpu; cpu; cpu = cpu->next_cpu) {
98 cpu_synchronize_post_init(cpu);
/* A CPU counts as stopped when the whole VM is not running or the CPU
 * was stopped individually.  */
102 int cpu_is_stopped(CPUState *env)
104 return !vm_running || env->stopped;
/* Perform the actual VM stop: notify state-change listeners and emit
 * the monitor (QMP) STOP event.  */
107 static void do_vm_stop(int reason)
113 vm_state_notify(0, reason);
116 monitor_protocol_event(QEVENT_STOP, NULL);
/* A CPU may execute only when neither it nor the whole VM is stopped.  */
120 static int cpu_can_run(CPUState *env)
124 if (env->stopped || !vm_running)
/* A CPU has work when run_on_cpu() items are queued or the target
 * reports pending work -- unless it is stopped.  */
129 static int cpu_has_work(CPUState *env)
133 if (env->queued_work_first)
135 if (env->stopped || !vm_running)
139 if (qemu_cpu_has_work(env))
/* True when at least one CPU on the global list has pending work.  */
144 static int any_cpu_has_work(void)
148 for (env = first_cpu; env != NULL; env = env->next_cpu)
149 if (cpu_has_work(env))
/* Debug-exception hook: record which CPU stopped for gdb and request a
 * main-loop exit with EXCP_DEBUG.  */
154 static void cpu_debug_handler(CPUState *env)
156 gdb_set_stop_cpu(env);
157 debug_requested = EXCP_DEBUG;
/* Write end of the main-loop notify pipe/eventfd; -1 until
 * qemu_event_init() has run.  */
static int io_thread_fd = -1;

/* Wake the main loop by writing to the notify pipe/eventfd.
 * Safe to call before initialization (silently returns) and from
 * signal/vcpu context: the fd is non-blocking, EINTR is retried and
 * EAGAIN (pipe already full, reader pending) is ignored.  Any other
 * write failure is fatal.  */
static void qemu_event_increment(void)
{
    /* Write 8 bytes to be compatible with eventfd.  */
    static const uint64_t val = 1;
    ssize_t ret;

    if (io_thread_fd == -1) {
        return;
    }
    do {
        ret = write(io_thread_fd, &val, sizeof(val));
    } while (ret < 0 && errno == EINTR);

    /* EAGAIN is fine, a read must be pending.  */
    if (ret < 0 && errno != EAGAIN) {
        /* Fixed typo in the error message: "filed" -> "failed".  */
        fprintf(stderr, "qemu_event_increment: write() failed: %s\n",
                strerror(errno));
        exit(1);
    }
}
/* Main-loop callback: drain the notify fd so it stops polling
 * readable.  */
185 static void qemu_event_read(void *opaque)
187 int fd = (unsigned long)opaque;
191 /* Drain the notify pipe. For eventfd, only 8 bytes will be read. */
193 len = read(fd, buffer, sizeof(buffer));
/* Retry on EINTR; a completely full buffer means more data may still
 * be pending, so keep reading.  */
194 } while ((len == -1 && errno == EINTR) || len == sizeof(buffer));
/* Create the notify eventfd/pipe pair, make both ends non-blocking,
 * register the read end with the main loop and publish the write end
 * in io_thread_fd.  */
197 static int qemu_event_init(void)
202 err = qemu_eventfd(fds);
206 err = fcntl_setfl(fds[0], O_NONBLOCK);
210 err = fcntl_setfl(fds[1], O_NONBLOCK);
214 qemu_set_fd_handler2(fds[0], NULL, qemu_event_read, NULL,
215 (void *)(unsigned long)fds[0]);
217 io_thread_fd = fds[1];
/* Intentionally empty handler: installed so blocked signals are queued
 * for signalfd/sigtimedwait instead of taking the default action.  */
226 static void dummy_signal(int sig)
230 /* If we have signalfd, we mask out the signals we want to handle and then
231 * use signalfd to listen for them. We rely on whatever the current signal
232 * handler is to dispatch the signals when we receive them.
/* Main-loop callback: read one siginfo record from the signalfd and
 * manually invoke whatever handler is currently installed for that
 * signal number.  */
234 static void sigfd_handler(void *opaque)
236 int fd = (unsigned long) opaque;
237 struct qemu_signalfd_siginfo info;
238 struct sigaction action;
243 len = read(fd, &info, sizeof(info));
244 } while (len == -1 && errno == EINTR);
/* Non-blocking fd: EAGAIN just means no signal is pending.  */
246 if (len == -1 && errno == EAGAIN) {
250 if (len != sizeof(info)) {
251 printf("read from sigfd returned %zd: %m\n", len);
/* Query (not replace) the installed disposition, then dispatch to the
 * SA_SIGINFO or plain handler as appropriate.  */
255 sigaction(info.ssi_signo, NULL, &action);
256 if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction) {
257 action.sa_sigaction(info.ssi_signo,
258 (siginfo_t *)&info, NULL);
259 } else if (action.sa_handler) {
260 action.sa_handler(info.ssi_signo);
/* Create a signalfd covering 'mask' and hook it into the main loop.  */
265 static int qemu_signalfd_init(sigset_t mask)
269 sigfd = qemu_signalfd(&mask);
271 fprintf(stderr, "failed to create signalfd\n");
275 fcntl_setfl(sigfd, O_NONBLOCK);
277 qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL,
278 (void *)(unsigned long) sigfd);
283 static void sigbus_reraise(void);
/* Consume pending SIG_IPI/SIGBUS signals with a zero timeout so they
 * cannot fire inside the KVM vcpu loop; SIGBUS is forwarded to KVM's
 * machine-check handling.  */
285 static void qemu_kvm_eat_signals(CPUState *env)
287 struct timespec ts = { 0, 0 };
293 sigemptyset(&waitset);
294 sigaddset(&waitset, SIG_IPI);
295 sigaddset(&waitset, SIGBUS);
/* ts == {0,0}: poll, never block.  EAGAIN means nothing pending.  */
298 r = sigtimedwait(&waitset, &siginfo, &ts);
299 if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
300 perror("sigtimedwait");
305 #ifdef CONFIG_IOTHREAD
307 if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr)) {
316 r = sigpending(&chkset);
318 perror("sigpending");
/* Loop until neither watched signal remains pending.  */
321 } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
323 #ifndef CONFIG_IOTHREAD
/* NOTE(review): body elided -- presumably pending SIGIO/SIGALRM are
 * handed back to the main loop in the non-iothread case; confirm
 * against the full source.  */
324 if (sigismember(&chkset, SIGIO) || sigismember(&chkset, SIGALRM)) {
/* Win32 replacement for the eventfd/pipe wakeup mechanism.
 * CreateEvent(..., FALSE, FALSE, ...) makes an auto-reset event that
 * starts non-signaled.  */
332 HANDLE qemu_event_handle;
/* Handler body is empty: waking the wait object is the whole point.  */
334 static void dummy_event_handler(void *opaque)
338 static int qemu_event_init(void)
340 qemu_event_handle = CreateEvent(NULL, FALSE, FALSE, NULL);
341 if (!qemu_event_handle) {
342 fprintf(stderr, "Failed CreateEvent: %ld\n", GetLastError());
345 qemu_add_wait_object(qemu_event_handle, dummy_event_handler, NULL);
/* Wake the main loop by signaling the event; failure is fatal.  */
349 static void qemu_event_increment(void)
351 if (!SetEvent(qemu_event_handle)) {
352 fprintf(stderr, "qemu_event_increment: SetEvent failed: %ld\n",
/* Win32 stub: no POSIX signals to consume.  */
358 static void qemu_kvm_eat_signals(CPUState *env)
363 #ifndef CONFIG_IOTHREAD
/* Single-threaded build: block IPI/IO/timer signals in this thread,
 * then give KVM a mask (current mask minus those signals) that it
 * atomically installs only while the guest runs.  */
364 static void qemu_kvm_init_cpu_signals(CPUState *env)
369 struct sigaction sigact;
371 memset(&sigact, 0, sizeof(sigact));
372 sigact.sa_handler = dummy_signal;
373 sigaction(SIG_IPI, &sigact, NULL);
376 sigaddset(&set, SIG_IPI);
377 sigaddset(&set, SIGIO);
378 sigaddset(&set, SIGALRM);
379 pthread_sigmask(SIG_BLOCK, &set, NULL);
/* Re-read the full current mask, then drop the signals KVM must see
 * immediately while in the guest.  */
381 pthread_sigmask(SIG_BLOCK, NULL, &set);
382 sigdelset(&set, SIG_IPI);
383 sigdelset(&set, SIGBUS);
384 sigdelset(&set, SIGIO);
385 sigdelset(&set, SIGALRM);
/* kvm_set_signal_mask returns a negative errno on failure.  */
386 r = kvm_set_signal_mask(env, &set);
388 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
/* Build the set of signals handled synchronously via sigtimedwait
 * rather than asynchronously through signalfd.  */
395 static sigset_t block_synchronous_signals(void)
402 * We need to process timer signals synchronously to avoid a race
403 * between exit_request check and KVM vcpu entry.
405 sigaddset(&set, SIGIO);
406 sigaddset(&set, SIGALRM);
/* Single-threaded (non-iothread) main-loop setup: route signals,
 * install the debug-exception hook, create the notify mechanism.  */
413 int qemu_init_main_loop(void)
416 sigset_t blocked_signals;
419 blocked_signals = block_synchronous_signals();
421 ret = qemu_signalfd_init(blocked_signals);
426 cpu_set_debug_excp_handler(cpu_debug_handler);
428 return qemu_event_init();
/* No vcpu threads to release in the single-threaded build.  */
431 void qemu_main_loop_start(void)
/* Initialize a vcpu in-place; with KVM also create the kernel vcpu
 * (fatal on failure) and install its signal masks.  */
435 void qemu_init_vcpu(void *_env)
437 CPUState *env = _env;
440 env->nr_cores = smp_cores;
441 env->nr_threads = smp_threads;
444 r = kvm_init_vcpu(env);
446 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
449 qemu_kvm_init_cpu_signals(env);
/* Single-threaded stubs: with one thread, most of these degenerate to
 * trivial bodies (elided here).  */
453 int qemu_cpu_self(void *env)
458 void run_on_cpu(CPUState *env, void (*func)(void *data), void *data)
463 void resume_all_vcpus(void)
467 void pause_all_vcpus(void)
471 void qemu_cpu_kick(void *env)
/* Wake the main loop and kick the currently executing CPU so control
 * returns promptly.  */
476 void qemu_notify_event(void)
478 CPUState *env = cpu_single_env;
480 qemu_event_increment ();
484 if (next_cpu && env != next_cpu) {
/* No global lock exists in the single-threaded build.  */
490 void qemu_mutex_lock_iothread(void) {}
491 void qemu_mutex_unlock_iothread(void) {}
493 void cpu_stop_current(void)
497 void vm_stop(int reason)
502 #else /* CONFIG_IOTHREAD */
504 #include "qemu-thread.h"
/* Big QEMU lock, plus a second mutex used purely for fairness when the
 * I/O thread competes with the TCG thread for the global lock.  */
506 QemuMutex qemu_global_mutex;
507 static QemuMutex qemu_fair_mutex;
509 static QemuThread io_thread;
/* TCG runs every vcpu on one shared thread with one halt condition.  */
511 static QemuThread *tcg_cpu_thread;
512 static QemuCond *tcg_halt_cond;
514 static int qemu_system_ready;
/* Handshake condition variables: vcpu creation, machine-init release,
 * pause acknowledgement, and run_on_cpu() completion.  */
516 static QemuCond qemu_cpu_cond;
518 static QemuCond qemu_system_cond;
519 static QemuCond qemu_pause_cond;
520 static QemuCond qemu_work_cond;
/* SIG_IPI handler on the TCG thread: force the currently executing CPU
 * out of the translated-code loop.  */
522 static void cpu_signal(int sig)
524 if (cpu_single_env) {
525 cpu_exit(cpu_single_env);
/* Iothread+KVM vcpu thread: keep SIG_IPI/SIGBUS blocked here and give
 * KVM the remaining mask to install while the guest runs.  */
530 static void qemu_kvm_init_cpu_signals(CPUState *env)
534 struct sigaction sigact;
536 memset(&sigact, 0, sizeof(sigact));
537 sigact.sa_handler = dummy_signal;
538 sigaction(SIG_IPI, &sigact, NULL);
/* Start from the thread's current mask, minus the two signals this
 * thread consumes itself (via sigtimedwait).  */
540 pthread_sigmask(SIG_BLOCK, NULL, &set);
541 sigdelset(&set, SIG_IPI);
542 sigdelset(&set, SIGBUS);
543 r = kvm_set_signal_mask(env, &set);
545 fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
/* TCG thread: receive SIG_IPI through an ordinary handler
 * (cpu_signal) and make sure it is unblocked on this thread.  */
550 static void qemu_tcg_init_cpu_signals(void)
553 struct sigaction sigact;
555 memset(&sigact, 0, sizeof(sigact));
556 sigact.sa_handler = cpu_signal;
557 sigaction(SIG_IPI, &sigact, NULL);
560 sigaddset(&set, SIG_IPI);
561 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
564 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
/* I/O-thread signal setup: unblock SIGUSR2 (needed by posix-aio),
 * block the vcpu/timer signals so signalfd can pick them up, install
 * the SIGBUS handler, and return the blocked set.  */
567 static sigset_t block_io_signals(void)
570 struct sigaction action;
572 /* SIGUSR2 used by posix-aio-compat.c */
574 sigaddset(&set, SIGUSR2);
575 pthread_sigmask(SIG_UNBLOCK, &set, NULL);
578 sigaddset(&set, SIGIO);
579 sigaddset(&set, SIGALRM);
580 sigaddset(&set, SIG_IPI);
581 sigaddset(&set, SIGBUS);
582 pthread_sigmask(SIG_BLOCK, &set, NULL);
584 memset(&action, 0, sizeof(action));
585 action.sa_flags = SA_SIGINFO;
/* Cast: sigbus_handler takes qemu_signalfd_siginfo*, not siginfo_t*.  */
586 action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
587 sigaction(SIGBUS, &action, NULL);
/* Request early machine-check kill for this thread (PR_MCE_KILL;
 * defined locally above for old kernel headers).  */
588 prctl(PR_MCE_KILL, 1, 1, 0, 0);
/* Iothread build: install signal routing, the notify event, all
 * condition variables and mutexes, then take the global lock and
 * record this thread as the I/O thread.  */
593 int qemu_init_main_loop(void)
596 sigset_t blocked_signals;
598 cpu_set_debug_excp_handler(cpu_debug_handler);
600 blocked_signals = block_io_signals();
602 ret = qemu_signalfd_init(blocked_signals);
606 /* Note eventfd must be drained before signalfd handlers run */
607 ret = qemu_event_init();
611 qemu_cond_init(&qemu_pause_cond);
612 qemu_cond_init(&qemu_system_cond);
613 qemu_mutex_init(&qemu_fair_mutex);
614 qemu_mutex_init(&qemu_global_mutex);
/* The I/O thread holds the global lock whenever it is not waiting.  */
615 qemu_mutex_lock(&qemu_global_mutex);
617 qemu_thread_self(&io_thread);
/* Release vcpu threads that are waiting for machine initialization.  */
622 void qemu_main_loop_start(void)
624 qemu_system_ready = 1;
625 qemu_cond_broadcast(&qemu_system_cond);
/* Run func(data) on env's vcpu thread and wait for completion: queue a
 * stack-allocated work item, kick the CPU, sleep on qemu_work_cond.  */
628 void run_on_cpu(CPUState *env, void (*func)(void *data), void *data)
630 struct qemu_work_item wi;
/* Already on the target vcpu thread: just call it synchronously.  */
632 if (qemu_cpu_self(env)) {
/* Append to the CPU's singly linked work queue.  */
639 if (!env->queued_work_first)
640 env->queued_work_first = &wi;
642 env->queued_work_last->next = &wi;
643 env->queued_work_last = &wi;
/* cpu_single_env is clobbered while another CPU runs during our wait;
 * save and restore it around the sleep.  */
649 CPUState *self_env = cpu_single_env;
651 qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
652 cpu_single_env = self_env;
/* Drain this CPU's run_on_cpu queue and wake all waiting requesters.  */
656 static void flush_queued_work(CPUState *env)
658 struct qemu_work_item *wi;
660 if (!env->queued_work_first)
663 while ((wi = env->queued_work_first)) {
664 env->queued_work_first = wi->next;
668 env->queued_work_last = NULL;
669 qemu_cond_broadcast(&qemu_work_cond);
/* Bookkeeping shared by the KVM and TCG idle loops: acknowledge stop
 * requests, run queued work, clear the kick-debounce flag.  */
672 static void qemu_wait_io_event_common(CPUState *env)
677 qemu_cond_signal(&qemu_pause_cond);
679 flush_queued_work(env);
680 env->thread_kicked = false;
/* TCG idle loop: sleep until any CPU has work; then briefly drop the
 * global lock so a starved I/O thread can take it.  */
683 static void qemu_tcg_wait_io_event(void)
687 while (!any_cpu_has_work())
688 qemu_cond_timedwait(tcg_halt_cond, &qemu_global_mutex, 1000);
690 qemu_mutex_unlock(&qemu_global_mutex);
693 * Users of qemu_global_mutex can be starved, having no chance
694 * to acquire it since this path will get to it first.
695 * So use another lock to provide fairness.
697 qemu_mutex_lock(&qemu_fair_mutex);
698 qemu_mutex_unlock(&qemu_fair_mutex);
700 qemu_mutex_lock(&qemu_global_mutex);
702 for (env = first_cpu; env != NULL; env = env->next_cpu) {
703 qemu_wait_io_event_common(env);
/* Restore the default SIGBUS disposition, unblock it and re-deliver,
 * so the process dies with the original fault as the cause.  */
707 static void sigbus_reraise(void)
710 struct sigaction action;
712 memset(&action, 0, sizeof(action));
713 action.sa_handler = SIG_DFL;
714 if (!sigaction(SIGBUS, &action, NULL)) {
717 sigaddset(&set, SIGBUS);
718 sigprocmask(SIG_UNBLOCK, &set, NULL);
720 perror("Failed to re-raise SIGBUS!\n");
/* I/O-thread SIGBUS handler: hand hardware memory errors to KVM;
 * re-raise (die) if KVM cannot handle them.  */
724 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
727 if (kvm_on_sigbus(siginfo->ssi_code, (void *)(intptr_t)siginfo->ssi_addr)) {
/* KVM per-vcpu idle loop: wait for work, consume stray SIG_IPI/SIGBUS,
 * then do the common bookkeeping.  */
732 static void qemu_kvm_wait_io_event(CPUState *env)
734 while (!cpu_has_work(env))
735 qemu_cond_timedwait(env->halt_cond, &qemu_global_mutex, 1000);
737 qemu_kvm_eat_signals(env);
738 qemu_wait_io_event_common(env);
741 static int qemu_cpu_exec(CPUState *env);
/* Thread body for one KVM vcpu: create the kernel vcpu (fatal on
 * failure), set up signal masks, announce creation, wait for machine
 * init, then loop between running the guest and the idle wait.  */
743 static void *kvm_cpu_thread_fn(void *arg)
748 qemu_mutex_lock(&qemu_global_mutex);
749 qemu_thread_self(env->thread);
751 r = kvm_init_vcpu(env);
753 fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
757 qemu_kvm_init_cpu_signals(env);
759 /* signal CPU creation */
761 qemu_cond_signal(&qemu_cpu_cond);
763 /* and wait for machine initialization */
764 while (!qemu_system_ready)
765 qemu_cond_timedwait(&qemu_system_cond, &qemu_global_mutex, 100);
768 if (cpu_can_run(env))
770 qemu_kvm_wait_io_event(env);
/* Thread body shared by all TCG vcpus: a single thread marks every CPU
 * created, then alternates cpu_exec_all() with the TCG idle wait.  */
776 static void *tcg_cpu_thread_fn(void *arg)
780 qemu_tcg_init_cpu_signals();
781 qemu_thread_self(env->thread);
783 /* signal CPU creation */
784 qemu_mutex_lock(&qemu_global_mutex);
785 for (env = first_cpu; env != NULL; env = env->next_cpu)
787 qemu_cond_signal(&qemu_cpu_cond);
789 /* and wait for machine initialization */
790 while (!qemu_system_ready)
791 qemu_cond_timedwait(&qemu_system_cond, &qemu_global_mutex, 100);
795 qemu_tcg_wait_io_event();
/* Wake a vcpu: broadcast its halt condition and send SIG_IPI at most
 * once per cycle (thread_kicked debounces the signal).  */
801 void qemu_cpu_kick(void *_env)
803 CPUState *env = _env;
804 qemu_cond_broadcast(env->halt_cond);
805 if (!env->thread_kicked) {
806 qemu_thread_signal(env->thread, SIG_IPI);
807 env->thread_kicked = true;
/* True when the calling thread is env's vcpu thread.  */
811 int qemu_cpu_self(void *_env)
813 CPUState *env = _env;
816 qemu_thread_self(&this);
818 return qemu_thread_equal(&this, env->thread);
/* Acquire the big QEMU lock.  With TCG (the trylock branch), if the
 * lock is contended, kick the TCG thread out of emulation with SIG_IPI
 * and use qemu_fair_mutex so this thread is not starved.  */
821 void qemu_mutex_lock_iothread(void)
824 qemu_mutex_lock(&qemu_global_mutex);
826 qemu_mutex_lock(&qemu_fair_mutex);
827 if (qemu_mutex_trylock(&qemu_global_mutex)) {
828 qemu_thread_signal(tcg_cpu_thread, SIG_IPI);
829 qemu_mutex_lock(&qemu_global_mutex);
831 qemu_mutex_unlock(&qemu_fair_mutex);
835 void qemu_mutex_unlock_iothread(void)
837 qemu_mutex_unlock(&qemu_global_mutex);
/* True when every CPU on the global list has acknowledged a pause.  */
840 static int all_vcpus_paused(void)
842 CPUState *penv = first_cpu;
847 penv = (CPUState *)penv->next_cpu;
/* Ask every CPU to stop, kick them, then wait (re-kicking each round)
 * until all report paused.  */
853 void pause_all_vcpus(void)
855 CPUState *penv = first_cpu;
860 penv = (CPUState *)penv->next_cpu;
863 while (!all_vcpus_paused()) {
864 qemu_cond_timedwait(&qemu_pause_cond, &qemu_global_mutex, 100);
868 penv = (CPUState *)penv->next_cpu;
/* Clear the stop flags and kick every CPU back to life.  */
873 void resume_all_vcpus(void)
875 CPUState *penv = first_cpu;
881 penv = (CPUState *)penv->next_cpu;
/* TCG: lazily create the single shared CPU thread on first use and
 * wait for it to report created; later CPUs just reuse the thread and
 * its halt condition.  */
885 static void tcg_init_vcpu(void *_env)
887 CPUState *env = _env;
888 /* share a single thread for all cpus with TCG */
889 if (!tcg_cpu_thread) {
890 env->thread = qemu_mallocz(sizeof(QemuThread));
891 env->halt_cond = qemu_mallocz(sizeof(QemuCond));
892 qemu_cond_init(env->halt_cond);
893 qemu_thread_create(env->thread, tcg_cpu_thread_fn, env);
894 while (env->created == 0)
895 qemu_cond_timedwait(&qemu_cpu_cond, &qemu_global_mutex, 100);
896 tcg_cpu_thread = env->thread;
897 tcg_halt_cond = env->halt_cond;
/* else branch: reuse the shared TCG thread and condition.  */
899 env->thread = tcg_cpu_thread;
900 env->halt_cond = tcg_halt_cond;
/* KVM: one dedicated thread per vcpu; wait for the created flag.  */
904 static void kvm_start_vcpu(CPUState *env)
906 env->thread = qemu_mallocz(sizeof(QemuThread));
907 env->halt_cond = qemu_mallocz(sizeof(QemuCond));
908 qemu_cond_init(env->halt_cond);
909 qemu_thread_create(env->thread, kvm_cpu_thread_fn, env);
910 while (env->created == 0)
911 qemu_cond_timedwait(&qemu_cpu_cond, &qemu_global_mutex, 100);
/* Public entry point: record SMP topology hints, then start the KVM or
 * TCG backend for this vcpu (dispatch elided in this extract).  */
914 void qemu_init_vcpu(void *_env)
916 CPUState *env = _env;
918 env->nr_cores = smp_cores;
919 env->nr_threads = smp_threads;
/* Wake the I/O thread's main loop.  */
926 void qemu_notify_event(void)
928 qemu_event_increment();
/* Record a pending vm-stop request for the I/O thread to act on.  */
931 static void qemu_system_vmstop_request(int reason)
933 vmstop_requested = reason;
/* Stop only the CPU currently executing (called from vcpu context).  */
937 void cpu_stop_current(void)
939 if (cpu_single_env) {
940 cpu_single_env->stopped = 1;
941 cpu_exit(cpu_single_env);
/* Stop the VM.  From a vcpu thread we may only *request* the stop and
 * halt ourselves; the I/O thread performs the actual do_vm_stop().  */
945 void vm_stop(int reason)
948 qemu_thread_self(&me);
950 if (!qemu_thread_equal(&me, &io_thread)) {
951 qemu_system_vmstop_request(reason);
953 * FIXME: should not return to device code in case
954 * vm_stop() has been requested.
/* Execute guest code on one CPU, maintaining the instruction-count
 * (icount) budget around the cpu_exec() call and, when profiling is
 * built in, the emulation-time counter.  */
964 static int qemu_cpu_exec(CPUState *env)
967 #ifdef CONFIG_PROFILER
971 #ifdef CONFIG_PROFILER
972 ti = profile_getclock();
/* icount mode: fold back any unexecuted instructions from the last
 * slice, then budget instructions up to the next timer deadline.  */
977 qemu_icount -= (env->icount_decr.u16.low + env->icount_extra);
978 env->icount_decr.u16.low = 0;
979 env->icount_extra = 0;
980 count = qemu_icount_round (qemu_next_deadline());
981 qemu_icount += count;
/* Low 16 bits of the budget go in icount_decr, remainder in
 * icount_extra.  */
982 decr = (count > 0xffff) ? 0xffff : count;
984 env->icount_decr.u16.low = decr;
985 env->icount_extra = count;
988 #ifdef CONFIG_PROFILER
989 qemu_time += profile_getclock() - ti;
992 /* Fold pending instructions back into the
993 instruction counter, and clear the interrupt flag. */
994 qemu_icount -= (env->icount_decr.u16.low
995 + env->icount_extra);
/* Writing u32 clears both the count and the interrupt-request flag.  */
996 env->icount_decr.u32 = 0;
997 env->icount_extra = 0;
/* Round-robin every CPU once starting at next_cpu (TCG scheduling
 * loop); returns whether any CPU still has pending work.  */
1002 bool cpu_exec_all(void)
1006 if (next_cpu == NULL)
1007 next_cpu = first_cpu;
1008 for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
1009 CPUState *env = next_cpu;
/* Suspend vm_clock while single-stepping with timers disabled.  */
1011 qemu_clock_enable(vm_clock,
1012 (env->singlestep_enabled & SSTEP_NOTIMER) == 0);
/* Pending alarm: break out so the main loop can service timers.  */
1014 if (qemu_alarm_pending())
1016 if (cpu_can_run(env)) {
1017 r = qemu_cpu_exec(env);
1018 if (kvm_enabled()) {
1019 qemu_kvm_eat_signals(env);
1021 if (r == EXCP_DEBUG) {
1024 } else if (env->stop) {
1029 return any_cpu_has_work();
/* Apply NUMA placement per CPU.  NOTE(review): the action taken when
 * node_cpumask matches is elided here -- presumably the vcpu is bound
 * to node i; confirm against the full source.  */
1032 void set_numa_modes(void)
1037 for (env = first_cpu; env != NULL; env = env->next_cpu) {
1038 for (i = 0; i < nb_numa_nodes; i++) {
1039 if (node_cpumask[i] & (1 << env->cpu_index)) {
/* Parse the '-d' log-flag string; on bad input list the valid items.  */
1046 void set_cpu_log(const char *optarg)
1049 const CPULogItem *item;
1051 mask = cpu_str_to_log_mask(optarg);
1053 printf("Log items (comma separated):\n");
1054 for (item = cpu_log_items; item->mask != 0; item++) {
1055 printf("%-10s %s\n", item->name, item->help);
1062 /* Return the virtual CPU time, based on the instruction counter. */
1063 int64_t cpu_get_icount(void)
1066 CPUState *env = cpu_single_env;;
1068 icount = qemu_icount;
1070 if (!can_do_io(env)) {
1071 fprintf(stderr, "Bad clock read\n");
1073 icount -= (env->icount_decr.u16.low + env->icount_extra);
1075 return qemu_icount_bias + (icount << icount_time_shift);
1078 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1080 /* XXX: implement xxx_cpu_list for targets that still miss it */
1081 #if defined(cpu_list_id)
1082 cpu_list_id(f, cpu_fprintf, optarg);
1083 #elif defined(cpu_list)
1084 cpu_list(f, cpu_fprintf); /* deprecated */