[qemu.git] / cpus.c
1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24
25 /* Needed early for CONFIG_BSD etc. */
26 #include "qemu/osdep.h"
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "monitor/monitor.h"
30 #include "qapi/qmp/qerror.h"
31 #include "qemu/error-report.h"
32 #include "sysemu/sysemu.h"
33 #include "sysemu/block-backend.h"
34 #include "exec/gdbstub.h"
35 #include "sysemu/dma.h"
36 #include "sysemu/hw_accel.h"
37 #include "sysemu/kvm.h"
38 #include "sysemu/hax.h"
39 #include "qmp-commands.h"
40 #include "exec/exec-all.h"
41
42 #include "qemu/thread.h"
43 #include "sysemu/cpus.h"
44 #include "sysemu/qtest.h"
45 #include "qemu/main-loop.h"
46 #include "qemu/bitmap.h"
47 #include "qemu/seqlock.h"
48 #include "qapi-event.h"
49 #include "hw/nmi.h"
50 #include "sysemu/replay.h"
51
52 #ifndef _WIN32
53 #include "qemu/compatfd.h"
54 #endif
55
56 #ifdef CONFIG_LINUX
57
58 #include <sys/prctl.h>
59
60 #ifndef PR_MCE_KILL
61 #define PR_MCE_KILL 33
62 #endif
63
64 #ifndef PR_MCE_KILL_SET
65 #define PR_MCE_KILL_SET 1
66 #endif
67
68 #ifndef PR_MCE_KILL_EARLY
69 #define PR_MCE_KILL_EARLY 1
70 #endif
71
72 #endif /* CONFIG_LINUX */
73
74 int64_t max_delay;
75 int64_t max_advance;
76
77 /* vcpu throttling controls */
78 static QEMUTimer *throttle_timer;
79 static unsigned int throttle_percentage;
80
81 #define CPU_THROTTLE_PCT_MIN 1
82 #define CPU_THROTTLE_PCT_MAX 99
83 #define CPU_THROTTLE_TIMESLICE_NS 10000000
84
85 bool cpu_is_stopped(CPUState *cpu)
86 {
87     return cpu->stopped || !runstate_is_running();
88 }
89
90 static bool cpu_thread_is_idle(CPUState *cpu)
91 {
92     if (cpu->stop || cpu->queued_work_first) {
93         return false;
94     }
95     if (cpu_is_stopped(cpu)) {
96         return true;
97     }
98     if (!cpu->halted || cpu_has_work(cpu) ||
99         kvm_halt_in_kernel()) {
100         return false;
101     }
102     return true;
103 }
104
105 static bool all_cpu_threads_idle(void)
106 {
107     CPUState *cpu;
108
109     CPU_FOREACH(cpu) {
110         if (!cpu_thread_is_idle(cpu)) {
111             return false;
112         }
113     }
114     return true;
115 }
116
117 /***********************************************************/
118 /* guest cycle counter */
119
120 /* Protected by TimersState seqlock */
121
122 static bool icount_sleep = true;
123 static int64_t vm_clock_warp_start = -1;
124 /* Conversion factor from emulated instructions to virtual clock ticks.  */
125 static int icount_time_shift;
126 /* Arbitrarily pick 1 MIPS (1024 ns per insn at shift 10) as the minimum allowable speed.  */
127 #define MAX_ICOUNT_SHIFT 10
128
129 static QEMUTimer *icount_rt_timer;
130 static QEMUTimer *icount_vm_timer;
131 static QEMUTimer *icount_warp_timer;
132
133 typedef struct TimersState {
134     /* Protected by BQL.  */
135     int64_t cpu_ticks_prev;
136     int64_t cpu_ticks_offset;
137
138     /* cpu_clock_offset can be read out of BQL, so protect it with
139      * this lock.
140      */
141     QemuSeqLock vm_clock_seqlock;
142     int64_t cpu_clock_offset;
143     int32_t cpu_ticks_enabled;
144     int64_t dummy;
145
146     /* Compensate for varying guest execution speed.  */
147     int64_t qemu_icount_bias;
148     /* Only written by TCG thread */
149     int64_t qemu_icount;
150 } TimersState;
151
152 static TimersState timers_state;
153
154 int64_t cpu_get_icount_raw(void)
155 {
156     int64_t icount;
157     CPUState *cpu = current_cpu;
158
159     icount = timers_state.qemu_icount;
160     if (cpu) {
161         if (!cpu->can_do_io) {
162             fprintf(stderr, "Bad icount read\n");
163             exit(1);
164         }
165         icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
166     }
167     return icount;
168 }
169
170 /* Return the virtual CPU time, based on the instruction counter.  */
171 static int64_t cpu_get_icount_locked(void)
172 {
173     int64_t icount = cpu_get_icount_raw();
174     return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
175 }
176
177 int64_t cpu_get_icount(void)
178 {
179     int64_t icount;
180     unsigned start;
181
182     do {
183         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
184         icount = cpu_get_icount_locked();
185     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
186
187     return icount;
188 }
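/*
 * Why the retry loop matters: readers of the virtual clock run outside
 * the BQL, so on a 32-bit host a bare int64_t load of qemu_icount_bias
 * could tear.  The seqlock also guarantees that the bias and
 * icount_time_shift used by cpu_get_icount_locked() come from the same
 * icount_adjust() run, rather than mixing two adjustment epochs.
 */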
189
190 int64_t cpu_icount_to_ns(int64_t icount)
191 {
192     return icount << icount_time_shift;
193 }
194
195 /* Return the time elapsed in the VM between vm_start and vm_stop.  Unless
196  * icount is active, cpu_get_ticks() uses units of the host CPU cycle
197  * counter.
198  *
199  * Caller must hold the BQL
200  */
201 int64_t cpu_get_ticks(void)
202 {
203     int64_t ticks;
204
205     if (use_icount) {
206         return cpu_get_icount();
207     }
208
209     ticks = timers_state.cpu_ticks_offset;
210     if (timers_state.cpu_ticks_enabled) {
211         ticks += cpu_get_host_ticks();
212     }
213
214     if (timers_state.cpu_ticks_prev > ticks) {
215         /* Note: non-increasing ticks may happen if the host uses
216            software suspend.  */
217         timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
218         ticks = timers_state.cpu_ticks_prev;
219     }
220
221     timers_state.cpu_ticks_prev = ticks;
222     return ticks;
223 }
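/*
 * Worked example of the monotonicity fixup above: with cpu_ticks_offset
 * at 0 and cpu_get_host_ticks() returning 5000, cpu_get_ticks() reports
 * 5000 and records it in cpu_ticks_prev.  If a host suspend/resume then
 * makes cpu_get_host_ticks() return 1000, the raw sum would jump
 * backwards to 1000; the fixup folds the 4000-tick shortfall into
 * cpu_ticks_offset and reports 5000 again, so the guest-visible counter
 * never decreases.
 */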
224
225 static int64_t cpu_get_clock_locked(void)
226 {
227     int64_t time;
228
229     time = timers_state.cpu_clock_offset;
230     if (timers_state.cpu_ticks_enabled) {
231         time += get_clock();
232     }
233
234     return time;
235 }
236
237 /* Return the monotonic time elapsed in the VM, i.e.,
238  * the time between vm_start and vm_stop
239  */
240 int64_t cpu_get_clock(void)
241 {
242     int64_t ti;
243     unsigned start;
244
245     do {
246         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
247         ti = cpu_get_clock_locked();
248     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
249
250     return ti;
251 }
252
253 /* enable cpu_get_ticks()
254  * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
255  */
256 void cpu_enable_ticks(void)
257 {
258     /* Here, what the seqlock really protects is cpu_clock_offset. */
259     seqlock_write_begin(&timers_state.vm_clock_seqlock);
260     if (!timers_state.cpu_ticks_enabled) {
261         timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
262         timers_state.cpu_clock_offset -= get_clock();
263         timers_state.cpu_ticks_enabled = 1;
264     }
265     seqlock_write_end(&timers_state.vm_clock_seqlock);
266 }
267
268 /* disable cpu_get_ticks(): the clock is stopped.  You must not call
269  * cpu_get_ticks() after that.
270  * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
271  */
272 void cpu_disable_ticks(void)
273 {
274     /* Here, what the seqlock really protects is cpu_clock_offset. */
275     seqlock_write_begin(&timers_state.vm_clock_seqlock);
276     if (timers_state.cpu_ticks_enabled) {
277         timers_state.cpu_ticks_offset += cpu_get_host_ticks();
278         timers_state.cpu_clock_offset = cpu_get_clock_locked();
279         timers_state.cpu_ticks_enabled = 0;
280     }
281     seqlock_write_end(&timers_state.vm_clock_seqlock);
282 }
283
284 /* Correlation between real and virtual time is always going to be
285    fairly approximate, so ignore small variation.
286    When the guest is idle, real and virtual time will be aligned in
287    the IO wait loop.  */
288 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
289
290 static void icount_adjust(void)
291 {
292     int64_t cur_time;
293     int64_t cur_icount;
294     int64_t delta;
295
296     /* Protected by the BQL, which serializes seqlock writers.  */
297     static int64_t last_delta;
298
299     /* If the VM is not running, then do nothing.  */
300     if (!runstate_is_running()) {
301         return;
302     }
303
304     seqlock_write_begin(&timers_state.vm_clock_seqlock);
305     cur_time = cpu_get_clock_locked();
306     cur_icount = cpu_get_icount_locked();
307
308     delta = cur_icount - cur_time;
309     /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
310     if (delta > 0
311         && last_delta + ICOUNT_WOBBLE < delta * 2
312         && icount_time_shift > 0) {
313         /* The guest is getting too far ahead.  Slow time down.  */
314         icount_time_shift--;
315     }
316     if (delta < 0
317         && last_delta - ICOUNT_WOBBLE > delta * 2
318         && icount_time_shift < MAX_ICOUNT_SHIFT) {
319         /* The guest is getting too far behind.  Speed time up.  */
320         icount_time_shift++;
321     }
322     last_delta = delta;
323     timers_state.qemu_icount_bias = cur_icount
324                               - (timers_state.qemu_icount << icount_time_shift);
325     seqlock_write_end(&timers_state.vm_clock_seqlock);
326 }
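/*
 * The closing bias update is what keeps the clock continuous across a
 * shift change.  Immediately afterwards:
 *
 *     cpu_get_icount_locked() = bias + (qemu_icount << shift)
 *                             = (cur_icount - (qemu_icount << shift))
 *                                          + (qemu_icount << shift)
 *                             = cur_icount
 *
 * so changing icount_time_shift alters only the clock's rate from this
 * point on, never its current value.
 */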
327
328 static void icount_adjust_rt(void *opaque)
329 {
330     timer_mod(icount_rt_timer,
331               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
332     icount_adjust();
333 }
334
335 static void icount_adjust_vm(void *opaque)
336 {
337     timer_mod(icount_vm_timer,
338                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
339                    NANOSECONDS_PER_SECOND / 10);
340     icount_adjust();
341 }
342
343 static int64_t qemu_icount_round(int64_t count)
344 {
345     return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
346 }
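/*
 * This is a ceiling division by 2^icount_time_shift: it converts a
 * nanosecond deadline into an instruction budget, rounding up.  For
 * example, with icount_time_shift == 3 (8 ns per insn),
 * qemu_icount_round(20) is (20 + 7) >> 3 == 3 instructions; rounding up
 * guarantees the virtual clock reaches the next QEMU_CLOCK_VIRTUAL
 * deadline within a single execution slice.
 */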
347
348 static void icount_warp_rt(void)
349 {
350     unsigned seq;
351     int64_t warp_start;
352
353     /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
354      * changes from -1 to another value, so the race here is okay.
355      */
356     do {
357         seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
358         warp_start = vm_clock_warp_start;
359     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
360
361     if (warp_start == -1) {
362         return;
363     }
364
365     seqlock_write_begin(&timers_state.vm_clock_seqlock);
366     if (runstate_is_running()) {
367         int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
368                                      cpu_get_clock_locked());
369         int64_t warp_delta;
370
371         warp_delta = clock - vm_clock_warp_start;
372         if (use_icount == 2) {
373             /*
374              * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
375              * far ahead of real time.
376              */
377             int64_t cur_icount = cpu_get_icount_locked();
378             int64_t delta = clock - cur_icount;
379             warp_delta = MIN(warp_delta, delta);
380         }
381         timers_state.qemu_icount_bias += warp_delta;
382     }
383     vm_clock_warp_start = -1;
384     seqlock_write_end(&timers_state.vm_clock_seqlock);
385
386     if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
387         qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
388     }
389 }
390
391 static void icount_timer_cb(void *opaque)
392 {
393     /* No need for a checkpoint because the timer already synchronizes
394      * with CHECKPOINT_CLOCK_VIRTUAL_RT.
395      */
396     icount_warp_rt();
397 }
398
399 void qtest_clock_warp(int64_t dest)
400 {
401     int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
402     AioContext *aio_context;
403     assert(qtest_enabled());
404     aio_context = qemu_get_aio_context();
405     while (clock < dest) {
406         int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
407         int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
408
409         seqlock_write_begin(&timers_state.vm_clock_seqlock);
410         timers_state.qemu_icount_bias += warp;
411         seqlock_write_end(&timers_state.vm_clock_seqlock);
412
413         qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
414         timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
415         clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
416     }
417     qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
418 }
419
420 void qemu_start_warp_timer(void)
421 {
422     int64_t clock;
423     int64_t deadline;
424
425     if (!use_icount) {
426         return;
427     }
428
429     /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
430      * do not fire, so computing the deadline does not make sense.
431      */
432     if (!runstate_is_running()) {
433         return;
434     }
435
436     /* warp clock deterministically in record/replay mode */
437     if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
438         return;
439     }
440
441     if (!all_cpu_threads_idle()) {
442         return;
443     }
444
445     if (qtest_enabled()) {
446         /* When testing, qtest commands advance icount.  */
447         return;
448     }
449
450     /* We want to use the earliest deadline from ALL vm_clocks */
451     clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
452     deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
453     if (deadline < 0) {
454         static bool notified;
455         if (!icount_sleep && !notified) {
456             error_report("WARNING: icount sleep disabled and no active timers");
457             notified = true;
458         }
459         return;
460     }
461
462     if (deadline > 0) {
463         /*
464          * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
465          * sleep.  Otherwise, the CPU might be waiting for a future timer
466          * interrupt to wake it up, but the interrupt never comes because
467          * the vCPU isn't running any insns and thus doesn't advance the
468          * QEMU_CLOCK_VIRTUAL.
469          */
470         if (!icount_sleep) {
471             /*
472              * We never let VCPUs sleep when icount sleep is disabled.
473              * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
474              * the clock to the next QEMU_CLOCK_VIRTUAL event and notify it.
475              * This is useful when we want a deterministic execution time,
476              * isolated from host latencies.
477              */
478             seqlock_write_begin(&timers_state.vm_clock_seqlock);
479             timers_state.qemu_icount_bias += deadline;
480             seqlock_write_end(&timers_state.vm_clock_seqlock);
481             qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
482         } else {
483             /*
484              * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
485              * "real" time (related to the time left until the next event) has
486              * passed.  The QEMU_CLOCK_VIRTUAL_RT clock will do this.
487              * This keeps the warps from being visible externally; for example,
488              * you will not end up sending network packets continuously instead
489              * of every 100 ms.
490              */
491             seqlock_write_begin(&timers_state.vm_clock_seqlock);
492             if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
493                 vm_clock_warp_start = clock;
494             }
495             seqlock_write_end(&timers_state.vm_clock_seqlock);
496             timer_mod_anticipate(icount_warp_timer, clock + deadline);
497         }
498     } else if (deadline == 0) {
499         qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
500     }
501 }
502
503 static void qemu_account_warp_timer(void)
504 {
505     if (!use_icount || !icount_sleep) {
506         return;
507     }
508
509     /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
510      * do not fire, so computing the deadline does not make sense.
511      */
512     if (!runstate_is_running()) {
513         return;
514     }
515
516     /* warp clock deterministically in record/replay mode */
517     if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
518         return;
519     }
520
521     timer_del(icount_warp_timer);
522     icount_warp_rt();
523 }
524
525 static bool icount_state_needed(void *opaque)
526 {
527     return use_icount;
528 }
529
530 /*
531  * This is a subsection for icount migration.
532  */
533 static const VMStateDescription icount_vmstate_timers = {
534     .name = "timer/icount",
535     .version_id = 1,
536     .minimum_version_id = 1,
537     .needed = icount_state_needed,
538     .fields = (VMStateField[]) {
539         VMSTATE_INT64(qemu_icount_bias, TimersState),
540         VMSTATE_INT64(qemu_icount, TimersState),
541         VMSTATE_END_OF_LIST()
542     }
543 };
544
545 static const VMStateDescription vmstate_timers = {
546     .name = "timer",
547     .version_id = 2,
548     .minimum_version_id = 1,
549     .fields = (VMStateField[]) {
550         VMSTATE_INT64(cpu_ticks_offset, TimersState),
551         VMSTATE_INT64(dummy, TimersState),
552         VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
553         VMSTATE_END_OF_LIST()
554     },
555     .subsections = (const VMStateDescription*[]) {
556         &icount_vmstate_timers,
557         NULL
558     }
559 };
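/*
 * A note on the layout (an inference, not stated in the source): "dummy"
 * appears to occupy the wire slot of a removed int64 field (historically
 * ticks_per_sec) so that version 1 streams keep their shape, while the
 * icount fields travel in a subsection that is omitted entirely whenever
 * icount_state_needed() returns false, keeping migration compatible with
 * peers that know nothing about icount.
 */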
560
561 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
562 {
563     double pct;
564     double throttle_ratio;
565     long sleeptime_ns;
566
567     if (!cpu_throttle_get_percentage()) {
568         return;
569     }
570
571     pct = (double)cpu_throttle_get_percentage()/100;
572     throttle_ratio = pct / (1 - pct);
573     sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
574
575     qemu_mutex_unlock_iothread();
576     atomic_set(&cpu->throttle_thread_scheduled, 0);
577     g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
578     qemu_mutex_lock_iothread();
579 }
580
581 static void cpu_throttle_timer_tick(void *opaque)
582 {
583     CPUState *cpu;
584     double pct;
585
586     /* Stop the timer if needed */
587     if (!cpu_throttle_get_percentage()) {
588         return;
589     }
590     CPU_FOREACH(cpu) {
591         if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
592             async_run_on_cpu(cpu, cpu_throttle_thread,
593                              RUN_ON_CPU_NULL);
594         }
595     }
596
597     pct = (double)cpu_throttle_get_percentage()/100;
598     timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
599                                    CPU_THROTTLE_TIMESLICE_NS / (1-pct));
600 }
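/*
 * The two formulas combine so that a vCPU sleeps for exactly the
 * requested fraction of each period: cpu_throttle_thread() sleeps for
 * pct / (1 - pct) * 10 ms, and the tick above rearms itself every
 * 10 ms / (1 - pct).  A standalone sketch (plain C, not part of this
 * file) that tabulates the effect:
 */
#include <stdio.h>

int main(void)
{
    const double timeslice_ms = 10.0;       /* CPU_THROTTLE_TIMESLICE_NS */
    const int pcts[] = { 25, 50, 75, 99 };  /* within PCT_MIN..PCT_MAX */

    for (size_t i = 0; i < sizeof(pcts) / sizeof(pcts[0]); i++) {
        double pct = pcts[i] / 100.0;
        double sleep_ms = pct / (1 - pct) * timeslice_ms;  /* cpu_throttle_thread */
        double period_ms = timeslice_ms / (1 - pct);       /* cpu_throttle_timer_tick */
        printf("%2d%%: sleeps %6.1f ms of each %6.1f ms period (%.0f%% idle)\n",
               pcts[i], sleep_ms, period_ms, 100.0 * sleep_ms / period_ms);
    }
    return 0;
}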
601
602 void cpu_throttle_set(int new_throttle_pct)
603 {
604     /* Ensure throttle percentage is within valid range */
605     new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
606     new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
607
608     atomic_set(&throttle_percentage, new_throttle_pct);
609
610     timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
611                                        CPU_THROTTLE_TIMESLICE_NS);
612 }
613
614 void cpu_throttle_stop(void)
615 {
616     atomic_set(&throttle_percentage, 0);
617 }
618
619 bool cpu_throttle_active(void)
620 {
621     return (cpu_throttle_get_percentage() != 0);
622 }
623
624 int cpu_throttle_get_percentage(void)
625 {
626     return atomic_read(&throttle_percentage);
627 }
628
629 void cpu_ticks_init(void)
630 {
631     seqlock_init(&timers_state.vm_clock_seqlock);
632     vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
633     throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
634                                            cpu_throttle_timer_tick, NULL);
635 }
636
637 void configure_icount(QemuOpts *opts, Error **errp)
638 {
639     const char *option;
640     char *rem_str = NULL;
641
642     option = qemu_opt_get(opts, "shift");
643     if (!option) {
644         if (qemu_opt_get(opts, "align") != NULL) {
645             error_setg(errp, "Please specify shift option when using align");
646         }
647         return;
648     }
649
650     icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
651     if (icount_sleep) {
652         icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
653                                          icount_timer_cb, NULL);
654     }
655
656     icount_align_option = qemu_opt_get_bool(opts, "align", false);
657
658     if (icount_align_option && !icount_sleep) {
659         error_setg(errp, "align=on and sleep=off are incompatible");
660     }
661     if (strcmp(option, "auto") != 0) {
662         errno = 0;
663         icount_time_shift = strtol(option, &rem_str, 0);
664         if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
665             error_setg(errp, "icount: Invalid shift value");
666         }
667         use_icount = 1;
668         return;
669     } else if (icount_align_option) {
670         error_setg(errp, "shift=auto and align=on are incompatible");
671     } else if (!icount_sleep) {
672         error_setg(errp, "shift=auto and sleep=off are incompatible");
673     }
674
675     use_icount = 2;
676
677     /* 125 MIPS seems a reasonable initial guess at the guest speed.
678        It will be corrected fairly quickly anyway.  */
679     icount_time_shift = 3;
680
681     /* Have both realtime and virtual time triggers for speed adjustment.
682        The realtime trigger catches emulated time passing too slowly,
683        the virtual time trigger catches emulated time passing too fast.
684        Realtime triggers occur even when idle, so use them less frequently
685        than VM triggers.  */
686     icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
687                                    icount_adjust_rt, NULL);
688     timer_mod(icount_rt_timer,
689                    qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
690     icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
691                                         icount_adjust_vm, NULL);
692     timer_mod(icount_vm_timer,
693                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
694                    NANOSECONDS_PER_SECOND / 10);
695 }
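/*
 * The resulting option space, as a usage sketch (see the qemu-options
 * documentation for the authoritative text):
 *
 *   -icount shift=3               fixed rate, 8 ns per insn (~125 MIPS)
 *   -icount shift=auto            adaptive (use_icount == 2); the two
 *                                 timers armed above keep retuning the shift
 *   -icount shift=auto,sleep=off  deterministic run time: the clock warps
 *                                 to the next deadline instead of sleeping
 *                                 (see qemu_start_warp_timer)
 *   -icount shift=N,align=on      additionally track guest/host drift (see
 *                                 dump_drift_info); needs an explicit shift
 *                                 and sleep=on, per the checks above
 */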
696
697 /***********************************************************/
698 void hw_error(const char *fmt, ...)
699 {
700     va_list ap;
701     CPUState *cpu;
702
703     va_start(ap, fmt);
704     fprintf(stderr, "qemu: hardware error: ");
705     vfprintf(stderr, fmt, ap);
706     fprintf(stderr, "\n");
707     CPU_FOREACH(cpu) {
708         fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
709         cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
710     }
711     va_end(ap);
712     abort();
713 }
714
715 void cpu_synchronize_all_states(void)
716 {
717     CPUState *cpu;
718
719     CPU_FOREACH(cpu) {
720         cpu_synchronize_state(cpu);
721     }
722 }
723
724 void cpu_synchronize_all_post_reset(void)
725 {
726     CPUState *cpu;
727
728     CPU_FOREACH(cpu) {
729         cpu_synchronize_post_reset(cpu);
730     }
731 }
732
733 void cpu_synchronize_all_post_init(void)
734 {
735     CPUState *cpu;
736
737     CPU_FOREACH(cpu) {
738         cpu_synchronize_post_init(cpu);
739     }
740 }
741
742 static int do_vm_stop(RunState state)
743 {
744     int ret = 0;
745
746     if (runstate_is_running()) {
747         cpu_disable_ticks();
748         pause_all_vcpus();
749         runstate_set(state);
750         vm_state_notify(0, state);
751         qapi_event_send_stop(&error_abort);
752     }
753
754     bdrv_drain_all();
755     replay_disable_events();
756     ret = bdrv_flush_all();
757
758     return ret;
759 }
760
761 static bool cpu_can_run(CPUState *cpu)
762 {
763     if (cpu->stop) {
764         return false;
765     }
766     if (cpu_is_stopped(cpu)) {
767         return false;
768     }
769     return true;
770 }
771
772 static void cpu_handle_guest_debug(CPUState *cpu)
773 {
774     gdb_set_stop_cpu(cpu);
775     qemu_system_debug_request();
776     cpu->stopped = true;
777 }
778
779 #ifdef CONFIG_LINUX
780 static void sigbus_reraise(void)
781 {
782     sigset_t set;
783     struct sigaction action;
784
785     memset(&action, 0, sizeof(action));
786     action.sa_handler = SIG_DFL;
787     if (!sigaction(SIGBUS, &action, NULL)) {
788         raise(SIGBUS);
789         sigemptyset(&set);
790         sigaddset(&set, SIGBUS);
791         pthread_sigmask(SIG_UNBLOCK, &set, NULL);
792     }
793     perror("Failed to re-raise SIGBUS!");
794     abort();
795 }
796
797 static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
798                            void *ctx)
799 {
800     if (kvm_on_sigbus(siginfo->ssi_code,
801                       (void *)(intptr_t)siginfo->ssi_addr)) {
802         sigbus_reraise();
803     }
804 }
805
806 static void qemu_init_sigbus(void)
807 {
808     struct sigaction action;
809
810     memset(&action, 0, sizeof(action));
811     action.sa_flags = SA_SIGINFO;
812     action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
813     sigaction(SIGBUS, &action, NULL);
814
815     prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
816 }
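/*
 * The prctl opts the process in to "early kill" machine-check handling:
 * the kernel then delivers SIGBUS as soon as it finds an uncorrected
 * memory error in one of our pages, rather than waiting until the page
 * is next touched, which gives sigbus_handler() a chance to forward the
 * error to the guest through kvm_on_sigbus().
 */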
817
818 static void qemu_kvm_eat_signals(CPUState *cpu)
819 {
820     struct timespec ts = { 0, 0 };
821     siginfo_t siginfo;
822     sigset_t waitset;
823     sigset_t chkset;
824     int r;
825
826     sigemptyset(&waitset);
827     sigaddset(&waitset, SIG_IPI);
828     sigaddset(&waitset, SIGBUS);
829
830     do {
831         r = sigtimedwait(&waitset, &siginfo, &ts);
832         if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
833             perror("sigtimedwait");
834             exit(1);
835         }
836
837         switch (r) {
838         case SIGBUS:
839             if (kvm_on_sigbus_vcpu(cpu, siginfo.si_code, siginfo.si_addr)) {
840                 sigbus_reraise();
841             }
842             break;
843         default:
844             break;
845         }
846
847         r = sigpending(&chkset);
848         if (r == -1) {
849             perror("sigpending");
850             exit(1);
851         }
852     } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
853 }
854
855 #else /* !CONFIG_LINUX */
856
857 static void qemu_init_sigbus(void)
858 {
859 }
860
861 static void qemu_kvm_eat_signals(CPUState *cpu)
862 {
863 }
864 #endif /* !CONFIG_LINUX */
865
866 #ifndef _WIN32
867 static void dummy_signal(int sig)
868 {
869 }
870
871 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
872 {
873     int r;
874     sigset_t set;
875     struct sigaction sigact;
876
877     memset(&sigact, 0, sizeof(sigact));
878     sigact.sa_handler = dummy_signal;
879     sigaction(SIG_IPI, &sigact, NULL);
880
881     pthread_sigmask(SIG_BLOCK, NULL, &set);
882     sigdelset(&set, SIG_IPI);
883     sigdelset(&set, SIGBUS);
884     r = kvm_set_signal_mask(cpu, &set);
885     if (r) {
886         fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
887         exit(1);
888     }
889 }
890
891 #else /* _WIN32 */
892 static void qemu_kvm_init_cpu_signals(CPUState *cpu)
893 {
894     abort();
895 }
896 #endif /* _WIN32 */
897
898 static QemuMutex qemu_global_mutex;
899 static QemuCond qemu_io_proceeded_cond;
900 static unsigned iothread_requesting_mutex;
901
902 static QemuThread io_thread;
903
904 /* cpu creation */
905 static QemuCond qemu_cpu_cond;
906 /* system init */
907 static QemuCond qemu_pause_cond;
908
909 void qemu_init_cpu_loop(void)
910 {
911     qemu_init_sigbus();
912     qemu_cond_init(&qemu_cpu_cond);
913     qemu_cond_init(&qemu_pause_cond);
914     qemu_cond_init(&qemu_io_proceeded_cond);
915     qemu_mutex_init(&qemu_global_mutex);
916
917     qemu_thread_get_self(&io_thread);
918 }
919
920 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
921 {
922     do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
923 }
924
925 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
926 {
927     if (kvm_destroy_vcpu(cpu) < 0) {
928         error_report("kvm_destroy_vcpu failed");
929         exit(EXIT_FAILURE);
930     }
931 }
932
933 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
934 {
935 }
936
937 static void qemu_wait_io_event_common(CPUState *cpu)
938 {
939     if (cpu->stop) {
940         cpu->stop = false;
941         cpu->stopped = true;
942         qemu_cond_broadcast(&qemu_pause_cond);
943     }
944     process_queued_cpu_work(cpu);
945     cpu->thread_kicked = false;
946 }
947
948 static void qemu_tcg_wait_io_event(CPUState *cpu)
949 {
950     while (all_cpu_threads_idle()) {
951         qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
952     }
953
954     while (iothread_requesting_mutex) {
955         qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
956     }
957
958     CPU_FOREACH(cpu) {
959         qemu_wait_io_event_common(cpu);
960     }
961 }
962
963 static void qemu_kvm_wait_io_event(CPUState *cpu)
964 {
965     while (cpu_thread_is_idle(cpu)) {
966         qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
967     }
968
969     qemu_kvm_eat_signals(cpu);
970     qemu_wait_io_event_common(cpu);
971 }
972
973 static void *qemu_kvm_cpu_thread_fn(void *arg)
974 {
975     CPUState *cpu = arg;
976     int r;
977
978     rcu_register_thread();
979
980     qemu_mutex_lock_iothread();
981     qemu_thread_get_self(cpu->thread);
982     cpu->thread_id = qemu_get_thread_id();
983     cpu->can_do_io = 1;
984     current_cpu = cpu;
985
986     r = kvm_init_vcpu(cpu);
987     if (r < 0) {
988         fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
989         exit(1);
990     }
991
992     qemu_kvm_init_cpu_signals(cpu);
993
994     /* signal CPU creation */
995     cpu->created = true;
996     qemu_cond_signal(&qemu_cpu_cond);
997
998     do {
999         if (cpu_can_run(cpu)) {
1000             r = kvm_cpu_exec(cpu);
1001             if (r == EXCP_DEBUG) {
1002                 cpu_handle_guest_debug(cpu);
1003             }
1004         }
1005         qemu_kvm_wait_io_event(cpu);
1006     } while (!cpu->unplug || cpu_can_run(cpu));
1007
1008     qemu_kvm_destroy_vcpu(cpu);
1009     cpu->created = false;
1010     qemu_cond_signal(&qemu_cpu_cond);
1011     qemu_mutex_unlock_iothread();
1012     return NULL;
1013 }
1014
1015 static void *qemu_dummy_cpu_thread_fn(void *arg)
1016 {
1017 #ifdef _WIN32
1018     fprintf(stderr, "qtest is not supported under Windows\n");
1019     exit(1);
1020 #else
1021     CPUState *cpu = arg;
1022     sigset_t waitset;
1023     int r;
1024
1025     rcu_register_thread();
1026
1027     qemu_mutex_lock_iothread();
1028     qemu_thread_get_self(cpu->thread);
1029     cpu->thread_id = qemu_get_thread_id();
1030     cpu->can_do_io = 1;
1031
1032     sigemptyset(&waitset);
1033     sigaddset(&waitset, SIG_IPI);
1034
1035     /* signal CPU creation */
1036     cpu->created = true;
1037     qemu_cond_signal(&qemu_cpu_cond);
1038
1039     current_cpu = cpu;
1040     while (1) {
1041         current_cpu = NULL;
1042         qemu_mutex_unlock_iothread();
1043         do {
1044             int sig;
1045             r = sigwait(&waitset, &sig);
1046         } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1047         if (r == -1) {
1048             perror("sigwait");
1049             exit(1);
1050         }
1051         qemu_mutex_lock_iothread();
1052         current_cpu = cpu;
1053         qemu_wait_io_event_common(cpu);
1054     }
1055
1056     return NULL;
1057 #endif
1058 }
1059
1060 static int64_t tcg_get_icount_limit(void)
1061 {
1062     int64_t deadline;
1063
1064     if (replay_mode != REPLAY_MODE_PLAY) {
1065         deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1066
1067         /* Maintain prior (possibly buggy) behaviour where if no deadline
1068          * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1069          * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1070          * nanoseconds.
1071          */
1072         if ((deadline < 0) || (deadline > INT32_MAX)) {
1073             deadline = INT32_MAX;
1074         }
1075
1076         return qemu_icount_round(deadline);
1077     } else {
1078         return replay_get_instructions();
1079     }
1080 }
1081
1082 static void handle_icount_deadline(void)
1083 {
1084     if (use_icount) {
1085         int64_t deadline =
1086             qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1087
1088         if (deadline == 0) {
1089             qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1090         }
1091     }
1092 }
1093
1094 static int tcg_cpu_exec(CPUState *cpu)
1095 {
1096     int ret;
1097 #ifdef CONFIG_PROFILER
1098     int64_t ti;
1099 #endif
1100
1101 #ifdef CONFIG_PROFILER
1102     ti = profile_getclock();
1103 #endif
1104     if (use_icount) {
1105         int64_t count;
1106         int decr;
1107         timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1108                                     + cpu->icount_extra);
1109         cpu->icount_decr.u16.low = 0;
1110         cpu->icount_extra = 0;
1111         count = tcg_get_icount_limit();
1112         timers_state.qemu_icount += count;
1113         decr = (count > 0xffff) ? 0xffff : count;
1114         count -= decr;
1115         cpu->icount_decr.u16.low = decr;
1116         cpu->icount_extra = count;
1117     }
1118     cpu_exec_start(cpu);
1119     ret = cpu_exec(cpu);
1120     cpu_exec_end(cpu);
1121 #ifdef CONFIG_PROFILER
1122     tcg_time += profile_getclock() - ti;
1123 #endif
1124     if (use_icount) {
1125         /* Fold pending instructions back into the
1126            instruction counter, and clear the interrupt flag.  */
1127         timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1128                         + cpu->icount_extra);
1129         cpu->icount_decr.u32 = 0;
1130         cpu->icount_extra = 0;
1131         replay_account_executed_instructions();
1132     }
1133     return ret;
1134 }
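/*
 * Example of the budgeting above: if tcg_get_icount_limit() grants
 * 100000 instructions, qemu_icount is credited with all 100000 up front,
 * icount_decr.u16.low becomes 0xffff (65535) and icount_extra holds the
 * remaining 34465.  A concurrent cpu_get_icount_raw() subtracts the
 * still-unexecuted low + extra, so it counts only instructions actually
 * retired; the fold-back at the end of the slice restores the same
 * invariant for the stored qemu_icount.
 */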
1135
1136 /* Destroy any remaining vCPUs which have been unplugged and have
1137  * finished running
1138  */
1139 static void deal_with_unplugged_cpus(void)
1140 {
1141     CPUState *cpu;
1142
1143     CPU_FOREACH(cpu) {
1144         if (cpu->unplug && !cpu_can_run(cpu)) {
1145             qemu_tcg_destroy_vcpu(cpu);
1146             cpu->created = false;
1147             qemu_cond_signal(&qemu_cpu_cond);
1148             break;
1149         }
1150     }
1151 }
1152
1153 static void *qemu_tcg_cpu_thread_fn(void *arg)
1154 {
1155     CPUState *cpu = arg;
1156
1157     rcu_register_thread();
1158
1159     qemu_mutex_lock_iothread();
1160     qemu_thread_get_self(cpu->thread);
1161
1162     CPU_FOREACH(cpu) {
1163         cpu->thread_id = qemu_get_thread_id();
1164         cpu->created = true;
1165         cpu->can_do_io = 1;
1166     }
1167     qemu_cond_signal(&qemu_cpu_cond);
1168
1169     /* wait for initial kick-off after machine start */
1170     while (first_cpu->stopped) {
1171         qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1172
1173         /* process any pending work */
1174         CPU_FOREACH(cpu) {
1175             qemu_wait_io_event_common(cpu);
1176         }
1177     }
1178
1179     /* force the first iteration into the io-event wait so pending work runs */
1180     atomic_mb_set(&exit_request, 1);
1181
1182     cpu = first_cpu;
1183
1184     while (1) {
1185         /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
1186         qemu_account_warp_timer();
1187
1188         if (!cpu) {
1189             cpu = first_cpu;
1190         }
1191
1192         for (; cpu != NULL && !exit_request; cpu = CPU_NEXT(cpu)) {
1193
1194             qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1195                               (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1196
1197             if (cpu_can_run(cpu)) {
1198                 int r;
1199                 r = tcg_cpu_exec(cpu);
1200                 if (r == EXCP_DEBUG) {
1201                     cpu_handle_guest_debug(cpu);
1202                     break;
1203                 }
1204             } else if (cpu->stop || cpu->stopped) {
1205                 if (cpu->unplug) {
1206                     cpu = CPU_NEXT(cpu);
1207                 }
1208                 break;
1209             }
1210
1211         } /* for cpu.. */
1212
1213         /* Pairs with smp_wmb in qemu_cpu_kick.  */
1214         atomic_mb_set(&exit_request, 0);
1215
1216         handle_icount_deadline();
1217
1218         qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
1219         deal_with_unplugged_cpus();
1220     }
1221
1222     return NULL;
1223 }
1224
1225 static void *qemu_hax_cpu_thread_fn(void *arg)
1226 {
1227     CPUState *cpu = arg;
1228     int r;
1229     qemu_thread_get_self(cpu->thread);
1230     qemu_mutex_lock(&qemu_global_mutex);
1231
1232     cpu->thread_id = qemu_get_thread_id();
1233     cpu->created = true;
1234     cpu->halted = 0;
1235     current_cpu = cpu;
1236
1237     hax_init_vcpu(cpu);
1238     qemu_cond_signal(&qemu_cpu_cond);
1239
1240     while (1) {
1241         if (cpu_can_run(cpu)) {
1242             r = hax_smp_cpu_exec(cpu);
1243             if (r == EXCP_DEBUG) {
1244                 cpu_handle_guest_debug(cpu);
1245             }
1246         }
1247
1248         while (cpu_thread_is_idle(cpu)) {
1249             qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1250         }
1251 #ifdef _WIN32
1252         SleepEx(0, TRUE);
1253 #endif
1254         qemu_wait_io_event_common(cpu);
1255     }
1256     return NULL;
1257 }
1258
1259 #ifdef _WIN32
1260 static void CALLBACK dummy_apc_func(ULONG_PTR unused)
1261 {
1262 }
1263 #endif
1264
1265 static void qemu_cpu_kick_thread(CPUState *cpu)
1266 {
1267 #ifndef _WIN32
1268     int err;
1269
1270     if (cpu->thread_kicked) {
1271         return;
1272     }
1273     cpu->thread_kicked = true;
1274     err = pthread_kill(cpu->thread->thread, SIG_IPI);
1275     if (err) {
1276         fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1277         exit(1);
1278     }
1279 #else /* _WIN32 */
1280     if (!qemu_cpu_is_self(cpu)) {
1281         if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1282             fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1283                     __func__, GetLastError());
1284             exit(1);
1285         }
1286     }
1287 #endif
1288 }
1289
1290 static void qemu_cpu_kick_no_halt(void)
1291 {
1292     CPUState *cpu;
1293     /* Ensure whatever caused the exit has reached the CPU threads before
1294      * writing exit_request.
1295      */
1296     atomic_mb_set(&exit_request, 1);
1297     cpu = atomic_mb_read(&tcg_current_cpu);
1298     if (cpu) {
1299         cpu_exit(cpu);
1300     }
1301 }
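/*
 * No host signal is needed to kick TCG: all vCPUs share the single TCG
 * thread, and cpu_exit() flags the current CPU so the generated code
 * returns to the main loop at the next translation-block boundary.
 * Writing exit_request first (with a barrier) stops the round-robin loop
 * in qemu_tcg_cpu_thread_fn() from moving on to another vCPU in the
 * meantime.
 */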
1302
1303 void qemu_cpu_kick(CPUState *cpu)
1304 {
1305     qemu_cond_broadcast(cpu->halt_cond);
1306     if (tcg_enabled()) {
1307         qemu_cpu_kick_no_halt();
1308     } else {
1309         if (hax_enabled()) {
1310             /*
1311              * FIXME: race condition with the exit_request check in
1312              * hax_vcpu_hax_exec
1313              */
1314             cpu->exit_request = 1;
1315         }
1316         qemu_cpu_kick_thread(cpu);
1317     }
1318 }
1319
1320 void qemu_cpu_kick_self(void)
1321 {
1322     assert(current_cpu);
1323     qemu_cpu_kick_thread(current_cpu);
1324 }
1325
1326 bool qemu_cpu_is_self(CPUState *cpu)
1327 {
1328     return qemu_thread_is_self(cpu->thread);
1329 }
1330
1331 bool qemu_in_vcpu_thread(void)
1332 {
1333     return current_cpu && qemu_cpu_is_self(current_cpu);
1334 }
1335
1336 static __thread bool iothread_locked = false;
1337
1338 bool qemu_mutex_iothread_locked(void)
1339 {
1340     return iothread_locked;
1341 }
1342
1343 void qemu_mutex_lock_iothread(void)
1344 {
1345     atomic_inc(&iothread_requesting_mutex);
1346     /* In the simple case there is no need to bump the VCPU thread out of
1347      * TCG code execution.
1348      */
1349     if (!tcg_enabled() || qemu_in_vcpu_thread() ||
1350         !first_cpu || !first_cpu->created) {
1351         qemu_mutex_lock(&qemu_global_mutex);
1352         atomic_dec(&iothread_requesting_mutex);
1353     } else {
1354         if (qemu_mutex_trylock(&qemu_global_mutex)) {
1355             qemu_cpu_kick_no_halt();
1356             qemu_mutex_lock(&qemu_global_mutex);
1357         }
1358         atomic_dec(&iothread_requesting_mutex);
1359         qemu_cond_broadcast(&qemu_io_proceeded_cond);
1360     }
1361     iothread_locked = true;
1362 }
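/*
 * The trylock-then-kick dance exists because the TCG thread holds the
 * BQL while executing guest code, so a plain qemu_mutex_lock() here
 * could block for as long as the guest keeps running.  When the trylock
 * fails, qemu_cpu_kick_no_halt() bumps the vCPU out of its translation
 * block; the TCG thread then waits on qemu_io_proceeded_cond in
 * qemu_tcg_wait_io_event() until iothread_requesting_mutex drops back
 * to zero.
 */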
1363
1364 void qemu_mutex_unlock_iothread(void)
1365 {
1366     iothread_locked = false;
1367     qemu_mutex_unlock(&qemu_global_mutex);
1368 }
1369
1370 static bool all_vcpus_paused(void)
1371 {
1372     CPUState *cpu;
1373
1374     CPU_FOREACH(cpu) {
1375         if (!cpu->stopped) {
1376             return false;
1377         }
1378     }
1379
1380     return true;
1381 }
1382
1383 void pause_all_vcpus(void)
1384 {
1385     CPUState *cpu;
1386
1387     qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1388     CPU_FOREACH(cpu) {
1389         cpu->stop = true;
1390         qemu_cpu_kick(cpu);
1391     }
1392
1393     if (qemu_in_vcpu_thread()) {
1394         cpu_stop_current();
1395         if (!kvm_enabled()) {
1396             CPU_FOREACH(cpu) {
1397                 cpu->stop = false;
1398                 cpu->stopped = true;
1399             }
1400             return;
1401         }
1402     }
1403
1404     while (!all_vcpus_paused()) {
1405         qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1406         CPU_FOREACH(cpu) {
1407             qemu_cpu_kick(cpu);
1408         }
1409     }
1410 }
1411
1412 void cpu_resume(CPUState *cpu)
1413 {
1414     cpu->stop = false;
1415     cpu->stopped = false;
1416     qemu_cpu_kick(cpu);
1417 }
1418
1419 void resume_all_vcpus(void)
1420 {
1421     CPUState *cpu;
1422
1423     qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1424     CPU_FOREACH(cpu) {
1425         cpu_resume(cpu);
1426     }
1427 }
1428
1429 void cpu_remove(CPUState *cpu)
1430 {
1431     cpu->stop = true;
1432     cpu->unplug = true;
1433     qemu_cpu_kick(cpu);
1434 }
1435
1436 void cpu_remove_sync(CPUState *cpu)
1437 {
1438     cpu_remove(cpu);
1439     while (cpu->created) {
1440         qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1441     }
1442 }
1443
1444 /* Size of the temporary buffer used to form a vCPU thread name */
1445 #define VCPU_THREAD_NAME_SIZE 16
1446
1447 static void qemu_tcg_init_vcpu(CPUState *cpu)
1448 {
1449     char thread_name[VCPU_THREAD_NAME_SIZE];
1450     static QemuCond *tcg_halt_cond;
1451     static QemuThread *tcg_cpu_thread;
1452
1453     /* share a single thread for all cpus with TCG */
1454     if (!tcg_cpu_thread) {
1455         cpu->thread = g_malloc0(sizeof(QemuThread));
1456         cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1457         qemu_cond_init(cpu->halt_cond);
1458         tcg_halt_cond = cpu->halt_cond;
1459         snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1460                  cpu->cpu_index);
1461         qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1462                            cpu, QEMU_THREAD_JOINABLE);
1463 #ifdef _WIN32
1464         cpu->hThread = qemu_thread_get_handle(cpu->thread);
1465 #endif
1466         while (!cpu->created) {
1467             qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1468         }
1469         tcg_cpu_thread = cpu->thread;
1470     } else {
1471         cpu->thread = tcg_cpu_thread;
1472         cpu->halt_cond = tcg_halt_cond;
1473     }
1474 }
1475
1476 static void qemu_hax_start_vcpu(CPUState *cpu)
1477 {
1478     char thread_name[VCPU_THREAD_NAME_SIZE];
1479
1480     cpu->thread = g_malloc0(sizeof(QemuThread));
1481     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1482     qemu_cond_init(cpu->halt_cond);
1483
1484     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1485              cpu->cpu_index);
1486     qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1487                        cpu, QEMU_THREAD_JOINABLE);
1488 #ifdef _WIN32
1489     cpu->hThread = qemu_thread_get_handle(cpu->thread);
1490 #endif
1491     while (!cpu->created) {
1492         qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1493     }
1494 }
1495
1496 static void qemu_kvm_start_vcpu(CPUState *cpu)
1497 {
1498     char thread_name[VCPU_THREAD_NAME_SIZE];
1499
1500     cpu->thread = g_malloc0(sizeof(QemuThread));
1501     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1502     qemu_cond_init(cpu->halt_cond);
1503     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1504              cpu->cpu_index);
1505     qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1506                        cpu, QEMU_THREAD_JOINABLE);
1507     while (!cpu->created) {
1508         qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1509     }
1510 }
1511
1512 static void qemu_dummy_start_vcpu(CPUState *cpu)
1513 {
1514     char thread_name[VCPU_THREAD_NAME_SIZE];
1515
1516     cpu->thread = g_malloc0(sizeof(QemuThread));
1517     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1518     qemu_cond_init(cpu->halt_cond);
1519     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1520              cpu->cpu_index);
1521     qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1522                        QEMU_THREAD_JOINABLE);
1523     while (!cpu->created) {
1524         qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1525     }
1526 }
1527
1528 void qemu_init_vcpu(CPUState *cpu)
1529 {
1530     cpu->nr_cores = smp_cores;
1531     cpu->nr_threads = smp_threads;
1532     cpu->stopped = true;
1533
1534     if (!cpu->as) {
1535         /* If the target cpu hasn't set up any address spaces itself,
1536          * give it the default one.
1537          */
1538         AddressSpace *as = address_space_init_shareable(cpu->memory,
1539                                                         "cpu-memory");
1540         cpu->num_ases = 1;
1541         cpu_address_space_init(cpu, as, 0);
1542     }
1543
1544     if (kvm_enabled()) {
1545         qemu_kvm_start_vcpu(cpu);
1546     } else if (hax_enabled()) {
1547         qemu_hax_start_vcpu(cpu);
1548     } else if (tcg_enabled()) {
1549         qemu_tcg_init_vcpu(cpu);
1550     } else {
1551         qemu_dummy_start_vcpu(cpu);
1552     }
1553 }
1554
1555 void cpu_stop_current(void)
1556 {
1557     if (current_cpu) {
1558         current_cpu->stop = false;
1559         current_cpu->stopped = true;
1560         cpu_exit(current_cpu);
1561         qemu_cond_broadcast(&qemu_pause_cond);
1562     }
1563 }
1564
1565 int vm_stop(RunState state)
1566 {
1567     if (qemu_in_vcpu_thread()) {
1568         qemu_system_vmstop_request_prepare();
1569         qemu_system_vmstop_request(state);
1570         /*
1571          * FIXME: should not return to device code in case
1572          * vm_stop() has been requested.
1573          */
1574         cpu_stop_current();
1575         return 0;
1576     }
1577
1578     return do_vm_stop(state);
1579 }
1580
1581 /* Does a state transition even if the VM is already stopped;
1582    the current state is forgotten forever.  */
1583 int vm_stop_force_state(RunState state)
1584 {
1585     if (runstate_is_running()) {
1586         return vm_stop(state);
1587     } else {
1588         runstate_set(state);
1589
1590         bdrv_drain_all();
1591         /* Make sure to return an error if the flush in a previous vm_stop()
1592          * failed. */
1593         return bdrv_flush_all();
1594     }
1595 }
1596
1597 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1598 {
1599     /* XXX: implement xxx_cpu_list for targets that still lack it */
1600 #if defined(cpu_list)
1601     cpu_list(f, cpu_fprintf);
1602 #endif
1603 }
1604
1605 CpuInfoList *qmp_query_cpus(Error **errp)
1606 {
1607     CpuInfoList *head = NULL, *cur_item = NULL;
1608     CPUState *cpu;
1609
1610     CPU_FOREACH(cpu) {
1611         CpuInfoList *info;
1612 #if defined(TARGET_I386)
1613         X86CPU *x86_cpu = X86_CPU(cpu);
1614         CPUX86State *env = &x86_cpu->env;
1615 #elif defined(TARGET_PPC)
1616         PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1617         CPUPPCState *env = &ppc_cpu->env;
1618 #elif defined(TARGET_SPARC)
1619         SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1620         CPUSPARCState *env = &sparc_cpu->env;
1621 #elif defined(TARGET_MIPS)
1622         MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1623         CPUMIPSState *env = &mips_cpu->env;
1624 #elif defined(TARGET_TRICORE)
1625         TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1626         CPUTriCoreState *env = &tricore_cpu->env;
1627 #endif
1628
1629         cpu_synchronize_state(cpu);
1630
1631         info = g_malloc0(sizeof(*info));
1632         info->value = g_malloc0(sizeof(*info->value));
1633         info->value->CPU = cpu->cpu_index;
1634         info->value->current = (cpu == first_cpu);
1635         info->value->halted = cpu->halted;
1636         info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
1637         info->value->thread_id = cpu->thread_id;
1638 #if defined(TARGET_I386)
1639         info->value->arch = CPU_INFO_ARCH_X86;
1640         info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
1641 #elif defined(TARGET_PPC)
1642         info->value->arch = CPU_INFO_ARCH_PPC;
1643         info->value->u.ppc.nip = env->nip;
1644 #elif defined(TARGET_SPARC)
1645         info->value->arch = CPU_INFO_ARCH_SPARC;
1646         info->value->u.q_sparc.pc = env->pc;
1647         info->value->u.q_sparc.npc = env->npc;
1648 #elif defined(TARGET_MIPS)
1649         info->value->arch = CPU_INFO_ARCH_MIPS;
1650         info->value->u.q_mips.PC = env->active_tc.PC;
1651 #elif defined(TARGET_TRICORE)
1652         info->value->arch = CPU_INFO_ARCH_TRICORE;
1653         info->value->u.tricore.PC = env->PC;
1654 #else
1655         info->value->arch = CPU_INFO_ARCH_OTHER;
1656 #endif
1657
1658         /* XXX: waiting for the qapi to support GSList */
1659         if (!cur_item) {
1660             head = cur_item = info;
1661         } else {
1662             cur_item->next = info;
1663             cur_item = info;
1664         }
1665     }
1666
1667     return head;
1668 }
1669
1670 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1671                  bool has_cpu, int64_t cpu_index, Error **errp)
1672 {
1673     FILE *f;
1674     uint32_t l;
1675     CPUState *cpu;
1676     uint8_t buf[1024];
1677     int64_t orig_addr = addr, orig_size = size;
1678
1679     if (!has_cpu) {
1680         cpu_index = 0;
1681     }
1682
1683     cpu = qemu_get_cpu(cpu_index);
1684     if (cpu == NULL) {
1685         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1686                    "a CPU number");
1687         return;
1688     }
1689
1690     f = fopen(filename, "wb");
1691     if (!f) {
1692         error_setg_file_open(errp, errno, filename);
1693         return;
1694     }
1695
1696     while (size != 0) {
1697         l = sizeof(buf);
1698         if (l > size)
1699             l = size;
1700         if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1701             error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1702                              " specified", orig_addr, orig_size);
1703             goto exit;
1704         }
1705         if (fwrite(buf, 1, l, f) != l) {
1706             error_setg(errp, QERR_IO_ERROR);
1707             goto exit;
1708         }
1709         addr += l;
1710         size -= l;
1711     }
1712
1713 exit:
1714     fclose(f);
1715 }
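/*
 * Over QMP this is the "memsave" command; in the QAPI schema the address
 * parameter is named "val".  For example:
 *
 *   { "execute": "memsave",
 *     "arguments": { "val": 4096, "size": 1024,
 *                    "filename": "/tmp/guest-mem.bin" } }
 *
 * dumps 1 KiB of guest virtual memory, translated by CPU 0 unless a
 * "cpu-index" argument is given.  qmp_pmemsave() below is the physical
 * memory counterpart ("pmemsave").
 */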
1716
1717 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1718                   Error **errp)
1719 {
1720     FILE *f;
1721     uint32_t l;
1722     uint8_t buf[1024];
1723
1724     f = fopen(filename, "wb");
1725     if (!f) {
1726         error_setg_file_open(errp, errno, filename);
1727         return;
1728     }
1729
1730     while (size != 0) {
1731         l = sizeof(buf);
1732         if (l > size)
1733             l = size;
1734         cpu_physical_memory_read(addr, buf, l);
1735         if (fwrite(buf, 1, l, f) != l) {
1736             error_setg(errp, QERR_IO_ERROR);
1737             goto exit;
1738         }
1739         addr += l;
1740         size -= l;
1741     }
1742
1743 exit:
1744     fclose(f);
1745 }
1746
1747 void qmp_inject_nmi(Error **errp)
1748 {
1749     nmi_monitor_handle(monitor_get_cpu_index(), errp);
1750 }
1751
1752 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1753 {
1754     if (!use_icount) {
1755         return;
1756     }
1757
1758     cpu_fprintf(f, "Host - Guest clock  %"PRIi64" ms\n",
1759                 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1760     if (icount_align_option) {
1761         cpu_fprintf(f, "Max guest delay     %"PRIi64" ms\n", -max_delay/SCALE_MS);
1762         cpu_fprintf(f, "Max guest advance   %"PRIi64" ms\n", max_advance/SCALE_MS);
1763     } else {
1764         cpu_fprintf(f, "Max guest delay     NA\n");
1765         cpu_fprintf(f, "Max guest advance   NA\n");
1766     }
1767 }