[qemu.git] / cpus.c
1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24
25 /* Needed early for CONFIG_BSD etc. */
26 #include "qemu/osdep.h"
27 #include "qemu-common.h"
28 #include "qemu/config-file.h"
29 #include "cpu.h"
30 #include "monitor/monitor.h"
31 #include "qapi/qmp/qerror.h"
32 #include "qemu/error-report.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/block-backend.h"
35 #include "exec/gdbstub.h"
36 #include "sysemu/dma.h"
37 #include "sysemu/hw_accel.h"
38 #include "sysemu/kvm.h"
39 #include "sysemu/hax.h"
40 #include "qmp-commands.h"
41 #include "exec/exec-all.h"
42
43 #include "qemu/thread.h"
44 #include "sysemu/cpus.h"
45 #include "sysemu/qtest.h"
46 #include "qemu/main-loop.h"
47 #include "qemu/bitmap.h"
48 #include "qemu/seqlock.h"
49 #include "tcg.h"
50 #include "qapi-event.h"
51 #include "hw/nmi.h"
52 #include "sysemu/replay.h"
53
54 #ifdef CONFIG_LINUX
55
56 #include <sys/prctl.h>
57
58 #ifndef PR_MCE_KILL
59 #define PR_MCE_KILL 33
60 #endif
61
62 #ifndef PR_MCE_KILL_SET
63 #define PR_MCE_KILL_SET 1
64 #endif
65
66 #ifndef PR_MCE_KILL_EARLY
67 #define PR_MCE_KILL_EARLY 1
68 #endif
69
70 #endif /* CONFIG_LINUX */
71
72 int64_t max_delay;
73 int64_t max_advance;
74
75 /* vcpu throttling controls */
76 static QEMUTimer *throttle_timer;
77 static unsigned int throttle_percentage;
78
79 #define CPU_THROTTLE_PCT_MIN 1
80 #define CPU_THROTTLE_PCT_MAX 99
81 #define CPU_THROTTLE_TIMESLICE_NS 10000000
82
83 bool cpu_is_stopped(CPUState *cpu)
84 {
85     return cpu->stopped || !runstate_is_running();
86 }
87
88 static bool cpu_thread_is_idle(CPUState *cpu)
89 {
90     if (cpu->stop || cpu->queued_work_first) {
91         return false;
92     }
93     if (cpu_is_stopped(cpu)) {
94         return true;
95     }
96     if (!cpu->halted || cpu_has_work(cpu) ||
97         kvm_halt_in_kernel()) {
98         return false;
99     }
100     return true;
101 }
102
103 static bool all_cpu_threads_idle(void)
104 {
105     CPUState *cpu;
106
107     CPU_FOREACH(cpu) {
108         if (!cpu_thread_is_idle(cpu)) {
109             return false;
110         }
111     }
112     return true;
113 }
114
115 /***********************************************************/
116 /* guest cycle counter */
117
118 /* Protected by TimersState seqlock */
119
120 static bool icount_sleep = true;
121 static int64_t vm_clock_warp_start = -1;
122 /* Conversion factor from emulated instructions to virtual clock ticks.  */
123 static int icount_time_shift;
124 /* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
125 #define MAX_ICOUNT_SHIFT 10
126
127 static QEMUTimer *icount_rt_timer;
128 static QEMUTimer *icount_vm_timer;
129 static QEMUTimer *icount_warp_timer;
130
131 typedef struct TimersState {
132     /* Protected by BQL.  */
133     int64_t cpu_ticks_prev;
134     int64_t cpu_ticks_offset;
135
136     /* cpu_clock_offset can be read out of BQL, so protect it with
137      * this lock.
138      */
139     QemuSeqLock vm_clock_seqlock;
140     int64_t cpu_clock_offset;
141     int32_t cpu_ticks_enabled;
142     int64_t dummy;
143
144     /* Compensate for varying guest execution speed.  */
145     int64_t qemu_icount_bias;
146     /* Only written by TCG thread */
147     int64_t qemu_icount;
148 } TimersState;
149
150 static TimersState timers_state;
151 bool mttcg_enabled;
152
153 /*
154  * We default to false if we know other options have been enabled
155  * which are currently incompatible with MTTCG. Otherwise, once a
156  * guest (target) has been updated to support:
157  *   - atomic instructions
158  *   - memory ordering primitives (barriers)
159  * it can set the appropriate CONFIG flags in ${target}-softmmu.mak
160  *
161  * Once a guest architecture has been converted to the new primitives
162  * there are two remaining limitations to check.
163  *
164  * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
165  * - The host must have a stronger memory order than the guest
166  *
167  * It may be possible in future to support strong guests on weak hosts
168  * but that will require tagging all load/stores in a guest with their
169  * implicit memory order requirements which would likely slow things
170  * down a lot.
171  */
172
173 static bool check_tcg_memory_orders_compatible(void)
174 {
175 #if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
176     return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
177 #else
178     return false;
179 #endif
180 }
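/* Illustrative note, not from the original source: the check above
 * treats TCG_GUEST_DEFAULT_MO and TCG_TARGET_DEFAULT_MO as bitmasks of
 * memory-ordering guarantees (TCG_MO_LD_LD, TCG_MO_ST_ST, ...) and
 * accepts the pairing only if the guest's required bits are a subset
 * of what the host provides:
 *
 *   guest needs LD_LD|ST_ST, host provides TCG_MO_ALL   -> compatible
 *   guest needs LD_LD|ST_ST, host provides only ST_ST   -> LD_LD
 *     survives the & ~mask, so the function returns false
 *
 * This is why a strongly-ordered guest such as x86 cannot default to
 * MTTCG on a weakly-ordered host such as ARM.
 */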
181
182 static bool default_mttcg_enabled(void)
183 {
184     QemuOpts *icount_opts = qemu_find_opts_singleton("icount");
185     const char *rr = qemu_opt_get(icount_opts, "rr");
186
187     if (rr || TCG_OVERSIZED_GUEST) {
188         return false;
189     } else {
190 #ifdef TARGET_SUPPORTS_MTTCG
191         return check_tcg_memory_orders_compatible();
192 #else
193         return false;
194 #endif
195     }
196 }
197
198 void qemu_tcg_configure(QemuOpts *opts, Error **errp)
199 {
200     const char *t = qemu_opt_get(opts, "thread");
201     if (t) {
202         if (strcmp(t, "multi") == 0) {
203             if (TCG_OVERSIZED_GUEST) {
204                 error_setg(errp, "No MTTCG when guest word size > host's");
205             } else {
206                 if (!check_tcg_memory_orders_compatible()) {
207                     error_report("Guest expects a stronger memory ordering "
208                                  "than the host provides");
209                     error_printf("This may cause strange/hard to debug errors\n");
210                 }
211                 mttcg_enabled = true;
212             }
213         } else if (strcmp(t, "single") == 0) {
214             mttcg_enabled = false;
215         } else {
216             error_setg(errp, "Invalid 'thread' setting %s", t);
217         }
218     } else {
219         mttcg_enabled = default_mttcg_enabled();
220     }
221 }
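/* Usage sketch, not from the original source: the "thread" property
 * parsed above is normally supplied via the accelerator option, e.g.
 *
 *   qemu-system-arm -accel tcg,thread=multi ...    # request MTTCG
 *   qemu-system-arm -accel tcg,thread=single ...   # force round-robin
 *
 * Without an explicit setting, default_mttcg_enabled() decides based
 * on record/replay, guest word size and memory-order compatibility.
 */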
222
223 int64_t cpu_get_icount_raw(void)
224 {
225     int64_t icount;
226     CPUState *cpu = current_cpu;
227
228     icount = timers_state.qemu_icount;
229     if (cpu) {
230         if (!cpu->can_do_io) {
231             fprintf(stderr, "Bad icount read\n");
232             exit(1);
233         }
234         icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
235     }
236     return icount;
237 }
238
239 /* Return the virtual CPU time, based on the instruction counter.  */
240 static int64_t cpu_get_icount_locked(void)
241 {
242     int64_t icount = cpu_get_icount_raw();
243     return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
244 }
245
246 int64_t cpu_get_icount(void)
247 {
248     int64_t icount;
249     unsigned start;
250
251     do {
252         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
253         icount = cpu_get_icount_locked();
254     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
255
256     return icount;
257 }
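/* Reader-side pattern, illustrative note: seqlock_read_begin() samples
 * the sequence counter and seqlock_read_retry() detects whether a
 * writer ran in between, in which case the loop simply re-reads.
 * Writers are already serialized by the BQL, so only readers running
 * outside the BQL need this retry loop.
 */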
258
259 int64_t cpu_icount_to_ns(int64_t icount)
260 {
261     return icount << icount_time_shift;
262 }
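/* Worked example, illustrative only: with icount_time_shift == 3 each
 * emulated instruction accounts for 1 << 3 == 8 ns of virtual time,
 * i.e. a nominal 125 MIPS guest. A shift of 0 corresponds to 1000
 * MIPS, and MAX_ICOUNT_SHIFT (10) to 1024 ns per instruction, roughly
 * the 1 MIPS floor mentioned above.
 */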
263
264 /* return the time elapsed in VM between vm_start and vm_stop.  Unless
265  * icount is active, cpu_get_ticks() uses units of the host CPU cycle
266  * counter.
267  *
268  * Caller must hold the BQL
269  */
270 int64_t cpu_get_ticks(void)
271 {
272     int64_t ticks;
273
274     if (use_icount) {
275         return cpu_get_icount();
276     }
277
278     ticks = timers_state.cpu_ticks_offset;
279     if (timers_state.cpu_ticks_enabled) {
280         ticks += cpu_get_host_ticks();
281     }
282
283     if (timers_state.cpu_ticks_prev > ticks) {
284         /* Note: non-increasing ticks may happen if the host uses
285            software suspend */
286         timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
287         ticks = timers_state.cpu_ticks_prev;
288     }
289
290     timers_state.cpu_ticks_prev = ticks;
291     return ticks;
292 }
293
294 static int64_t cpu_get_clock_locked(void)
295 {
296     int64_t time;
297
298     time = timers_state.cpu_clock_offset;
299     if (timers_state.cpu_ticks_enabled) {
300         time += get_clock();
301     }
302
303     return time;
304 }
305
306 /* Return the monotonic time elapsed in VM, i.e.,
307  * the time between vm_start and vm_stop
308  */
309 int64_t cpu_get_clock(void)
310 {
311     int64_t ti;
312     unsigned start;
313
314     do {
315         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
316         ti = cpu_get_clock_locked();
317     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
318
319     return ti;
320 }
321
322 /* enable cpu_get_ticks()
323  * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
324  */
325 void cpu_enable_ticks(void)
326 {
327     /* Here, the real thing protected by seqlock is cpu_clock_offset. */
328     seqlock_write_begin(&timers_state.vm_clock_seqlock);
329     if (!timers_state.cpu_ticks_enabled) {
330         timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
331         timers_state.cpu_clock_offset -= get_clock();
332         timers_state.cpu_ticks_enabled = 1;
333     }
334     seqlock_write_end(&timers_state.vm_clock_seqlock);
335 }
336
337 /* disable cpu_get_ticks(): the clock is stopped. You must not call
338  * cpu_get_ticks() after that.
339  * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
340  */
341 void cpu_disable_ticks(void)
342 {
343     /* Here, the real thing protected by seqlock is cpu_clock_offset. */
344     seqlock_write_begin(&timers_state.vm_clock_seqlock);
345     if (timers_state.cpu_ticks_enabled) {
346         timers_state.cpu_ticks_offset += cpu_get_host_ticks();
347         timers_state.cpu_clock_offset = cpu_get_clock_locked();
348         timers_state.cpu_ticks_enabled = 0;
349     }
350     seqlock_write_end(&timers_state.vm_clock_seqlock);
351 }
352
353 /* Correlation between real and virtual time is always going to be
354    fairly approximate, so ignore small variation.
355    When the guest is idle, real and virtual time will be aligned in
356    the IO wait loop.  */
357 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
358
359 static void icount_adjust(void)
360 {
361     int64_t cur_time;
362     int64_t cur_icount;
363     int64_t delta;
364
365     /* Protected by TimersState mutex.  */
366     static int64_t last_delta;
367
368     /* If the VM is not running, then do nothing.  */
369     if (!runstate_is_running()) {
370         return;
371     }
372
373     seqlock_write_begin(&timers_state.vm_clock_seqlock);
374     cur_time = cpu_get_clock_locked();
375     cur_icount = cpu_get_icount_locked();
376
377     delta = cur_icount - cur_time;
378     /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
379     if (delta > 0
380         && last_delta + ICOUNT_WOBBLE < delta * 2
381         && icount_time_shift > 0) {
382         /* The guest is getting too far ahead.  Slow time down.  */
383         icount_time_shift--;
384     }
385     if (delta < 0
386         && last_delta - ICOUNT_WOBBLE > delta * 2
387         && icount_time_shift < MAX_ICOUNT_SHIFT) {
388         /* The guest is getting too far behind.  Speed time up.  */
389         icount_time_shift++;
390     }
391     last_delta = delta;
392     timers_state.qemu_icount_bias = cur_icount
393                               - (timers_state.qemu_icount << icount_time_shift);
394     seqlock_write_end(&timers_state.vm_clock_seqlock);
395 }
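/* Illustrative walk-through, not from the original source: suppose the
 * virtual clock is ahead of real time (delta > 0) and was already
 * ahead on the previous tick (last_delta + ICOUNT_WOBBLE < delta * 2).
 * icount_time_shift is then decremented, so each instruction adds
 * fewer virtual nanoseconds and the guest clock slows down relative to
 * the host. qemu_icount_bias is recomputed so that the current virtual
 * time is unchanged under the new shift; only the future rate changes.
 */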
396
397 static void icount_adjust_rt(void *opaque)
398 {
399     timer_mod(icount_rt_timer,
400               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
401     icount_adjust();
402 }
403
404 static void icount_adjust_vm(void *opaque)
405 {
406     timer_mod(icount_vm_timer,
407                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
408                    NANOSECONDS_PER_SECOND / 10);
409     icount_adjust();
410 }
411
412 static int64_t qemu_icount_round(int64_t count)
413 {
414     return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
415 }
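/* Worked example, illustrative only: with icount_time_shift == 3 a
 * 100 ns deadline rounds up to (100 + 7) >> 3 == 13 instructions,
 * which at 8 ns per instruction covers 104 ns, so the instruction
 * budget never undershoots the timer deadline.
 */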
416
417 static void icount_warp_rt(void)
418 {
419     unsigned seq;
420     int64_t warp_start;
421
422     /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
423      * changes from -1 to another value, so the race here is okay.
424      */
425     do {
426         seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
427         warp_start = vm_clock_warp_start;
428     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
429
430     if (warp_start == -1) {
431         return;
432     }
433
434     seqlock_write_begin(&timers_state.vm_clock_seqlock);
435     if (runstate_is_running()) {
436         int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
437                                      cpu_get_clock_locked());
438         int64_t warp_delta;
439
440         warp_delta = clock - vm_clock_warp_start;
441         if (use_icount == 2) {
442             /*
443              * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
444              * far ahead of real time.
445              */
446             int64_t cur_icount = cpu_get_icount_locked();
447             int64_t delta = clock - cur_icount;
448             warp_delta = MIN(warp_delta, delta);
449         }
450         timers_state.qemu_icount_bias += warp_delta;
451     }
452     vm_clock_warp_start = -1;
453     seqlock_write_end(&timers_state.vm_clock_seqlock);
454
455     if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
456         qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
457     }
458 }
459
460 static void icount_timer_cb(void *opaque)
461 {
462     /* No need for a checkpoint because the timer already synchronizes
463      * with CHECKPOINT_CLOCK_VIRTUAL_RT.
464      */
465     icount_warp_rt();
466 }
467
468 void qtest_clock_warp(int64_t dest)
469 {
470     int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
471     AioContext *aio_context;
472     assert(qtest_enabled());
473     aio_context = qemu_get_aio_context();
474     while (clock < dest) {
475         int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
476         int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
477
478         seqlock_write_begin(&timers_state.vm_clock_seqlock);
479         timers_state.qemu_icount_bias += warp;
480         seqlock_write_end(&timers_state.vm_clock_seqlock);
481
482         qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
483         timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
484         clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
485     }
486     qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
487 }
488
489 void qemu_start_warp_timer(void)
490 {
491     int64_t clock;
492     int64_t deadline;
493
494     if (!use_icount) {
495         return;
496     }
497
498     /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
499      * do not fire, so computing the deadline does not make sense.
500      */
501     if (!runstate_is_running()) {
502         return;
503     }
504
505     /* warp clock deterministically in record/replay mode */
506     if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
507         return;
508     }
509
510     if (!all_cpu_threads_idle()) {
511         return;
512     }
513
514     if (qtest_enabled()) {
515         /* When testing, qtest commands advance icount.  */
516         return;
517     }
518
519     /* We want to use the earliest deadline from ALL vm_clocks */
520     clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
521     deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
522     if (deadline < 0) {
523         static bool notified;
524         if (!icount_sleep && !notified) {
525             error_report("WARNING: icount sleep disabled and no active timers");
526             notified = true;
527         }
528         return;
529     }
530
531     if (deadline > 0) {
532         /*
533          * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
534          * sleep.  Otherwise, the CPU might be waiting for a future timer
535          * interrupt to wake it up, but the interrupt never comes because
536          * the vCPU isn't running any insns and thus doesn't advance the
537          * QEMU_CLOCK_VIRTUAL.
538          */
539         if (!icount_sleep) {
540             /*
541              * We never let VCPUs sleep in no-sleep icount mode.
542              * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
543              * to the next QEMU_CLOCK_VIRTUAL event and notify it.
544              * It is useful when we want a deterministic execution time,
545              * isolated from host latencies.
546              */
547             seqlock_write_begin(&timers_state.vm_clock_seqlock);
548             timers_state.qemu_icount_bias += deadline;
549             seqlock_write_end(&timers_state.vm_clock_seqlock);
550             qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
551         } else {
552             /*
553              * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
554              * "real" time (related to the time left until the next event) has
555              * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
556              * This keeps the warps from being visible externally; for example,
557              * you will not be sending network packets continuously instead of
558              * every 100ms.
559              */
560             seqlock_write_begin(&timers_state.vm_clock_seqlock);
561             if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
562                 vm_clock_warp_start = clock;
563             }
564             seqlock_write_end(&timers_state.vm_clock_seqlock);
565             timer_mod_anticipate(icount_warp_timer, clock + deadline);
566         }
567     } else if (deadline == 0) {
568         qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
569     }
570 }
571
572 static void qemu_account_warp_timer(void)
573 {
574     if (!use_icount || !icount_sleep) {
575         return;
576     }
577
578     /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
579      * do not fire, so computing the deadline does not make sense.
580      */
581     if (!runstate_is_running()) {
582         return;
583     }
584
585     /* warp clock deterministically in record/replay mode */
586     if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
587         return;
588     }
589
590     timer_del(icount_warp_timer);
591     icount_warp_rt();
592 }
593
594 static bool icount_state_needed(void *opaque)
595 {
596     return use_icount;
597 }
598
599 /*
600  * This is a subsection for icount migration.
601  */
602 static const VMStateDescription icount_vmstate_timers = {
603     .name = "timer/icount",
604     .version_id = 1,
605     .minimum_version_id = 1,
606     .needed = icount_state_needed,
607     .fields = (VMStateField[]) {
608         VMSTATE_INT64(qemu_icount_bias, TimersState),
609         VMSTATE_INT64(qemu_icount, TimersState),
610         VMSTATE_END_OF_LIST()
611     }
612 };
613
614 static const VMStateDescription vmstate_timers = {
615     .name = "timer",
616     .version_id = 2,
617     .minimum_version_id = 1,
618     .fields = (VMStateField[]) {
619         VMSTATE_INT64(cpu_ticks_offset, TimersState),
620         VMSTATE_INT64(dummy, TimersState),
621         VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
622         VMSTATE_END_OF_LIST()
623     },
624     .subsections = (const VMStateDescription*[]) {
625         &icount_vmstate_timers,
626         NULL
627     }
628 };
629
630 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
631 {
632     double pct;
633     double throttle_ratio;
634     long sleeptime_ns;
635
636     if (!cpu_throttle_get_percentage()) {
637         return;
638     }
639
640     pct = (double)cpu_throttle_get_percentage()/100;
641     throttle_ratio = pct / (1 - pct);
642     sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
643
644     qemu_mutex_unlock_iothread();
645     atomic_set(&cpu->throttle_thread_scheduled, 0);
646     g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
647     qemu_mutex_lock_iothread();
648 }
649
650 static void cpu_throttle_timer_tick(void *opaque)
651 {
652     CPUState *cpu;
653     double pct;
654
655     /* Stop the timer if needed */
656     if (!cpu_throttle_get_percentage()) {
657         return;
658     }
659     CPU_FOREACH(cpu) {
660         if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
661             async_run_on_cpu(cpu, cpu_throttle_thread,
662                              RUN_ON_CPU_NULL);
663         }
664     }
665
666     pct = (double)cpu_throttle_get_percentage()/100;
667     timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
668                                    CPU_THROTTLE_TIMESLICE_NS / (1-pct));
669 }
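/* Throttle arithmetic, illustrative only: at pct == 0.50 the ratio is
 * 0.5 / 0.5 == 1, so each vCPU sleeps one CPU_THROTTLE_TIMESLICE_NS
 * (10 ms) per 10 ms of execution, and the timer re-arms 20 ms out
 * (10 ms / (1 - 0.5)). At CPU_THROTTLE_PCT_MAX (99%) the ratio is 99:
 * 990 ms of sleep per 10 ms slice, with the timer re-armed 1 s out.
 */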
670
671 void cpu_throttle_set(int new_throttle_pct)
672 {
673     /* Ensure throttle percentage is within valid range */
674     new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
675     new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
676
677     atomic_set(&throttle_percentage, new_throttle_pct);
678
679     timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
680                                        CPU_THROTTLE_TIMESLICE_NS);
681 }
682
683 void cpu_throttle_stop(void)
684 {
685     atomic_set(&throttle_percentage, 0);
686 }
687
688 bool cpu_throttle_active(void)
689 {
690     return (cpu_throttle_get_percentage() != 0);
691 }
692
693 int cpu_throttle_get_percentage(void)
694 {
695     return atomic_read(&throttle_percentage);
696 }
697
698 void cpu_ticks_init(void)
699 {
700     seqlock_init(&timers_state.vm_clock_seqlock);
701     vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
702     throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
703                                            cpu_throttle_timer_tick, NULL);
704 }
705
706 void configure_icount(QemuOpts *opts, Error **errp)
707 {
708     const char *option;
709     char *rem_str = NULL;
710
711     option = qemu_opt_get(opts, "shift");
712     if (!option) {
713         if (qemu_opt_get(opts, "align") != NULL) {
714             error_setg(errp, "Please specify shift option when using align");
715         }
716         return;
717     }
718
719     icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
720     if (icount_sleep) {
721         icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
722                                          icount_timer_cb, NULL);
723     }
724
725     icount_align_option = qemu_opt_get_bool(opts, "align", false);
726
727     if (icount_align_option && !icount_sleep) {
728         error_setg(errp, "align=on and sleep=off are incompatible");
729     }
730     if (strcmp(option, "auto") != 0) {
731         errno = 0;
732         icount_time_shift = strtol(option, &rem_str, 0);
733         if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
734             error_setg(errp, "icount: Invalid shift value");
735         }
736         use_icount = 1;
737         return;
738     } else if (icount_align_option) {
739         error_setg(errp, "shift=auto and align=on are incompatible");
740     } else if (!icount_sleep) {
741         error_setg(errp, "shift=auto and sleep=off are incompatible");
742     }
743
744     use_icount = 2;
745
746     /* 125MIPS seems a reasonable initial guess at the guest speed.
747        It will be corrected fairly quickly anyway.  */
748     icount_time_shift = 3;
749
750     /* Have both realtime and virtual time triggers for speed adjustment.
751        The realtime trigger catches emulated time passing too slowly,
752        the virtual time trigger catches emulated time passing too fast.
753        Realtime triggers occur even when idle, so use them less frequently
754        than VM triggers.  */
755     icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
756                                    icount_adjust_rt, NULL);
757     timer_mod(icount_rt_timer,
758                    qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
759     icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
760                                         icount_adjust_vm, NULL);
761     timer_mod(icount_vm_timer,
762                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
763                    NANOSECONDS_PER_SECOND / 10);
764 }
765
766 /***********************************************************/
767 /* TCG vCPU kick timer
768  *
769  * The kick timer is responsible for moving single-threaded vCPU
770  * emulation on to the next vCPU. If more than one vCPU is running a
771  * timer event will force a cpu->exit so the next vCPU can get
772  * scheduled.
773  *
774  * The timer is removed while all vCPUs are idle and restarted
775  * once any vCPU has work to do again.
776  */
777
778 static QEMUTimer *tcg_kick_vcpu_timer;
779 static CPUState *tcg_current_rr_cpu;
780
781 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
782
783 static inline int64_t qemu_tcg_next_kick(void)
784 {
785     return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
786 }
787
788 /* Kick the currently round-robin scheduled vCPU */
789 static void qemu_cpu_kick_rr_cpu(void)
790 {
791     CPUState *cpu;
792     do {
793         cpu = atomic_mb_read(&tcg_current_rr_cpu);
794         if (cpu) {
795             cpu_exit(cpu);
796         }
797     } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
798 }
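/* Illustrative note, not from the original source: the loop above
 * re-reads tcg_current_rr_cpu after the kick. If the round-robin
 * scheduler switched to another vCPU in the meantime, that vCPU is
 * kicked as well, so the exit request cannot be lost to the race.
 */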
799
800 static void kick_tcg_thread(void *opaque)
801 {
802     timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
803     qemu_cpu_kick_rr_cpu();
804 }
805
806 static void start_tcg_kick_timer(void)
807 {
808     if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
809         tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
810                                            kick_tcg_thread, NULL);
811         timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
812     }
813 }
814
815 static void stop_tcg_kick_timer(void)
816 {
817     if (tcg_kick_vcpu_timer) {
818         timer_del(tcg_kick_vcpu_timer);
819         tcg_kick_vcpu_timer = NULL;
820     }
821 }
822
823 /***********************************************************/
824 void hw_error(const char *fmt, ...)
825 {
826     va_list ap;
827     CPUState *cpu;
828
829     va_start(ap, fmt);
830     fprintf(stderr, "qemu: hardware error: ");
831     vfprintf(stderr, fmt, ap);
832     fprintf(stderr, "\n");
833     CPU_FOREACH(cpu) {
834         fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
835         cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
836     }
837     va_end(ap);
838     abort();
839 }
840
841 void cpu_synchronize_all_states(void)
842 {
843     CPUState *cpu;
844
845     CPU_FOREACH(cpu) {
846         cpu_synchronize_state(cpu);
847     }
848 }
849
850 void cpu_synchronize_all_post_reset(void)
851 {
852     CPUState *cpu;
853
854     CPU_FOREACH(cpu) {
855         cpu_synchronize_post_reset(cpu);
856     }
857 }
858
859 void cpu_synchronize_all_post_init(void)
860 {
861     CPUState *cpu;
862
863     CPU_FOREACH(cpu) {
864         cpu_synchronize_post_init(cpu);
865     }
866 }
867
868 static int do_vm_stop(RunState state)
869 {
870     int ret = 0;
871
872     if (runstate_is_running()) {
873         cpu_disable_ticks();
874         pause_all_vcpus();
875         runstate_set(state);
876         vm_state_notify(0, state);
877         qapi_event_send_stop(&error_abort);
878     }
879
880     bdrv_drain_all();
881     replay_disable_events();
882     ret = bdrv_flush_all();
883
884     return ret;
885 }
886
887 static bool cpu_can_run(CPUState *cpu)
888 {
889     if (cpu->stop) {
890         return false;
891     }
892     if (cpu_is_stopped(cpu)) {
893         return false;
894     }
895     return true;
896 }
897
898 static void cpu_handle_guest_debug(CPUState *cpu)
899 {
900     gdb_set_stop_cpu(cpu);
901     qemu_system_debug_request();
902     cpu->stopped = true;
903 }
904
905 #ifdef CONFIG_LINUX
906 static void sigbus_reraise(void)
907 {
908     sigset_t set;
909     struct sigaction action;
910
911     memset(&action, 0, sizeof(action));
912     action.sa_handler = SIG_DFL;
913     if (!sigaction(SIGBUS, &action, NULL)) {
914         raise(SIGBUS);
915         sigemptyset(&set);
916         sigaddset(&set, SIGBUS);
917         pthread_sigmask(SIG_UNBLOCK, &set, NULL);
918     }
919     perror("Failed to re-raise SIGBUS!");
920     abort();
921 }
922
923 static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
924 {
925     if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
926         sigbus_reraise();
927     }
928
929     if (current_cpu) {
930         /* Called asynchronously in VCPU thread.  */
931         if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
932             sigbus_reraise();
933         }
934     } else {
935         /* Called synchronously (via signalfd) in main thread.  */
936         if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
937             sigbus_reraise();
938         }
939     }
940 }
941
942 static void qemu_init_sigbus(void)
943 {
944     struct sigaction action;
945
946     memset(&action, 0, sizeof(action));
947     action.sa_flags = SA_SIGINFO;
948     action.sa_sigaction = sigbus_handler;
949     sigaction(SIGBUS, &action, NULL);
950
951     prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
952 }
953 #else /* !CONFIG_LINUX */
954 static void qemu_init_sigbus(void)
955 {
956 }
957 #endif /* !CONFIG_LINUX */
958
959 static QemuMutex qemu_global_mutex;
960
961 static QemuThread io_thread;
962
963 /* cpu creation */
964 static QemuCond qemu_cpu_cond;
965 /* system init */
966 static QemuCond qemu_pause_cond;
967
968 void qemu_init_cpu_loop(void)
969 {
970     qemu_init_sigbus();
971     qemu_cond_init(&qemu_cpu_cond);
972     qemu_cond_init(&qemu_pause_cond);
973     qemu_mutex_init(&qemu_global_mutex);
974
975     qemu_thread_get_self(&io_thread);
976 }
977
978 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
979 {
980     do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
981 }
982
983 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
984 {
985     if (kvm_destroy_vcpu(cpu) < 0) {
986         error_report("kvm_destroy_vcpu failed");
987         exit(EXIT_FAILURE);
988     }
989 }
990
991 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
992 {
993 }
994
995 static void qemu_wait_io_event_common(CPUState *cpu)
996 {
997     atomic_mb_set(&cpu->thread_kicked, false);
998     if (cpu->stop) {
999         cpu->stop = false;
1000         cpu->stopped = true;
1001         qemu_cond_broadcast(&qemu_pause_cond);
1002     }
1003     process_queued_cpu_work(cpu);
1004 }
1005
1006 static bool qemu_tcg_should_sleep(CPUState *cpu)
1007 {
1008     if (mttcg_enabled) {
1009         return cpu_thread_is_idle(cpu);
1010     } else {
1011         return all_cpu_threads_idle();
1012     }
1013 }
1014
1015 static void qemu_tcg_wait_io_event(CPUState *cpu)
1016 {
1017     while (qemu_tcg_should_sleep(cpu)) {
1018         stop_tcg_kick_timer();
1019         qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1020     }
1021
1022     start_tcg_kick_timer();
1023
1024     qemu_wait_io_event_common(cpu);
1025 }
1026
1027 static void qemu_kvm_wait_io_event(CPUState *cpu)
1028 {
1029     while (cpu_thread_is_idle(cpu)) {
1030         qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1031     }
1032
1033     qemu_wait_io_event_common(cpu);
1034 }
1035
1036 static void *qemu_kvm_cpu_thread_fn(void *arg)
1037 {
1038     CPUState *cpu = arg;
1039     int r;
1040
1041     rcu_register_thread();
1042
1043     qemu_mutex_lock_iothread();
1044     qemu_thread_get_self(cpu->thread);
1045     cpu->thread_id = qemu_get_thread_id();
1046     cpu->can_do_io = 1;
1047     current_cpu = cpu;
1048
1049     r = kvm_init_vcpu(cpu);
1050     if (r < 0) {
1051         fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
1052         exit(1);
1053     }
1054
1055     kvm_init_cpu_signals(cpu);
1056
1057     /* signal CPU creation */
1058     cpu->created = true;
1059     qemu_cond_signal(&qemu_cpu_cond);
1060
1061     do {
1062         if (cpu_can_run(cpu)) {
1063             r = kvm_cpu_exec(cpu);
1064             if (r == EXCP_DEBUG) {
1065                 cpu_handle_guest_debug(cpu);
1066             }
1067         }
1068         qemu_kvm_wait_io_event(cpu);
1069     } while (!cpu->unplug || cpu_can_run(cpu));
1070
1071     qemu_kvm_destroy_vcpu(cpu);
1072     cpu->created = false;
1073     qemu_cond_signal(&qemu_cpu_cond);
1074     qemu_mutex_unlock_iothread();
1075     return NULL;
1076 }
1077
1078 static void *qemu_dummy_cpu_thread_fn(void *arg)
1079 {
1080 #ifdef _WIN32
1081     fprintf(stderr, "qtest is not supported under Windows\n");
1082     exit(1);
1083 #else
1084     CPUState *cpu = arg;
1085     sigset_t waitset;
1086     int r;
1087
1088     rcu_register_thread();
1089
1090     qemu_mutex_lock_iothread();
1091     qemu_thread_get_self(cpu->thread);
1092     cpu->thread_id = qemu_get_thread_id();
1093     cpu->can_do_io = 1;
1094     current_cpu = cpu;
1095
1096     sigemptyset(&waitset);
1097     sigaddset(&waitset, SIG_IPI);
1098
1099     /* signal CPU creation */
1100     cpu->created = true;
1101     qemu_cond_signal(&qemu_cpu_cond);
1102
1103     while (1) {
1104         qemu_mutex_unlock_iothread();
1105         do {
1106             int sig;
1107             r = sigwait(&waitset, &sig);
1108         } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1109         if (r == -1) {
1110             perror("sigwait");
1111             exit(1);
1112         }
1113         qemu_mutex_lock_iothread();
1114         qemu_wait_io_event_common(cpu);
1115     }
1116
1117     return NULL;
1118 #endif
1119 }
1120
1121 static int64_t tcg_get_icount_limit(void)
1122 {
1123     int64_t deadline;
1124
1125     if (replay_mode != REPLAY_MODE_PLAY) {
1126         deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1127
1128         /* Maintain prior (possibly buggy) behaviour where if no deadline
1129          * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1130          * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1131          * nanoseconds.
1132          */
1133         if ((deadline < 0) || (deadline > INT32_MAX)) {
1134             deadline = INT32_MAX;
1135         }
1136
1137         return qemu_icount_round(deadline);
1138     } else {
1139         return replay_get_instructions();
1140     }
1141 }
1142
1143 static void handle_icount_deadline(void)
1144 {
1145     if (use_icount) {
1146         int64_t deadline =
1147             qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1148
1149         if (deadline == 0) {
1150             qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1151         }
1152     }
1153 }
1154
1155 static int tcg_cpu_exec(CPUState *cpu)
1156 {
1157     int ret;
1158 #ifdef CONFIG_PROFILER
1159     int64_t ti;
1160 #endif
1161
1162 #ifdef CONFIG_PROFILER
1163     ti = profile_getclock();
1164 #endif
1165     if (use_icount) {
1166         int64_t count;
1167         int decr;
1168         timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1169                                     + cpu->icount_extra);
1170         cpu->icount_decr.u16.low = 0;
1171         cpu->icount_extra = 0;
1172         count = tcg_get_icount_limit();
1173         timers_state.qemu_icount += count;
1174         decr = (count > 0xffff) ? 0xffff : count;
1175         count -= decr;
1176         cpu->icount_decr.u16.low = decr;
1177         cpu->icount_extra = count;
1178     }
1179     qemu_mutex_unlock_iothread();
1180     cpu_exec_start(cpu);
1181     ret = cpu_exec(cpu);
1182     cpu_exec_end(cpu);
1183     qemu_mutex_lock_iothread();
1184 #ifdef CONFIG_PROFILER
1185     tcg_time += profile_getclock() - ti;
1186 #endif
1187     if (use_icount) {
1188         /* Fold pending instructions back into the
1189            instruction counter, and clear the interrupt flag.  */
1190         timers_state.qemu_icount -= (cpu->icount_decr.u16.low
1191                         + cpu->icount_extra);
1192         cpu->icount_decr.u32 = 0;
1193         cpu->icount_extra = 0;
1194         replay_account_executed_instructions();
1195     }
1196     return ret;
1197 }
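/* Budget split, illustrative only: tcg_get_icount_limit() might return
 * 70000 instructions. The low 16-bit counter holds at most 0xffff, so
 * decr == 65535 goes into icount_decr.u16.low and the remaining 4465
 * into icount_extra; the execution loop refills u16.low from
 * icount_extra as it drains until the whole budget is consumed.
 */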
1198
1199 /* Destroy any remaining vCPUs which have been unplugged and have
1200  * finished running
1201  */
1202 static void deal_with_unplugged_cpus(void)
1203 {
1204     CPUState *cpu;
1205
1206     CPU_FOREACH(cpu) {
1207         if (cpu->unplug && !cpu_can_run(cpu)) {
1208             qemu_tcg_destroy_vcpu(cpu);
1209             cpu->created = false;
1210             qemu_cond_signal(&qemu_cpu_cond);
1211             break;
1212         }
1213     }
1214 }
1215
1216 /* Single-threaded TCG
1217  *
1218  * In the single-threaded case each vCPU is simulated in turn. If
1219  * there is more than a single vCPU we create a simple timer to kick
1220  * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1221  * This is done explicitly rather than relying on side-effects
1222  * elsewhere.
1223  */
1224
1225 static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
1226 {
1227     CPUState *cpu = arg;
1228
1229     rcu_register_thread();
1230
1231     qemu_mutex_lock_iothread();
1232     qemu_thread_get_self(cpu->thread);
1233
1234     CPU_FOREACH(cpu) {
1235         cpu->thread_id = qemu_get_thread_id();
1236         cpu->created = true;
1237         cpu->can_do_io = 1;
1238     }
1239     qemu_cond_signal(&qemu_cpu_cond);
1240
1241     /* wait for initial kick-off after machine start */
1242     while (first_cpu->stopped) {
1243         qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1244
1245         /* process any pending work */
1246         CPU_FOREACH(cpu) {
1247             current_cpu = cpu;
1248             qemu_wait_io_event_common(cpu);
1249         }
1250     }
1251
1252     start_tcg_kick_timer();
1253
1254     cpu = first_cpu;
1255
1256     /* process any pending work */
1257     cpu->exit_request = 1;
1258
1259     while (1) {
1260         /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
1261         qemu_account_warp_timer();
1262
1263         if (!cpu) {
1264             cpu = first_cpu;
1265         }
1266
1267         while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
1268
1269             atomic_mb_set(&tcg_current_rr_cpu, cpu);
1270             current_cpu = cpu;
1271
1272             qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1273                               (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1274
1275             if (cpu_can_run(cpu)) {
1276                 int r;
1277                 r = tcg_cpu_exec(cpu);
1278                 if (r == EXCP_DEBUG) {
1279                     cpu_handle_guest_debug(cpu);
1280                     break;
1281                 } else if (r == EXCP_ATOMIC) {
1282                     qemu_mutex_unlock_iothread();
1283                     cpu_exec_step_atomic(cpu);
1284                     qemu_mutex_lock_iothread();
1285                     break;
1286                 }
1287             } else if (cpu->stop) {
1288                 if (cpu->unplug) {
1289                     cpu = CPU_NEXT(cpu);
1290                 }
1291                 break;
1292             }
1293
1294             cpu = CPU_NEXT(cpu);
1295         } /* while (cpu && !cpu->exit_request) */
1296
1297         /* Does not need atomic_mb_set because a spurious wakeup is okay.  */
1298         atomic_set(&tcg_current_rr_cpu, NULL);
1299
1300         if (cpu && cpu->exit_request) {
1301             atomic_mb_set(&cpu->exit_request, 0);
1302         }
1303
1304         handle_icount_deadline();
1305
1306         qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
1307         deal_with_unplugged_cpus();
1308     }
1309
1310     return NULL;
1311 }
1312
1313 static void *qemu_hax_cpu_thread_fn(void *arg)
1314 {
1315     CPUState *cpu = arg;
1316     int r;
1317     qemu_thread_get_self(cpu->thread);
1318     qemu_mutex_lock(&qemu_global_mutex);
1319
1320     cpu->thread_id = qemu_get_thread_id();
1321     cpu->created = true;
1322     cpu->halted = 0;
1323     current_cpu = cpu;
1324
1325     hax_init_vcpu(cpu);
1326     qemu_cond_signal(&qemu_cpu_cond);
1327
1328     while (1) {
1329         if (cpu_can_run(cpu)) {
1330             r = hax_smp_cpu_exec(cpu);
1331             if (r == EXCP_DEBUG) {
1332                 cpu_handle_guest_debug(cpu);
1333             }
1334         }
1335
1336         while (cpu_thread_is_idle(cpu)) {
1337             qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1338         }
1339 #ifdef _WIN32
1340         SleepEx(0, TRUE);
1341 #endif
1342         qemu_wait_io_event_common(cpu);
1343     }
1344     return NULL;
1345 }
1346
1347 #ifdef _WIN32
1348 static void CALLBACK dummy_apc_func(ULONG_PTR unused)
1349 {
1350 }
1351 #endif
1352
1353 /* Multi-threaded TCG
1354  *
1355  * In the multi-threaded case each vCPU has its own thread. The TLS
1356  * variable current_cpu can be used deep in the code to find the
1357  * current CPUState for a given thread.
1358  */
1359
1360 static void *qemu_tcg_cpu_thread_fn(void *arg)
1361 {
1362     CPUState *cpu = arg;
1363
1364     rcu_register_thread();
1365
1366     qemu_mutex_lock_iothread();
1367     qemu_thread_get_self(cpu->thread);
1368
1369     cpu->thread_id = qemu_get_thread_id();
1370     cpu->created = true;
1371     cpu->can_do_io = 1;
1372     current_cpu = cpu;
1373     qemu_cond_signal(&qemu_cpu_cond);
1374
1375     /* process any pending work */
1376     cpu->exit_request = 1;
1377
1378     while (1) {
1379         if (cpu_can_run(cpu)) {
1380             int r;
1381             r = tcg_cpu_exec(cpu);
1382             switch (r) {
1383             case EXCP_DEBUG:
1384                 cpu_handle_guest_debug(cpu);
1385                 break;
1386             case EXCP_HALTED:
1387                 /* during start-up the vCPU is reset and the thread is
1388                  * kicked several times. If we don't ensure we go back
1389                  * to sleep in the halted state we won't cleanly
1390                  * start up when the vCPU is enabled.
1391                  *
1392                  * cpu->halted should ensure we sleep in wait_io_event
1393                  */
1394                 g_assert(cpu->halted);
1395                 break;
1396             case EXCP_ATOMIC:
1397                 qemu_mutex_unlock_iothread();
1398                 cpu_exec_step_atomic(cpu);
1399                 qemu_mutex_lock_iothread();
1400             default:
1401                 /* Ignore everything else? */
1402                 break;
1403             }
1404         }
1405
1406         handle_icount_deadline();
1407
1408         atomic_mb_set(&cpu->exit_request, 0);
1409         qemu_tcg_wait_io_event(cpu);
1410     }
1411
1412     return NULL;
1413 }
1414
1415 static void qemu_cpu_kick_thread(CPUState *cpu)
1416 {
1417 #ifndef _WIN32
1418     int err;
1419
1420     if (cpu->thread_kicked) {
1421         return;
1422     }
1423     cpu->thread_kicked = true;
1424     err = pthread_kill(cpu->thread->thread, SIG_IPI);
1425     if (err) {
1426         fprintf(stderr, "qemu:%s: %s\n", __func__, strerror(err));
1427         exit(1);
1428     }
1429 #else /* _WIN32 */
1430     if (!qemu_cpu_is_self(cpu)) {
1431         if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1432             fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1433                     __func__, GetLastError());
1434             exit(1);
1435         }
1436     }
1437 #endif
1438 }
1439
1440 void qemu_cpu_kick(CPUState *cpu)
1441 {
1442     qemu_cond_broadcast(cpu->halt_cond);
1443     if (tcg_enabled()) {
1444         cpu_exit(cpu);
1445         /* NOP unless doing single-thread RR */
1446         qemu_cpu_kick_rr_cpu();
1447     } else {
1448         if (hax_enabled()) {
1449             /*
1450              * FIXME: race condition with the exit_request check in
1451              * hax_vcpu_hax_exec
1452              */
1453             cpu->exit_request = 1;
1454         }
1455         qemu_cpu_kick_thread(cpu);
1456     }
1457 }
1458
1459 void qemu_cpu_kick_self(void)
1460 {
1461     assert(current_cpu);
1462     qemu_cpu_kick_thread(current_cpu);
1463 }
1464
1465 bool qemu_cpu_is_self(CPUState *cpu)
1466 {
1467     return qemu_thread_is_self(cpu->thread);
1468 }
1469
1470 bool qemu_in_vcpu_thread(void)
1471 {
1472     return current_cpu && qemu_cpu_is_self(current_cpu);
1473 }
1474
1475 static __thread bool iothread_locked = false;
1476
1477 bool qemu_mutex_iothread_locked(void)
1478 {
1479     return iothread_locked;
1480 }
1481
1482 void qemu_mutex_lock_iothread(void)
1483 {
1484     g_assert(!qemu_mutex_iothread_locked());
1485     qemu_mutex_lock(&qemu_global_mutex);
1486     iothread_locked = true;
1487 }
1488
1489 void qemu_mutex_unlock_iothread(void)
1490 {
1491     g_assert(qemu_mutex_iothread_locked());
1492     iothread_locked = false;
1493     qemu_mutex_unlock(&qemu_global_mutex);
1494 }
1495
1496 static bool all_vcpus_paused(void)
1497 {
1498     CPUState *cpu;
1499
1500     CPU_FOREACH(cpu) {
1501         if (!cpu->stopped) {
1502             return false;
1503         }
1504     }
1505
1506     return true;
1507 }
1508
1509 void pause_all_vcpus(void)
1510 {
1511     CPUState *cpu;
1512
1513     qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1514     CPU_FOREACH(cpu) {
1515         cpu->stop = true;
1516         qemu_cpu_kick(cpu);
1517     }
1518
1519     if (qemu_in_vcpu_thread()) {
1520         cpu_stop_current();
1521     }
1522
1523     while (!all_vcpus_paused()) {
1524         qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1525         CPU_FOREACH(cpu) {
1526             qemu_cpu_kick(cpu);
1527         }
1528     }
1529 }
1530
1531 void cpu_resume(CPUState *cpu)
1532 {
1533     cpu->stop = false;
1534     cpu->stopped = false;
1535     qemu_cpu_kick(cpu);
1536 }
1537
1538 void resume_all_vcpus(void)
1539 {
1540     CPUState *cpu;
1541
1542     qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1543     CPU_FOREACH(cpu) {
1544         cpu_resume(cpu);
1545     }
1546 }
1547
1548 void cpu_remove(CPUState *cpu)
1549 {
1550     cpu->stop = true;
1551     cpu->unplug = true;
1552     qemu_cpu_kick(cpu);
1553 }
1554
1555 void cpu_remove_sync(CPUState *cpu)
1556 {
1557     cpu_remove(cpu);
1558     while (cpu->created) {
1559         qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1560     }
1561 }
1562
1563 /* Size of temporary buffers used for forming a vCPU thread name */
1564 #define VCPU_THREAD_NAME_SIZE 16
1565
1566 static void qemu_tcg_init_vcpu(CPUState *cpu)
1567 {
1568     char thread_name[VCPU_THREAD_NAME_SIZE];
1569     static QemuCond *single_tcg_halt_cond;
1570     static QemuThread *single_tcg_cpu_thread;
1571
1572     if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
1573         cpu->thread = g_malloc0(sizeof(QemuThread));
1574         cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1575         qemu_cond_init(cpu->halt_cond);
1576
1577         if (qemu_tcg_mttcg_enabled()) {
1578             /* create a thread per vCPU with TCG (MTTCG) */
1579             parallel_cpus = true;
1580             snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1581                  cpu->cpu_index);
1582
1583             qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1584                                cpu, QEMU_THREAD_JOINABLE);
1585
1586         } else {
1587             /* share a single thread for all cpus with TCG */
1588             snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1589             qemu_thread_create(cpu->thread, thread_name,
1590                                qemu_tcg_rr_cpu_thread_fn,
1591                                cpu, QEMU_THREAD_JOINABLE);
1592
1593             single_tcg_halt_cond = cpu->halt_cond;
1594             single_tcg_cpu_thread = cpu->thread;
1595         }
1596 #ifdef _WIN32
1597         cpu->hThread = qemu_thread_get_handle(cpu->thread);
1598 #endif
1599         while (!cpu->created) {
1600             qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1601         }
1602     } else {
1603         /* For non-MTTCG cases we share the thread */
1604         cpu->thread = single_tcg_cpu_thread;
1605         cpu->halt_cond = single_tcg_halt_cond;
1606     }
1607 }
1608
1609 static void qemu_hax_start_vcpu(CPUState *cpu)
1610 {
1611     char thread_name[VCPU_THREAD_NAME_SIZE];
1612
1613     cpu->thread = g_malloc0(sizeof(QemuThread));
1614     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1615     qemu_cond_init(cpu->halt_cond);
1616
1617     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1618              cpu->cpu_index);
1619     qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1620                        cpu, QEMU_THREAD_JOINABLE);
1621 #ifdef _WIN32
1622     cpu->hThread = qemu_thread_get_handle(cpu->thread);
1623 #endif
1624     while (!cpu->created) {
1625         qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1626     }
1627 }
1628
1629 static void qemu_kvm_start_vcpu(CPUState *cpu)
1630 {
1631     char thread_name[VCPU_THREAD_NAME_SIZE];
1632
1633     cpu->thread = g_malloc0(sizeof(QemuThread));
1634     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1635     qemu_cond_init(cpu->halt_cond);
1636     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1637              cpu->cpu_index);
1638     qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1639                        cpu, QEMU_THREAD_JOINABLE);
1640     while (!cpu->created) {
1641         qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1642     }
1643 }
1644
1645 static void qemu_dummy_start_vcpu(CPUState *cpu)
1646 {
1647     char thread_name[VCPU_THREAD_NAME_SIZE];
1648
1649     cpu->thread = g_malloc0(sizeof(QemuThread));
1650     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1651     qemu_cond_init(cpu->halt_cond);
1652     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1653              cpu->cpu_index);
1654     qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1655                        QEMU_THREAD_JOINABLE);
1656     while (!cpu->created) {
1657         qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1658     }
1659 }
1660
1661 void qemu_init_vcpu(CPUState *cpu)
1662 {
1663     cpu->nr_cores = smp_cores;
1664     cpu->nr_threads = smp_threads;
1665     cpu->stopped = true;
1666
1667     if (!cpu->as) {
1668         /* If the target cpu hasn't set up any address spaces itself,
1669          * give it the default one.
1670          */
1671         AddressSpace *as = address_space_init_shareable(cpu->memory,
1672                                                         "cpu-memory");
1673         cpu->num_ases = 1;
1674         cpu_address_space_init(cpu, as, 0);
1675     }
1676
1677     if (kvm_enabled()) {
1678         qemu_kvm_start_vcpu(cpu);
1679     } else if (hax_enabled()) {
1680         qemu_hax_start_vcpu(cpu);
1681     } else if (tcg_enabled()) {
1682         qemu_tcg_init_vcpu(cpu);
1683     } else {
1684         qemu_dummy_start_vcpu(cpu);
1685     }
1686 }
1687
1688 void cpu_stop_current(void)
1689 {
1690     if (current_cpu) {
1691         current_cpu->stop = false;
1692         current_cpu->stopped = true;
1693         cpu_exit(current_cpu);
1694         qemu_cond_broadcast(&qemu_pause_cond);
1695     }
1696 }
1697
1698 int vm_stop(RunState state)
1699 {
1700     if (qemu_in_vcpu_thread()) {
1701         qemu_system_vmstop_request_prepare();
1702         qemu_system_vmstop_request(state);
1703         /*
1704          * FIXME: should not return to device code in case
1705          * vm_stop() has been requested.
1706          */
1707         cpu_stop_current();
1708         return 0;
1709     }
1710
1711     return do_vm_stop(state);
1712 }
1713
1714 /**
1715  * Prepare for (re)starting the VM.
1716  * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
1717  * running or in case of an error condition), 0 otherwise.
1718  */
1719 int vm_prepare_start(void)
1720 {
1721     RunState requested;
1722     int res = 0;
1723
1724     qemu_vmstop_requested(&requested);
1725     if (runstate_is_running() && requested == RUN_STATE__MAX) {
1726         return -1;
1727     }
1728
1729     /* Ensure that a STOP/RESUME pair of events is emitted if a
1730      * vmstop request was pending.  The BLOCK_IO_ERROR event, for
1731      * example, is documented to always be followed by the STOP
1732      * event.
1733      */
1734     if (runstate_is_running()) {
1735         qapi_event_send_stop(&error_abort);
1736         res = -1;
1737     } else {
1738         replay_enable_events();
1739         cpu_enable_ticks();
1740         runstate_set(RUN_STATE_RUNNING);
1741         vm_state_notify(1, RUN_STATE_RUNNING);
1742     }
1743
1744     /* We are sending this now, but the CPUs will be resumed shortly afterwards */
1745     qapi_event_send_resume(&error_abort);
1746     return res;
1747 }
1748
1749 void vm_start(void)
1750 {
1751     if (!vm_prepare_start()) {
1752         resume_all_vcpus();
1753     }
1754 }
1755
1756 /* Does a state transition even if the VM is already stopped;
1757    the current state is forgotten forever */
1758 int vm_stop_force_state(RunState state)
1759 {
1760     if (runstate_is_running()) {
1761         return vm_stop(state);
1762     } else {
1763         runstate_set(state);
1764
1765         bdrv_drain_all();
1766         /* Make sure to return an error if the flush in a previous vm_stop()
1767          * failed. */
1768         return bdrv_flush_all();
1769     }
1770 }
1771
1772 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1773 {
1774     /* XXX: implement xxx_cpu_list for targets that still lack it */
1775 #if defined(cpu_list)
1776     cpu_list(f, cpu_fprintf);
1777 #endif
1778 }
1779
1780 CpuInfoList *qmp_query_cpus(Error **errp)
1781 {
1782     CpuInfoList *head = NULL, *cur_item = NULL;
1783     CPUState *cpu;
1784
1785     CPU_FOREACH(cpu) {
1786         CpuInfoList *info;
1787 #if defined(TARGET_I386)
1788         X86CPU *x86_cpu = X86_CPU(cpu);
1789         CPUX86State *env = &x86_cpu->env;
1790 #elif defined(TARGET_PPC)
1791         PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1792         CPUPPCState *env = &ppc_cpu->env;
1793 #elif defined(TARGET_SPARC)
1794         SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1795         CPUSPARCState *env = &sparc_cpu->env;
1796 #elif defined(TARGET_MIPS)
1797         MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1798         CPUMIPSState *env = &mips_cpu->env;
1799 #elif defined(TARGET_TRICORE)
1800         TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1801         CPUTriCoreState *env = &tricore_cpu->env;
1802 #endif
1803
1804         cpu_synchronize_state(cpu);
1805
1806         info = g_malloc0(sizeof(*info));
1807         info->value = g_malloc0(sizeof(*info->value));
1808         info->value->CPU = cpu->cpu_index;
1809         info->value->current = (cpu == first_cpu);
1810         info->value->halted = cpu->halted;
1811         info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
1812         info->value->thread_id = cpu->thread_id;
1813 #if defined(TARGET_I386)
1814         info->value->arch = CPU_INFO_ARCH_X86;
1815         info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
1816 #elif defined(TARGET_PPC)
1817         info->value->arch = CPU_INFO_ARCH_PPC;
1818         info->value->u.ppc.nip = env->nip;
1819 #elif defined(TARGET_SPARC)
1820         info->value->arch = CPU_INFO_ARCH_SPARC;
1821         info->value->u.q_sparc.pc = env->pc;
1822         info->value->u.q_sparc.npc = env->npc;
1823 #elif defined(TARGET_MIPS)
1824         info->value->arch = CPU_INFO_ARCH_MIPS;
1825         info->value->u.q_mips.PC = env->active_tc.PC;
1826 #elif defined(TARGET_TRICORE)
1827         info->value->arch = CPU_INFO_ARCH_TRICORE;
1828         info->value->u.tricore.PC = env->PC;
1829 #else
1830         info->value->arch = CPU_INFO_ARCH_OTHER;
1831 #endif
1832
1833         /* XXX: waiting for the qapi to support GSList */
1834         if (!cur_item) {
1835             head = cur_item = info;
1836         } else {
1837             cur_item->next = info;
1838             cur_item = info;
1839         }
1840     }
1841
1842     return head;
1843 }
1844
1845 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1846                  bool has_cpu, int64_t cpu_index, Error **errp)
1847 {
1848     FILE *f;
1849     uint32_t l;
1850     CPUState *cpu;
1851     uint8_t buf[1024];
1852     int64_t orig_addr = addr, orig_size = size;
1853
1854     if (!has_cpu) {
1855         cpu_index = 0;
1856     }
1857
1858     cpu = qemu_get_cpu(cpu_index);
1859     if (cpu == NULL) {
1860         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1861                    "a CPU number");
1862         return;
1863     }
1864
1865     f = fopen(filename, "wb");
1866     if (!f) {
1867         error_setg_file_open(errp, errno, filename);
1868         return;
1869     }
1870
1871     while (size != 0) {
1872         l = sizeof(buf);
1873         if (l > size)
1874             l = size;
1875         if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1876             error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1877                              " specified", orig_addr, orig_size);
1878             goto exit;
1879         }
1880         if (fwrite(buf, 1, l, f) != l) {
1881             error_setg(errp, QERR_IO_ERROR);
1882             goto exit;
1883         }
1884         addr += l;
1885         size -= l;
1886     }
1887
1888 exit:
1889     fclose(f);
1890 }
1891
1892 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1893                   Error **errp)
1894 {
1895     FILE *f;
1896     uint32_t l;
1897     uint8_t buf[1024];
1898
1899     f = fopen(filename, "wb");
1900     if (!f) {
1901         error_setg_file_open(errp, errno, filename);
1902         return;
1903     }
1904
1905     while (size != 0) {
1906         l = sizeof(buf);
1907         if (l > size)
1908             l = size;
1909         cpu_physical_memory_read(addr, buf, l);
1910         if (fwrite(buf, 1, l, f) != l) {
1911             error_setg(errp, QERR_IO_ERROR);
1912             goto exit;
1913         }
1914         addr += l;
1915         size -= l;
1916     }
1917
1918 exit:
1919     fclose(f);
1920 }
1921
1922 void qmp_inject_nmi(Error **errp)
1923 {
1924     nmi_monitor_handle(monitor_get_cpu_index(), errp);
1925 }
1926
1927 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
1928 {
1929     if (!use_icount) {
1930         return;
1931     }
1932
1933     cpu_fprintf(f, "Host - Guest clock  %"PRIi64" ms\n",
1934                 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
1935     if (icount_align_option) {
1936         cpu_fprintf(f, "Max guest delay     %"PRIi64" ms\n", -max_delay/SCALE_MS);
1937         cpu_fprintf(f, "Max guest advance   %"PRIi64" ms\n", max_advance/SCALE_MS);
1938     } else {
1939         cpu_fprintf(f, "Max guest delay     NA\n");
1940         cpu_fprintf(f, "Max guest advance   NA\n");
1941     }
1942 }