[qemu.git] / cpus.c
1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24
25 /* Needed early for CONFIG_BSD etc. */
26 #include "qemu/osdep.h"
27 #include "qemu-common.h"
28 #include "qemu/config-file.h"
29 #include "cpu.h"
30 #include "monitor/monitor.h"
31 #include "qapi/qmp/qerror.h"
32 #include "qemu/error-report.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/block-backend.h"
35 #include "exec/gdbstub.h"
36 #include "sysemu/dma.h"
37 #include "sysemu/hw_accel.h"
38 #include "sysemu/kvm.h"
39 #include "sysemu/hax.h"
40 #include "qmp-commands.h"
41 #include "exec/exec-all.h"
42
43 #include "qemu/thread.h"
44 #include "sysemu/cpus.h"
45 #include "sysemu/qtest.h"
46 #include "qemu/main-loop.h"
47 #include "qemu/bitmap.h"
48 #include "qemu/seqlock.h"
49 #include "tcg.h"
50 #include "qapi-event.h"
51 #include "hw/nmi.h"
52 #include "sysemu/replay.h"
53
54 #ifdef CONFIG_LINUX
55
56 #include <sys/prctl.h>
57
58 #ifndef PR_MCE_KILL
59 #define PR_MCE_KILL 33
60 #endif
61
62 #ifndef PR_MCE_KILL_SET
63 #define PR_MCE_KILL_SET 1
64 #endif
65
66 #ifndef PR_MCE_KILL_EARLY
67 #define PR_MCE_KILL_EARLY 1
68 #endif
69
70 #endif /* CONFIG_LINUX */
71
72 int64_t max_delay;
73 int64_t max_advance;
74
75 /* vcpu throttling controls */
76 static QEMUTimer *throttle_timer;
77 static unsigned int throttle_percentage;
78
79 #define CPU_THROTTLE_PCT_MIN 1
80 #define CPU_THROTTLE_PCT_MAX 99
81 #define CPU_THROTTLE_TIMESLICE_NS 10000000
82
83 bool cpu_is_stopped(CPUState *cpu)
84 {
85     return cpu->stopped || !runstate_is_running();
86 }
87
88 static bool cpu_thread_is_idle(CPUState *cpu)
89 {
90     if (cpu->stop || cpu->queued_work_first) {
91         return false;
92     }
93     if (cpu_is_stopped(cpu)) {
94         return true;
95     }
96     if (!cpu->halted || cpu_has_work(cpu) ||
97         kvm_halt_in_kernel()) {
98         return false;
99     }
100     return true;
101 }
102
103 static bool all_cpu_threads_idle(void)
104 {
105     CPUState *cpu;
106
107     CPU_FOREACH(cpu) {
108         if (!cpu_thread_is_idle(cpu)) {
109             return false;
110         }
111     }
112     return true;
113 }
114
115 /***********************************************************/
116 /* guest cycle counter */
117
118 /* Protected by TimersState seqlock */
119
120 static bool icount_sleep = true;
121 static int64_t vm_clock_warp_start = -1;
122 /* Conversion factor from emulated instructions to virtual clock ticks.  */
123 static int icount_time_shift;
124 /* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
125 #define MAX_ICOUNT_SHIFT 10
126
127 static QEMUTimer *icount_rt_timer;
128 static QEMUTimer *icount_vm_timer;
129 static QEMUTimer *icount_warp_timer;
130
131 typedef struct TimersState {
132     /* Protected by BQL.  */
133     int64_t cpu_ticks_prev;
134     int64_t cpu_ticks_offset;
135
136     /* cpu_clock_offset can be read out of BQL, so protect it with
137      * this lock.
138      */
139     QemuSeqLock vm_clock_seqlock;
140     int64_t cpu_clock_offset;
141     int32_t cpu_ticks_enabled;
142     int64_t dummy;
143
144     /* Compensate for varying guest execution speed.  */
145     int64_t qemu_icount_bias;
146     /* Only written by TCG thread */
147     int64_t qemu_icount;
148 } TimersState;
149
150 static TimersState timers_state;
151 bool mttcg_enabled;
152
153 /*
154  * We default to false if we know other options have been enabled
155  * which are currently incompatible with MTTCG. Otherwise, once a
156  * guest (target) has been updated to support:
157  *   - atomic instructions
158  *   - memory ordering primitives (barriers)
159  * it can set the appropriate CONFIG flags in ${target}-softmmu.mak
160  *
161  * Once a guest architecture has been converted to the new primitives
162  * there are two remaining limitations to check.
163  *
164  * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
165  * - The host must have a stronger memory order than the guest
166  *
167  * It may be possible in future to support strong guests on weak hosts
168  * but that will require tagging all load/stores in a guest with their
169  * implicit memory order requirements which would likely slow things
170  * down a lot.
171  */
172
173 static bool check_tcg_memory_orders_compatible(void)
174 {
175 #if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
176     return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
177 #else
178     return false;
179 #endif
180 }
181
182 static bool default_mttcg_enabled(void)
183 {
184     if (use_icount || TCG_OVERSIZED_GUEST) {
185         return false;
186     } else {
187 #ifdef TARGET_SUPPORTS_MTTCG
188         return check_tcg_memory_orders_compatible();
189 #else
190         return false;
191 #endif
192     }
193 }
194
195 void qemu_tcg_configure(QemuOpts *opts, Error **errp)
196 {
197     const char *t = qemu_opt_get(opts, "thread");
198     if (t) {
199         if (strcmp(t, "multi") == 0) {
200             if (TCG_OVERSIZED_GUEST) {
201                 error_setg(errp, "No MTTCG when guest word size > host's");
202             } else if (use_icount) {
203                 error_setg(errp, "No MTTCG when icount is enabled");
204             } else {
205 #ifndef TARGET_SUPPORTS_MTTCG
206                 error_report("Guest not yet converted to MTTCG - "
207                              "you may get unexpected results");
208 #endif
209                 if (!check_tcg_memory_orders_compatible()) {
210                     error_report("Guest expects a stronger memory ordering "
211                                  "than the host provides");
212                     error_printf("This may cause strange/hard to debug errors\n");
213                 }
214                 mttcg_enabled = true;
215             }
216         } else if (strcmp(t, "single") == 0) {
217             mttcg_enabled = false;
218         } else {
219             error_setg(errp, "Invalid 'thread' setting %s", t);
220         }
221     } else {
222         mttcg_enabled = default_mttcg_enabled();
223     }
224 }
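/* For reference: the "thread" value above comes from the TCG accelerator
 * suboption, e.g. "-accel tcg,thread=multi" or "-accel tcg,thread=single"
 * on the command line (assuming the usual -accel option syntax); when it is
 * absent the default is chosen by default_mttcg_enabled() above.
 */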
225
226 /* The current number of executed instructions is based on what we
227  * originally budgeted minus the current state of the decrementing
228  * icount counters in extra/u16.low.
229  */
230 static int64_t cpu_get_icount_executed(CPUState *cpu)
231 {
232     return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
233 }
234
235 /*
236  * Update the global shared timer_state.qemu_icount to take into
237  * account executed instructions. This is done by the TCG vCPU
238  * thread so the main-loop can see time has moved forward.
239  */
240 void cpu_update_icount(CPUState *cpu)
241 {
242     int64_t executed = cpu_get_icount_executed(cpu);
243     cpu->icount_budget -= executed;
244
245 #ifdef CONFIG_ATOMIC64
246     atomic_set__nocheck(&timers_state.qemu_icount,
247                         atomic_read__nocheck(&timers_state.qemu_icount) +
248                         executed);
249 #else /* FIXME: we need 64bit atomics to do this safely */
250     timers_state.qemu_icount += executed;
251 #endif
252 }
253
254 int64_t cpu_get_icount_raw(void)
255 {
256     CPUState *cpu = current_cpu;
257
258     if (cpu && cpu->running) {
259         if (!cpu->can_do_io) {
260             fprintf(stderr, "Bad icount read\n");
261             exit(1);
262         }
263         /* Take into account what has run */
264         cpu_update_icount(cpu);
265     }
266 #ifdef CONFIG_ATOMIC64
267     return atomic_read__nocheck(&timers_state.qemu_icount);
268 #else /* FIXME: we need 64bit atomics to do this safely */
269     return timers_state.qemu_icount;
270 #endif
271 }
272
273 /* Return the virtual CPU time, based on the instruction counter.  */
274 static int64_t cpu_get_icount_locked(void)
275 {
276     int64_t icount = cpu_get_icount_raw();
277     return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
278 }
279
280 int64_t cpu_get_icount(void)
281 {
282     int64_t icount;
283     unsigned start;
284
285     do {
286         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
287         icount = cpu_get_icount_locked();
288     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
289
290     return icount;
291 }
292
293 int64_t cpu_icount_to_ns(int64_t icount)
294 {
295     return icount << icount_time_shift;
296 }
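/* Worked example of the shift above: with icount_time_shift == 3, each
 * executed instruction accounts for 2^3 = 8 ns of virtual time, i.e.
 * 125 million instructions per emulated second (the initial guess used by
 * configure_icount() below).  MAX_ICOUNT_SHIFT == 10 gives 1024 ns per
 * instruction, roughly the 1 MIPS floor noted where that constant is defined.
 */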
297
298 /* return the time elapsed in VM between vm_start and vm_stop.  Unless
299  * icount is active, cpu_get_ticks() uses units of the host CPU cycle
300  * counter.
301  *
302  * Caller must hold the BQL
303  */
304 int64_t cpu_get_ticks(void)
305 {
306     int64_t ticks;
307
308     if (use_icount) {
309         return cpu_get_icount();
310     }
311
312     ticks = timers_state.cpu_ticks_offset;
313     if (timers_state.cpu_ticks_enabled) {
314         ticks += cpu_get_host_ticks();
315     }
316
317     if (timers_state.cpu_ticks_prev > ticks) {
318         /* Note: non-increasing ticks may happen if the host uses
319            software suspend */
320         timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
321         ticks = timers_state.cpu_ticks_prev;
322     }
323
324     timers_state.cpu_ticks_prev = ticks;
325     return ticks;
326 }
327
328 static int64_t cpu_get_clock_locked(void)
329 {
330     int64_t time;
331
332     time = timers_state.cpu_clock_offset;
333     if (timers_state.cpu_ticks_enabled) {
334         time += get_clock();
335     }
336
337     return time;
338 }
339
340 /* Return the monotonic time elapsed in VM, i.e.,
341  * the time between vm_start and vm_stop
342  */
343 int64_t cpu_get_clock(void)
344 {
345     int64_t ti;
346     unsigned start;
347
348     do {
349         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
350         ti = cpu_get_clock_locked();
351     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
352
353     return ti;
354 }
355
356 /* enable cpu_get_ticks()
357  * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
358  */
359 void cpu_enable_ticks(void)
360 {
361     /* Here, what is really protected by the seqlock is cpu_clock_offset. */
362     seqlock_write_begin(&timers_state.vm_clock_seqlock);
363     if (!timers_state.cpu_ticks_enabled) {
364         timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
365         timers_state.cpu_clock_offset -= get_clock();
366         timers_state.cpu_ticks_enabled = 1;
367     }
368     seqlock_write_end(&timers_state.vm_clock_seqlock);
369 }
370
371 /* disable cpu_get_ticks(): the clock is stopped. You must not call
372  * cpu_get_ticks() after that.
373  * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
374  */
375 void cpu_disable_ticks(void)
376 {
377     /* Here, what is really protected by the seqlock is cpu_clock_offset. */
378     seqlock_write_begin(&timers_state.vm_clock_seqlock);
379     if (timers_state.cpu_ticks_enabled) {
380         timers_state.cpu_ticks_offset += cpu_get_host_ticks();
381         timers_state.cpu_clock_offset = cpu_get_clock_locked();
382         timers_state.cpu_ticks_enabled = 0;
383     }
384     seqlock_write_end(&timers_state.vm_clock_seqlock);
385 }
386
387 /* Correlation between real and virtual time is always going to be
388    fairly approximate, so ignore small variation.
389    When the guest is idle real and virtual time will be aligned in
390    the IO wait loop.  */
391 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
392
393 static void icount_adjust(void)
394 {
395     int64_t cur_time;
396     int64_t cur_icount;
397     int64_t delta;
398
399     /* Protected by TimersState mutex.  */
400     static int64_t last_delta;
401
402     /* If the VM is not running, then do nothing.  */
403     if (!runstate_is_running()) {
404         return;
405     }
406
407     seqlock_write_begin(&timers_state.vm_clock_seqlock);
408     cur_time = cpu_get_clock_locked();
409     cur_icount = cpu_get_icount_locked();
410
411     delta = cur_icount - cur_time;
412     /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
413     if (delta > 0
414         && last_delta + ICOUNT_WOBBLE < delta * 2
415         && icount_time_shift > 0) {
416         /* The guest is getting too far ahead.  Slow time down.  */
417         icount_time_shift--;
418     }
419     if (delta < 0
420         && last_delta - ICOUNT_WOBBLE > delta * 2
421         && icount_time_shift < MAX_ICOUNT_SHIFT) {
422         /* The guest is getting too far behind.  Speed time up.  */
423         icount_time_shift++;
424     }
425     last_delta = delta;
426     timers_state.qemu_icount_bias = cur_icount
427                               - (timers_state.qemu_icount << icount_time_shift);
428     seqlock_write_end(&timers_state.vm_clock_seqlock);
429 }
430
431 static void icount_adjust_rt(void *opaque)
432 {
433     timer_mod(icount_rt_timer,
434               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
435     icount_adjust();
436 }
437
438 static void icount_adjust_vm(void *opaque)
439 {
440     timer_mod(icount_vm_timer,
441                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
442                    NANOSECONDS_PER_SECOND / 10);
443     icount_adjust();
444 }
445
446 static int64_t qemu_icount_round(int64_t count)
447 {
448     return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
449 }
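/* For example, with icount_time_shift == 3 a 20 ns deadline rounds up to
 * (20 + 7) >> 3 = 3 instructions of budget.
 */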
450
451 static void icount_warp_rt(void)
452 {
453     unsigned seq;
454     int64_t warp_start;
455
456     /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
457      * changes from -1 to another value, so the race here is okay.
458      */
459     do {
460         seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
461         warp_start = vm_clock_warp_start;
462     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
463
464     if (warp_start == -1) {
465         return;
466     }
467
468     seqlock_write_begin(&timers_state.vm_clock_seqlock);
469     if (runstate_is_running()) {
470         int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
471                                      cpu_get_clock_locked());
472         int64_t warp_delta;
473
474         warp_delta = clock - vm_clock_warp_start;
475         if (use_icount == 2) {
476             /*
477              * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
478              * far ahead of real time.
479              */
480             int64_t cur_icount = cpu_get_icount_locked();
481             int64_t delta = clock - cur_icount;
482             warp_delta = MIN(warp_delta, delta);
483         }
484         timers_state.qemu_icount_bias += warp_delta;
485     }
486     vm_clock_warp_start = -1;
487     seqlock_write_end(&timers_state.vm_clock_seqlock);
488
489     if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
490         qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
491     }
492 }
493
494 static void icount_timer_cb(void *opaque)
495 {
496     /* No need for a checkpoint because the timer already synchronizes
497      * with CHECKPOINT_CLOCK_VIRTUAL_RT.
498      */
499     icount_warp_rt();
500 }
501
502 void qtest_clock_warp(int64_t dest)
503 {
504     int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
505     AioContext *aio_context;
506     assert(qtest_enabled());
507     aio_context = qemu_get_aio_context();
508     while (clock < dest) {
509         int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
510         int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
511
512         seqlock_write_begin(&timers_state.vm_clock_seqlock);
513         timers_state.qemu_icount_bias += warp;
514         seqlock_write_end(&timers_state.vm_clock_seqlock);
515
516         qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
517         timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
518         clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
519     }
520     qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
521 }
522
523 void qemu_start_warp_timer(void)
524 {
525     int64_t clock;
526     int64_t deadline;
527
528     if (!use_icount) {
529         return;
530     }
531
532     /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
533      * do not fire, so computing the deadline does not make sense.
534      */
535     if (!runstate_is_running()) {
536         return;
537     }
538
539     /* warp clock deterministically in record/replay mode */
540     if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
541         return;
542     }
543
544     if (!all_cpu_threads_idle()) {
545         return;
546     }
547
548     if (qtest_enabled()) {
549         /* When testing, qtest commands advance icount.  */
550         return;
551     }
552
553     /* We want to use the earliest deadline from ALL vm_clocks */
554     clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
555     deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
556     if (deadline < 0) {
557         static bool notified;
558         if (!icount_sleep && !notified) {
559             error_report("WARNING: icount sleep disabled and no active timers");
560             notified = true;
561         }
562         return;
563     }
564
565     if (deadline > 0) {
566         /*
567          * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
568          * sleep.  Otherwise, the CPU might be waiting for a future timer
569          * interrupt to wake it up, but the interrupt never comes because
570          * the vCPU isn't running any insns and thus doesn't advance the
571          * QEMU_CLOCK_VIRTUAL.
572          */
573         if (!icount_sleep) {
574             /*
575              * We never let VCPUs sleep in no-sleep icount mode.
576              * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
577              * to the next QEMU_CLOCK_VIRTUAL event and notify it.
578              * It is useful when we want a deterministic execution time,
579              * isolated from host latencies.
580              */
581             seqlock_write_begin(&timers_state.vm_clock_seqlock);
582             timers_state.qemu_icount_bias += deadline;
583             seqlock_write_end(&timers_state.vm_clock_seqlock);
584             qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
585         } else {
586             /*
587              * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
588              * "real" time (related to the time left until the next event) has
589              * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
590              * This prevents the warps from being visible externally; for example,
591              * you will not be sending network packets continuously instead of
592              * every 100ms.
593              */
594             seqlock_write_begin(&timers_state.vm_clock_seqlock);
595             if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
596                 vm_clock_warp_start = clock;
597             }
598             seqlock_write_end(&timers_state.vm_clock_seqlock);
599             timer_mod_anticipate(icount_warp_timer, clock + deadline);
600         }
601     } else if (deadline == 0) {
602         qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
603     }
604 }
605
606 static void qemu_account_warp_timer(void)
607 {
608     if (!use_icount || !icount_sleep) {
609         return;
610     }
611
612     /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
613      * do not fire, so computing the deadline does not make sense.
614      */
615     if (!runstate_is_running()) {
616         return;
617     }
618
619     /* warp clock deterministically in record/replay mode */
620     if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
621         return;
622     }
623
624     timer_del(icount_warp_timer);
625     icount_warp_rt();
626 }
627
628 static bool icount_state_needed(void *opaque)
629 {
630     return use_icount;
631 }
632
633 /*
634  * This is a subsection for icount migration.
635  */
636 static const VMStateDescription icount_vmstate_timers = {
637     .name = "timer/icount",
638     .version_id = 1,
639     .minimum_version_id = 1,
640     .needed = icount_state_needed,
641     .fields = (VMStateField[]) {
642         VMSTATE_INT64(qemu_icount_bias, TimersState),
643         VMSTATE_INT64(qemu_icount, TimersState),
644         VMSTATE_END_OF_LIST()
645     }
646 };
647
648 static const VMStateDescription vmstate_timers = {
649     .name = "timer",
650     .version_id = 2,
651     .minimum_version_id = 1,
652     .fields = (VMStateField[]) {
653         VMSTATE_INT64(cpu_ticks_offset, TimersState),
654         VMSTATE_INT64(dummy, TimersState),
655         VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
656         VMSTATE_END_OF_LIST()
657     },
658     .subsections = (const VMStateDescription*[]) {
659         &icount_vmstate_timers,
660         NULL
661     }
662 };
663
664 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
665 {
666     double pct;
667     double throttle_ratio;
668     long sleeptime_ns;
669
670     if (!cpu_throttle_get_percentage()) {
671         return;
672     }
673
674     pct = (double)cpu_throttle_get_percentage()/100;
675     throttle_ratio = pct / (1 - pct);
676     sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
677
678     qemu_mutex_unlock_iothread();
679     atomic_set(&cpu->throttle_thread_scheduled, 0);
680     g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
681     qemu_mutex_lock_iothread();
682 }
683
684 static void cpu_throttle_timer_tick(void *opaque)
685 {
686     CPUState *cpu;
687     double pct;
688
689     /* Stop the timer if needed */
690     if (!cpu_throttle_get_percentage()) {
691         return;
692     }
693     CPU_FOREACH(cpu) {
694         if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
695             async_run_on_cpu(cpu, cpu_throttle_thread,
696                              RUN_ON_CPU_NULL);
697         }
698     }
699
700     pct = (double)cpu_throttle_get_percentage()/100;
701     timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
702                                    CPU_THROTTLE_TIMESLICE_NS / (1-pct));
703 }
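/* Worked example of the arithmetic above: at a 50% throttle, pct = 0.5, so
 * throttle_ratio = 1 and each tick puts the vCPU to sleep for 10 ms; the
 * timer then refires after 10 ms / (1 - 0.5) = 20 ms, so the vCPU runs for
 * roughly half of every 20 ms period.  At the 99% maximum it sleeps about
 * 990 ms out of every second.
 */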
704
705 void cpu_throttle_set(int new_throttle_pct)
706 {
707     /* Ensure throttle percentage is within valid range */
708     new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
709     new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
710
711     atomic_set(&throttle_percentage, new_throttle_pct);
712
713     timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
714                                        CPU_THROTTLE_TIMESLICE_NS);
715 }
716
717 void cpu_throttle_stop(void)
718 {
719     atomic_set(&throttle_percentage, 0);
720 }
721
722 bool cpu_throttle_active(void)
723 {
724     return (cpu_throttle_get_percentage() != 0);
725 }
726
727 int cpu_throttle_get_percentage(void)
728 {
729     return atomic_read(&throttle_percentage);
730 }
731
732 void cpu_ticks_init(void)
733 {
734     seqlock_init(&timers_state.vm_clock_seqlock);
735     vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
736     throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
737                                            cpu_throttle_timer_tick, NULL);
738 }
739
740 void configure_icount(QemuOpts *opts, Error **errp)
741 {
742     const char *option;
743     char *rem_str = NULL;
744
745     option = qemu_opt_get(opts, "shift");
746     if (!option) {
747         if (qemu_opt_get(opts, "align") != NULL) {
748             error_setg(errp, "Please specify shift option when using align");
749         }
750         return;
751     }
752
753     icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
754     if (icount_sleep) {
755         icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
756                                          icount_timer_cb, NULL);
757     }
758
759     icount_align_option = qemu_opt_get_bool(opts, "align", false);
760
761     if (icount_align_option && !icount_sleep) {
762         error_setg(errp, "align=on and sleep=off are incompatible");
763     }
764     if (strcmp(option, "auto") != 0) {
765         errno = 0;
766         icount_time_shift = strtol(option, &rem_str, 0);
767         if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
768             error_setg(errp, "icount: Invalid shift value");
769         }
770         use_icount = 1;
771         return;
772     } else if (icount_align_option) {
773         error_setg(errp, "shift=auto and align=on are incompatible");
774     } else if (!icount_sleep) {
775         error_setg(errp, "shift=auto and sleep=off are incompatible");
776     }
777
778     use_icount = 2;
779
780     /* 125MIPS seems a reasonable initial guess at the guest speed.
781        It will be corrected fairly quickly anyway.  */
782     icount_time_shift = 3;
783
784     /* Have both realtime and virtual time triggers for speed adjustment.
785        The realtime trigger catches emulated time passing too slowly,
786        the virtual time trigger catches emulated time passing too fast.
787        Realtime triggers occur even when idle, so use them less frequently
788        than VM triggers.  */
789     icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
790                                    icount_adjust_rt, NULL);
791     timer_mod(icount_rt_timer,
792                    qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
793     icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
794                                         icount_adjust_vm, NULL);
795     timer_mod(icount_vm_timer,
796                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
797                    NANOSECONDS_PER_SECOND / 10);
798 }
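/* For reference: these options come from the -icount command line switch,
 * e.g. "-icount shift=7" for a fixed shift or "-icount shift=auto,sleep=off"
 * for adaptive mode (assuming the usual -icount option syntax).
 */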
799
800 /***********************************************************/
801 /* TCG vCPU kick timer
802  *
803  * The kick timer is responsible for moving single-threaded vCPU
804  * emulation on to the next vCPU. If more than one vCPU is running, a
805  * timer event will force a cpu->exit so the next vCPU can get
806  * scheduled.
807  *
808  * The timer is removed while all vCPUs are idle and restarted again
809  * once a vCPU has work to do.
810  */
811
812 static QEMUTimer *tcg_kick_vcpu_timer;
813 static CPUState *tcg_current_rr_cpu;
814
815 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
816
817 static inline int64_t qemu_tcg_next_kick(void)
818 {
819     return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
820 }
821
822 /* Kick the currently round-robin scheduled vCPU */
823 static void qemu_cpu_kick_rr_cpu(void)
824 {
825     CPUState *cpu;
826     do {
827         cpu = atomic_mb_read(&tcg_current_rr_cpu);
828         if (cpu) {
829             cpu_exit(cpu);
830         }
831     } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
832 }
833
834 static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
835 {
836 }
837
838 void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
839 {
840     if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
841         qemu_notify_event();
842         return;
843     }
844
845     if (!qemu_in_vcpu_thread() && first_cpu) {
846         /* qemu_cpu_kick is not enough to kick a halted CPU out of
847          * qemu_tcg_wait_io_event.  async_run_on_cpu, instead,
848          * causes cpu_thread_is_idle to return false.  This way,
849          * handle_icount_deadline can run.
850          */
851         async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
852     }
853 }
854
855 static void kick_tcg_thread(void *opaque)
856 {
857     timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
858     qemu_cpu_kick_rr_cpu();
859 }
860
861 static void start_tcg_kick_timer(void)
862 {
863     if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
864         tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
865                                            kick_tcg_thread, NULL);
866         timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
867     }
868 }
869
870 static void stop_tcg_kick_timer(void)
871 {
872     if (tcg_kick_vcpu_timer) {
873         timer_del(tcg_kick_vcpu_timer);
874         tcg_kick_vcpu_timer = NULL;
875     }
876 }
877
878 /***********************************************************/
879 void hw_error(const char *fmt, ...)
880 {
881     va_list ap;
882     CPUState *cpu;
883
884     va_start(ap, fmt);
885     fprintf(stderr, "qemu: hardware error: ");
886     vfprintf(stderr, fmt, ap);
887     fprintf(stderr, "\n");
888     CPU_FOREACH(cpu) {
889         fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
890         cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
891     }
892     va_end(ap);
893     abort();
894 }
895
896 void cpu_synchronize_all_states(void)
897 {
898     CPUState *cpu;
899
900     CPU_FOREACH(cpu) {
901         cpu_synchronize_state(cpu);
902     }
903 }
904
905 void cpu_synchronize_all_post_reset(void)
906 {
907     CPUState *cpu;
908
909     CPU_FOREACH(cpu) {
910         cpu_synchronize_post_reset(cpu);
911     }
912 }
913
914 void cpu_synchronize_all_post_init(void)
915 {
916     CPUState *cpu;
917
918     CPU_FOREACH(cpu) {
919         cpu_synchronize_post_init(cpu);
920     }
921 }
922
923 static int do_vm_stop(RunState state)
924 {
925     int ret = 0;
926
927     if (runstate_is_running()) {
928         cpu_disable_ticks();
929         pause_all_vcpus();
930         runstate_set(state);
931         vm_state_notify(0, state);
932         qapi_event_send_stop(&error_abort);
933     }
934
935     bdrv_drain_all();
936     replay_disable_events();
937     ret = bdrv_flush_all();
938
939     return ret;
940 }
941
942 static bool cpu_can_run(CPUState *cpu)
943 {
944     if (cpu->stop) {
945         return false;
946     }
947     if (cpu_is_stopped(cpu)) {
948         return false;
949     }
950     return true;
951 }
952
953 static void cpu_handle_guest_debug(CPUState *cpu)
954 {
955     gdb_set_stop_cpu(cpu);
956     qemu_system_debug_request();
957     cpu->stopped = true;
958 }
959
960 #ifdef CONFIG_LINUX
961 static void sigbus_reraise(void)
962 {
963     sigset_t set;
964     struct sigaction action;
965
966     memset(&action, 0, sizeof(action));
967     action.sa_handler = SIG_DFL;
968     if (!sigaction(SIGBUS, &action, NULL)) {
969         raise(SIGBUS);
970         sigemptyset(&set);
971         sigaddset(&set, SIGBUS);
972         pthread_sigmask(SIG_UNBLOCK, &set, NULL);
973     }
974     perror("Failed to re-raise SIGBUS!\n");
975     abort();
976 }
977
978 static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
979 {
980     if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
981         sigbus_reraise();
982     }
983
984     if (current_cpu) {
985         /* Called asynchronously in VCPU thread.  */
986         if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
987             sigbus_reraise();
988         }
989     } else {
990         /* Called synchronously (via signalfd) in main thread.  */
991         if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
992             sigbus_reraise();
993         }
994     }
995 }
996
997 static void qemu_init_sigbus(void)
998 {
999     struct sigaction action;
1000
1001     memset(&action, 0, sizeof(action));
1002     action.sa_flags = SA_SIGINFO;
1003     action.sa_sigaction = sigbus_handler;
1004     sigaction(SIGBUS, &action, NULL);
1005
1006     prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
1007 }
1008 #else /* !CONFIG_LINUX */
1009 static void qemu_init_sigbus(void)
1010 {
1011 }
1012 #endif /* !CONFIG_LINUX */
1013
1014 static QemuMutex qemu_global_mutex;
1015
1016 static QemuThread io_thread;
1017
1018 /* cpu creation */
1019 static QemuCond qemu_cpu_cond;
1020 /* system init */
1021 static QemuCond qemu_pause_cond;
1022
1023 void qemu_init_cpu_loop(void)
1024 {
1025     qemu_init_sigbus();
1026     qemu_cond_init(&qemu_cpu_cond);
1027     qemu_cond_init(&qemu_pause_cond);
1028     qemu_mutex_init(&qemu_global_mutex);
1029
1030     qemu_thread_get_self(&io_thread);
1031 }
1032
1033 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
1034 {
1035     do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
1036 }
1037
1038 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1039 {
1040     if (kvm_destroy_vcpu(cpu) < 0) {
1041         error_report("kvm_destroy_vcpu failed");
1042         exit(EXIT_FAILURE);
1043     }
1044 }
1045
1046 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
1047 {
1048 }
1049
1050 static void qemu_wait_io_event_common(CPUState *cpu)
1051 {
1052     atomic_mb_set(&cpu->thread_kicked, false);
1053     if (cpu->stop) {
1054         cpu->stop = false;
1055         cpu->stopped = true;
1056         qemu_cond_broadcast(&qemu_pause_cond);
1057     }
1058     process_queued_cpu_work(cpu);
1059 }
1060
1061 static bool qemu_tcg_should_sleep(CPUState *cpu)
1062 {
1063     if (mttcg_enabled) {
1064         return cpu_thread_is_idle(cpu);
1065     } else {
1066         return all_cpu_threads_idle();
1067     }
1068 }
1069
1070 static void qemu_tcg_wait_io_event(CPUState *cpu)
1071 {
1072     while (qemu_tcg_should_sleep(cpu)) {
1073         stop_tcg_kick_timer();
1074         qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1075     }
1076
1077     start_tcg_kick_timer();
1078
1079     qemu_wait_io_event_common(cpu);
1080 }
1081
1082 static void qemu_kvm_wait_io_event(CPUState *cpu)
1083 {
1084     while (cpu_thread_is_idle(cpu)) {
1085         qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1086     }
1087
1088     qemu_wait_io_event_common(cpu);
1089 }
1090
1091 static void *qemu_kvm_cpu_thread_fn(void *arg)
1092 {
1093     CPUState *cpu = arg;
1094     int r;
1095
1096     rcu_register_thread();
1097
1098     qemu_mutex_lock_iothread();
1099     qemu_thread_get_self(cpu->thread);
1100     cpu->thread_id = qemu_get_thread_id();
1101     cpu->can_do_io = 1;
1102     current_cpu = cpu;
1103
1104     r = kvm_init_vcpu(cpu);
1105     if (r < 0) {
1106         fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
1107         exit(1);
1108     }
1109
1110     kvm_init_cpu_signals(cpu);
1111
1112     /* signal CPU creation */
1113     cpu->created = true;
1114     qemu_cond_signal(&qemu_cpu_cond);
1115
1116     do {
1117         if (cpu_can_run(cpu)) {
1118             r = kvm_cpu_exec(cpu);
1119             if (r == EXCP_DEBUG) {
1120                 cpu_handle_guest_debug(cpu);
1121             }
1122         }
1123         qemu_kvm_wait_io_event(cpu);
1124     } while (!cpu->unplug || cpu_can_run(cpu));
1125
1126     qemu_kvm_destroy_vcpu(cpu);
1127     cpu->created = false;
1128     qemu_cond_signal(&qemu_cpu_cond);
1129     qemu_mutex_unlock_iothread();
1130     return NULL;
1131 }
1132
1133 static void *qemu_dummy_cpu_thread_fn(void *arg)
1134 {
1135 #ifdef _WIN32
1136     fprintf(stderr, "qtest is not supported under Windows\n");
1137     exit(1);
1138 #else
1139     CPUState *cpu = arg;
1140     sigset_t waitset;
1141     int r;
1142
1143     rcu_register_thread();
1144
1145     qemu_mutex_lock_iothread();
1146     qemu_thread_get_self(cpu->thread);
1147     cpu->thread_id = qemu_get_thread_id();
1148     cpu->can_do_io = 1;
1149     current_cpu = cpu;
1150
1151     sigemptyset(&waitset);
1152     sigaddset(&waitset, SIG_IPI);
1153
1154     /* signal CPU creation */
1155     cpu->created = true;
1156     qemu_cond_signal(&qemu_cpu_cond);
1157
1158     while (1) {
1159         qemu_mutex_unlock_iothread();
1160         do {
1161             int sig;
1162             r = sigwait(&waitset, &sig);
1163         } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1164         if (r == -1) {
1165             perror("sigwait");
1166             exit(1);
1167         }
1168         qemu_mutex_lock_iothread();
1169         qemu_wait_io_event_common(cpu);
1170     }
1171
1172     return NULL;
1173 #endif
1174 }
1175
1176 static int64_t tcg_get_icount_limit(void)
1177 {
1178     int64_t deadline;
1179
1180     if (replay_mode != REPLAY_MODE_PLAY) {
1181         deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1182
1183         /* Maintain prior (possibly buggy) behaviour where if no deadline
1184          * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1185          * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1186          * nanoseconds.
1187          */
1188         if ((deadline < 0) || (deadline > INT32_MAX)) {
1189             deadline = INT32_MAX;
1190         }
1191
1192         return qemu_icount_round(deadline);
1193     } else {
1194         return replay_get_instructions();
1195     }
1196 }
1197
1198 static void handle_icount_deadline(void)
1199 {
1200     assert(qemu_in_vcpu_thread());
1201     if (use_icount) {
1202         int64_t deadline =
1203             qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1204
1205         if (deadline == 0) {
1206             /* Wake up other AioContexts.  */
1207             qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1208             qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
1209         }
1210     }
1211 }
1212
1213 static void prepare_icount_for_run(CPUState *cpu)
1214 {
1215     if (use_icount) {
1216         int insns_left;
1217
1218         /* These should always be cleared by process_icount_data after
1219          * each vCPU execution. However, u16.high can be raised
1220          * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt.
1221          */
1222         g_assert(cpu->icount_decr.u16.low == 0);
1223         g_assert(cpu->icount_extra == 0);
1224
1225         cpu->icount_budget = tcg_get_icount_limit();
1226         insns_left = MIN(0xffff, cpu->icount_budget);
1227         cpu->icount_decr.u16.low = insns_left;
1228         cpu->icount_extra = cpu->icount_budget - insns_left;
1229     }
1230 }
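/* For example, a budget of 100000 instructions is split into
 * icount_decr.u16.low = 0xffff (65535) and icount_extra = 34465; the low
 * half is the counter decremented while the vCPU executes, and
 * cpu_get_icount_executed() recovers the executed total from the pair.
 */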
1231
1232 static void process_icount_data(CPUState *cpu)
1233 {
1234     if (use_icount) {
1235         /* Account for executed instructions */
1236         cpu_update_icount(cpu);
1237
1238         /* Reset the counters */
1239         cpu->icount_decr.u16.low = 0;
1240         cpu->icount_extra = 0;
1241         cpu->icount_budget = 0;
1242
1243         replay_account_executed_instructions();
1244     }
1245 }
1246
1247
1248 static int tcg_cpu_exec(CPUState *cpu)
1249 {
1250     int ret;
1251 #ifdef CONFIG_PROFILER
1252     int64_t ti;
1253 #endif
1254
1255 #ifdef CONFIG_PROFILER
1256     ti = profile_getclock();
1257 #endif
1258     qemu_mutex_unlock_iothread();
1259     cpu_exec_start(cpu);
1260     ret = cpu_exec(cpu);
1261     cpu_exec_end(cpu);
1262     qemu_mutex_lock_iothread();
1263 #ifdef CONFIG_PROFILER
1264     tcg_time += profile_getclock() - ti;
1265 #endif
1266     return ret;
1267 }
1268
1269 /* Destroy any remaining vCPUs which have been unplugged and have
1270  * finished running
1271  */
1272 static void deal_with_unplugged_cpus(void)
1273 {
1274     CPUState *cpu;
1275
1276     CPU_FOREACH(cpu) {
1277         if (cpu->unplug && !cpu_can_run(cpu)) {
1278             qemu_tcg_destroy_vcpu(cpu);
1279             cpu->created = false;
1280             qemu_cond_signal(&qemu_cpu_cond);
1281             break;
1282         }
1283     }
1284 }
1285
1286 /* Single-threaded TCG
1287  *
1288  * In the single-threaded case each vCPU is simulated in turn. If
1289  * there is more than a single vCPU we create a simple timer to kick
1290  * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1291  * This is done explicitly rather than relying on side-effects
1292  * elsewhere.
1293  */
1294
1295 static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
1296 {
1297     CPUState *cpu = arg;
1298
1299     rcu_register_thread();
1300
1301     qemu_mutex_lock_iothread();
1302     qemu_thread_get_self(cpu->thread);
1303
1304     CPU_FOREACH(cpu) {
1305         cpu->thread_id = qemu_get_thread_id();
1306         cpu->created = true;
1307         cpu->can_do_io = 1;
1308     }
1309     qemu_cond_signal(&qemu_cpu_cond);
1310
1311     /* wait for initial kick-off after machine start */
1312     while (first_cpu->stopped) {
1313         qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1314
1315         /* process any pending work */
1316         CPU_FOREACH(cpu) {
1317             current_cpu = cpu;
1318             qemu_wait_io_event_common(cpu);
1319         }
1320     }
1321
1322     start_tcg_kick_timer();
1323
1324     cpu = first_cpu;
1325
1326     /* process any pending work */
1327     cpu->exit_request = 1;
1328
1329     while (1) {
1330         /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
1331         qemu_account_warp_timer();
1332
1333         /* Run the timers here.  This is much more efficient than
1334          * waking up the I/O thread and waiting for completion.
1335          */
1336         handle_icount_deadline();
1337
1338         if (!cpu) {
1339             cpu = first_cpu;
1340         }
1341
1342         while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
1343
1344             atomic_mb_set(&tcg_current_rr_cpu, cpu);
1345             current_cpu = cpu;
1346
1347             qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1348                               (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1349
1350             if (cpu_can_run(cpu)) {
1351                 int r;
1352
1353                 prepare_icount_for_run(cpu);
1354
1355                 r = tcg_cpu_exec(cpu);
1356
1357                 process_icount_data(cpu);
1358
1359                 if (r == EXCP_DEBUG) {
1360                     cpu_handle_guest_debug(cpu);
1361                     break;
1362                 } else if (r == EXCP_ATOMIC) {
1363                     qemu_mutex_unlock_iothread();
1364                     cpu_exec_step_atomic(cpu);
1365                     qemu_mutex_lock_iothread();
1366                     break;
1367                 }
1368             } else if (cpu->stop) {
1369                 if (cpu->unplug) {
1370                     cpu = CPU_NEXT(cpu);
1371                 }
1372                 break;
1373             }
1374
1375             cpu = CPU_NEXT(cpu);
1376         } /* while (cpu && !cpu->exit_request).. */
1377
1378         /* Does not need atomic_mb_set because a spurious wakeup is okay.  */
1379         atomic_set(&tcg_current_rr_cpu, NULL);
1380
1381         if (cpu && cpu->exit_request) {
1382             atomic_mb_set(&cpu->exit_request, 0);
1383         }
1384
1385         qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
1386         deal_with_unplugged_cpus();
1387     }
1388
1389     return NULL;
1390 }
1391
1392 static void *qemu_hax_cpu_thread_fn(void *arg)
1393 {
1394     CPUState *cpu = arg;
1395     int r;
1396
1397     qemu_mutex_lock_iothread();
1398     qemu_thread_get_self(cpu->thread);
1399
1400     cpu->thread_id = qemu_get_thread_id();
1401     cpu->created = true;
1402     cpu->halted = 0;
1403     current_cpu = cpu;
1404
1405     hax_init_vcpu(cpu);
1406     qemu_cond_signal(&qemu_cpu_cond);
1407
1408     while (1) {
1409         if (cpu_can_run(cpu)) {
1410             r = hax_smp_cpu_exec(cpu);
1411             if (r == EXCP_DEBUG) {
1412                 cpu_handle_guest_debug(cpu);
1413             }
1414         }
1415
1416         while (cpu_thread_is_idle(cpu)) {
1417             qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1418         }
1419 #ifdef _WIN32
1420         SleepEx(0, TRUE);
1421 #endif
1422         qemu_wait_io_event_common(cpu);
1423     }
1424     return NULL;
1425 }
1426
1427 #ifdef _WIN32
1428 static void CALLBACK dummy_apc_func(ULONG_PTR unused)
1429 {
1430 }
1431 #endif
1432
1433 /* Multi-threaded TCG
1434  *
1435  * In the multi-threaded case each vCPU has its own thread. The TLS
1436  * variable current_cpu can be used deep in the code to find the
1437  * current CPUState for a given thread.
1438  */
1439
1440 static void *qemu_tcg_cpu_thread_fn(void *arg)
1441 {
1442     CPUState *cpu = arg;
1443
1444     g_assert(!use_icount);
1445
1446     rcu_register_thread();
1447
1448     qemu_mutex_lock_iothread();
1449     qemu_thread_get_self(cpu->thread);
1450
1451     cpu->thread_id = qemu_get_thread_id();
1452     cpu->created = true;
1453     cpu->can_do_io = 1;
1454     current_cpu = cpu;
1455     qemu_cond_signal(&qemu_cpu_cond);
1456
1457     /* process any pending work */
1458     cpu->exit_request = 1;
1459
1460     while (1) {
1461         if (cpu_can_run(cpu)) {
1462             int r;
1463             r = tcg_cpu_exec(cpu);
1464             switch (r) {
1465             case EXCP_DEBUG:
1466                 cpu_handle_guest_debug(cpu);
1467                 break;
1468             case EXCP_HALTED:
1469                 /* during start-up the vCPU is reset and the thread is
1470                  * kicked several times. If we don't ensure we go back
1471                  * to sleep in the halted state we won't cleanly
1472                  * start up when the vCPU is enabled.
1473                  *
1474                  * cpu->halted should ensure we sleep in wait_io_event
1475                  */
1476                 g_assert(cpu->halted);
1477                 break;
1478             case EXCP_ATOMIC:
1479                 qemu_mutex_unlock_iothread();
1480                 cpu_exec_step_atomic(cpu);
1481                 qemu_mutex_lock_iothread();
1482             default:
1483                 /* Ignore everything else? */
1484                 break;
1485             }
1486         }
1487
1488         atomic_mb_set(&cpu->exit_request, 0);
1489         qemu_tcg_wait_io_event(cpu);
1490     }
1491
1492     return NULL;
1493 }
1494
1495 static void qemu_cpu_kick_thread(CPUState *cpu)
1496 {
1497 #ifndef _WIN32
1498     int err;
1499
1500     if (cpu->thread_kicked) {
1501         return;
1502     }
1503     cpu->thread_kicked = true;
1504     err = pthread_kill(cpu->thread->thread, SIG_IPI);
1505     if (err) {
1506         fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1507         exit(1);
1508     }
1509 #else /* _WIN32 */
1510     if (!qemu_cpu_is_self(cpu)) {
1511         if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1512             fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1513                     __func__, GetLastError());
1514             exit(1);
1515         }
1516     }
1517 #endif
1518 }
1519
1520 void qemu_cpu_kick(CPUState *cpu)
1521 {
1522     qemu_cond_broadcast(cpu->halt_cond);
1523     if (tcg_enabled()) {
1524         cpu_exit(cpu);
1525         /* NOP unless doing single-thread RR */
1526         qemu_cpu_kick_rr_cpu();
1527     } else {
1528         if (hax_enabled()) {
1529             /*
1530              * FIXME: race condition with the exit_request check in
1531              * hax_vcpu_hax_exec
1532              */
1533             cpu->exit_request = 1;
1534         }
1535         qemu_cpu_kick_thread(cpu);
1536     }
1537 }
1538
1539 void qemu_cpu_kick_self(void)
1540 {
1541     assert(current_cpu);
1542     qemu_cpu_kick_thread(current_cpu);
1543 }
1544
1545 bool qemu_cpu_is_self(CPUState *cpu)
1546 {
1547     return qemu_thread_is_self(cpu->thread);
1548 }
1549
1550 bool qemu_in_vcpu_thread(void)
1551 {
1552     return current_cpu && qemu_cpu_is_self(current_cpu);
1553 }
1554
1555 static __thread bool iothread_locked = false;
1556
1557 bool qemu_mutex_iothread_locked(void)
1558 {
1559     return iothread_locked;
1560 }
1561
1562 void qemu_mutex_lock_iothread(void)
1563 {
1564     g_assert(!qemu_mutex_iothread_locked());
1565     qemu_mutex_lock(&qemu_global_mutex);
1566     iothread_locked = true;
1567 }
1568
1569 void qemu_mutex_unlock_iothread(void)
1570 {
1571     g_assert(qemu_mutex_iothread_locked());
1572     iothread_locked = false;
1573     qemu_mutex_unlock(&qemu_global_mutex);
1574 }
1575
1576 static bool all_vcpus_paused(void)
1577 {
1578     CPUState *cpu;
1579
1580     CPU_FOREACH(cpu) {
1581         if (!cpu->stopped) {
1582             return false;
1583         }
1584     }
1585
1586     return true;
1587 }
1588
1589 void pause_all_vcpus(void)
1590 {
1591     CPUState *cpu;
1592
1593     qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1594     CPU_FOREACH(cpu) {
1595         cpu->stop = true;
1596         qemu_cpu_kick(cpu);
1597     }
1598
1599     if (qemu_in_vcpu_thread()) {
1600         cpu_stop_current();
1601     }
1602
1603     while (!all_vcpus_paused()) {
1604         qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1605         CPU_FOREACH(cpu) {
1606             qemu_cpu_kick(cpu);
1607         }
1608     }
1609 }
1610
1611 void cpu_resume(CPUState *cpu)
1612 {
1613     cpu->stop = false;
1614     cpu->stopped = false;
1615     qemu_cpu_kick(cpu);
1616 }
1617
1618 void resume_all_vcpus(void)
1619 {
1620     CPUState *cpu;
1621
1622     qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1623     CPU_FOREACH(cpu) {
1624         cpu_resume(cpu);
1625     }
1626 }
1627
1628 void cpu_remove(CPUState *cpu)
1629 {
1630     cpu->stop = true;
1631     cpu->unplug = true;
1632     qemu_cpu_kick(cpu);
1633 }
1634
1635 void cpu_remove_sync(CPUState *cpu)
1636 {
1637     cpu_remove(cpu);
1638     while (cpu->created) {
1639         qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1640     }
1641 }
1642
1643 /* For temporary buffers for forming a name */
1644 #define VCPU_THREAD_NAME_SIZE 16
1645
1646 static void qemu_tcg_init_vcpu(CPUState *cpu)
1647 {
1648     char thread_name[VCPU_THREAD_NAME_SIZE];
1649     static QemuCond *single_tcg_halt_cond;
1650     static QemuThread *single_tcg_cpu_thread;
1651
1652     if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
1653         cpu->thread = g_malloc0(sizeof(QemuThread));
1654         cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1655         qemu_cond_init(cpu->halt_cond);
1656
1657         if (qemu_tcg_mttcg_enabled()) {
1658             /* create a thread per vCPU with TCG (MTTCG) */
1659             parallel_cpus = true;
1660             snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1661                  cpu->cpu_index);
1662
1663             qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1664                                cpu, QEMU_THREAD_JOINABLE);
1665
1666         } else {
1667             /* share a single thread for all cpus with TCG */
1668             snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1669             qemu_thread_create(cpu->thread, thread_name,
1670                                qemu_tcg_rr_cpu_thread_fn,
1671                                cpu, QEMU_THREAD_JOINABLE);
1672
1673             single_tcg_halt_cond = cpu->halt_cond;
1674             single_tcg_cpu_thread = cpu->thread;
1675         }
1676 #ifdef _WIN32
1677         cpu->hThread = qemu_thread_get_handle(cpu->thread);
1678 #endif
1679         while (!cpu->created) {
1680             qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1681         }
1682     } else {
1683         /* For non-MTTCG cases we share the thread */
1684         cpu->thread = single_tcg_cpu_thread;
1685         cpu->halt_cond = single_tcg_halt_cond;
1686     }
1687 }
1688
1689 static void qemu_hax_start_vcpu(CPUState *cpu)
1690 {
1691     char thread_name[VCPU_THREAD_NAME_SIZE];
1692
1693     cpu->thread = g_malloc0(sizeof(QemuThread));
1694     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1695     qemu_cond_init(cpu->halt_cond);
1696
1697     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1698              cpu->cpu_index);
1699     qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1700                        cpu, QEMU_THREAD_JOINABLE);
1701 #ifdef _WIN32
1702     cpu->hThread = qemu_thread_get_handle(cpu->thread);
1703 #endif
1704     while (!cpu->created) {
1705         qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1706     }
1707 }
1708
1709 static void qemu_kvm_start_vcpu(CPUState *cpu)
1710 {
1711     char thread_name[VCPU_THREAD_NAME_SIZE];
1712
1713     cpu->thread = g_malloc0(sizeof(QemuThread));
1714     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1715     qemu_cond_init(cpu->halt_cond);
1716     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1717              cpu->cpu_index);
1718     qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1719                        cpu, QEMU_THREAD_JOINABLE);
1720     while (!cpu->created) {
1721         qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1722     }
1723 }
1724
1725 static void qemu_dummy_start_vcpu(CPUState *cpu)
1726 {
1727     char thread_name[VCPU_THREAD_NAME_SIZE];
1728
1729     cpu->thread = g_malloc0(sizeof(QemuThread));
1730     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1731     qemu_cond_init(cpu->halt_cond);
1732     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1733              cpu->cpu_index);
1734     qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1735                        QEMU_THREAD_JOINABLE);
1736     while (!cpu->created) {
1737         qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1738     }
1739 }
1740
1741 void qemu_init_vcpu(CPUState *cpu)
1742 {
1743     cpu->nr_cores = smp_cores;
1744     cpu->nr_threads = smp_threads;
1745     cpu->stopped = true;
1746
1747     if (!cpu->as) {
1748         /* If the target cpu hasn't set up any address spaces itself,
1749          * give it the default one.
1750          */
1751         AddressSpace *as = address_space_init_shareable(cpu->memory,
1752                                                         "cpu-memory");
1753         cpu->num_ases = 1;
1754         cpu_address_space_init(cpu, as, 0);
1755     }
1756
1757     if (kvm_enabled()) {
1758         qemu_kvm_start_vcpu(cpu);
1759     } else if (hax_enabled()) {
1760         qemu_hax_start_vcpu(cpu);
1761     } else if (tcg_enabled()) {
1762         qemu_tcg_init_vcpu(cpu);
1763     } else {
1764         qemu_dummy_start_vcpu(cpu);
1765     }
1766 }
1767
1768 void cpu_stop_current(void)
1769 {
1770     if (current_cpu) {
1771         current_cpu->stop = false;
1772         current_cpu->stopped = true;
1773         cpu_exit(current_cpu);
1774         qemu_cond_broadcast(&qemu_pause_cond);
1775     }
1776 }
1777
1778 int vm_stop(RunState state)
1779 {
1780     if (qemu_in_vcpu_thread()) {
1781         qemu_system_vmstop_request_prepare();
1782         qemu_system_vmstop_request(state);
1783         /*
1784          * FIXME: should not return to device code in case
1785          * vm_stop() has been requested.
1786          */
1787         cpu_stop_current();
1788         return 0;
1789     }
1790
1791     return do_vm_stop(state);
1792 }
1793
1794 /**
1795  * Prepare for (re)starting the VM.
1796  * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
1797  * running or in case of an error condition), 0 otherwise.
1798  */
1799 int vm_prepare_start(void)
1800 {
1801     RunState requested;
1802     int res = 0;
1803
1804     qemu_vmstop_requested(&requested);
1805     if (runstate_is_running() && requested == RUN_STATE__MAX) {
1806         return -1;
1807     }
1808
1809     /* Ensure that a STOP/RESUME pair of events is emitted if a
1810      * vmstop request was pending.  The BLOCK_IO_ERROR event, for
1811      * example, is documented to always be followed by the STOP
1812      * event.
1813      */
1814     if (runstate_is_running()) {
1815         qapi_event_send_stop(&error_abort);
1816         res = -1;
1817     } else {
1818         replay_enable_events();
1819         cpu_enable_ticks();
1820         runstate_set(RUN_STATE_RUNNING);
1821         vm_state_notify(1, RUN_STATE_RUNNING);
1822     }
1823
1824     /* We are sending this now, but the CPUs will be resumed shortly afterwards */
1825     qapi_event_send_resume(&error_abort);
1826     return res;
1827 }
1828
1829 void vm_start(void)
1830 {
1831     if (!vm_prepare_start()) {
1832         resume_all_vcpus();
1833     }
1834 }
1835
1836 /* does a state transition even if the VM is already stopped,
1837    current state is forgotten forever */
1838 int vm_stop_force_state(RunState state)
1839 {
1840     if (runstate_is_running()) {
1841         return vm_stop(state);
1842     } else {
1843         runstate_set(state);
1844
1845         bdrv_drain_all();
1846         /* Make sure to return an error if the flush in a previous vm_stop()
1847          * failed. */
1848         return bdrv_flush_all();
1849     }
1850 }
1851
1852 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1853 {
1854     /* XXX: implement xxx_cpu_list for targets that still miss it */
1855 #if defined(cpu_list)
1856     cpu_list(f, cpu_fprintf);
1857 #endif
1858 }
1859
1860 CpuInfoList *qmp_query_cpus(Error **errp)
1861 {
1862     CpuInfoList *head = NULL, *cur_item = NULL;
1863     CPUState *cpu;
1864
1865     CPU_FOREACH(cpu) {
1866         CpuInfoList *info;
1867 #if defined(TARGET_I386)
1868         X86CPU *x86_cpu = X86_CPU(cpu);
1869         CPUX86State *env = &x86_cpu->env;
1870 #elif defined(TARGET_PPC)
1871         PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1872         CPUPPCState *env = &ppc_cpu->env;
1873 #elif defined(TARGET_SPARC)
1874         SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1875         CPUSPARCState *env = &sparc_cpu->env;
1876 #elif defined(TARGET_MIPS)
1877         MIPSCPU *mips_cpu = MIPS_CPU(cpu);
1878         CPUMIPSState *env = &mips_cpu->env;
1879 #elif defined(TARGET_TRICORE)
1880         TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
1881         CPUTriCoreState *env = &tricore_cpu->env;
1882 #endif
1883
1884         cpu_synchronize_state(cpu);
1885
1886         info = g_malloc0(sizeof(*info));
1887         info->value = g_malloc0(sizeof(*info->value));
1888         info->value->CPU = cpu->cpu_index;
1889         info->value->current = (cpu == first_cpu);
1890         info->value->halted = cpu->halted;
1891         info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
1892         info->value->thread_id = cpu->thread_id;
1893 #if defined(TARGET_I386)
1894         info->value->arch = CPU_INFO_ARCH_X86;
1895         info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
1896 #elif defined(TARGET_PPC)
1897         info->value->arch = CPU_INFO_ARCH_PPC;
1898         info->value->u.ppc.nip = env->nip;
1899 #elif defined(TARGET_SPARC)
1900         info->value->arch = CPU_INFO_ARCH_SPARC;
1901         info->value->u.q_sparc.pc = env->pc;
1902         info->value->u.q_sparc.npc = env->npc;
1903 #elif defined(TARGET_MIPS)
1904         info->value->arch = CPU_INFO_ARCH_MIPS;
1905         info->value->u.q_mips.PC = env->active_tc.PC;
1906 #elif defined(TARGET_TRICORE)
1907         info->value->arch = CPU_INFO_ARCH_TRICORE;
1908         info->value->u.tricore.PC = env->PC;
1909 #else
1910         info->value->arch = CPU_INFO_ARCH_OTHER;
1911 #endif
1912
1913         /* XXX: waiting for the qapi to support GSList */
1914         if (!cur_item) {
1915             head = cur_item = info;
1916         } else {
1917             cur_item->next = info;
1918             cur_item = info;
1919         }
1920     }
1921
1922     return head;
1923 }
1924
1925 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
1926                  bool has_cpu, int64_t cpu_index, Error **errp)
1927 {
1928     FILE *f;
1929     uint32_t l;
1930     CPUState *cpu;
1931     uint8_t buf[1024];
1932     int64_t orig_addr = addr, orig_size = size;
1933
1934     if (!has_cpu) {
1935         cpu_index = 0;
1936     }
1937
1938     cpu = qemu_get_cpu(cpu_index);
1939     if (cpu == NULL) {
1940         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
1941                    "a CPU number");
1942         return;
1943     }
1944
1945     f = fopen(filename, "wb");
1946     if (!f) {
1947         error_setg_file_open(errp, errno, filename);
1948         return;
1949     }
1950
1951     while (size != 0) {
1952         l = sizeof(buf);
1953         if (l > size)
1954             l = size;
1955         if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
1956             error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
1957                              " specified", orig_addr, orig_size);
1958             goto exit;
1959         }
1960         if (fwrite(buf, 1, l, f) != l) {
1961             error_setg(errp, QERR_IO_ERROR);
1962             goto exit;
1963         }
1964         addr += l;
1965         size -= l;
1966     }
1967
1968 exit:
1969     fclose(f);
1970 }
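/* QMP usage sketch (assuming the schema's usual "val"/"size"/"filename"
 * argument names):
 *   { "execute": "memsave",
 *     "arguments": { "val": 4096, "size": 1024, "filename": "/tmp/mem.bin" } }
 */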
1971
1972 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
1973                   Error **errp)
1974 {
1975     FILE *f;
1976     uint32_t l;
1977     uint8_t buf[1024];
1978
1979     f = fopen(filename, "wb");
1980     if (!f) {
1981         error_setg_file_open(errp, errno, filename);
1982         return;
1983     }
1984
1985     while (size != 0) {
1986         l = sizeof(buf);
1987         if (l > size)
1988             l = size;
1989         cpu_physical_memory_read(addr, buf, l);
1990         if (fwrite(buf, 1, l, f) != l) {
1991             error_setg(errp, QERR_IO_ERROR);
1992             goto exit;
1993         }
1994         addr += l;
1995         size -= l;
1996     }
1997
1998 exit:
1999     fclose(f);
2000 }
2001
2002 void qmp_inject_nmi(Error **errp)
2003 {
2004     nmi_monitor_handle(monitor_get_cpu_index(), errp);
2005 }
2006
2007 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
2008 {
2009     if (!use_icount) {
2010         return;
2011     }
2012
2013     cpu_fprintf(f, "Host - Guest clock  %"PRIi64" ms\n",
2014                 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2015     if (icount_align_option) {
2016         cpu_fprintf(f, "Max guest delay     %"PRIi64" ms\n", -max_delay/SCALE_MS);
2017         cpu_fprintf(f, "Max guest advance   %"PRIi64" ms\n", max_advance/SCALE_MS);
2018     } else {
2019         cpu_fprintf(f, "Max guest delay     NA\n");
2020         cpu_fprintf(f, "Max guest advance   NA\n");
2021     }
2022 }