[qemu.git] / cpus.c
1 /*
2  * QEMU System Emulator
3  *
4  * Copyright (c) 2003-2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24
25 /* Needed early for CONFIG_BSD etc. */
26 #include "qemu/osdep.h"
27 #include "qemu-common.h"
28 #include "qemu/config-file.h"
29 #include "cpu.h"
30 #include "monitor/monitor.h"
31 #include "qapi/qmp/qerror.h"
32 #include "qemu/error-report.h"
33 #include "sysemu/sysemu.h"
34 #include "sysemu/block-backend.h"
35 #include "exec/gdbstub.h"
36 #include "sysemu/dma.h"
37 #include "sysemu/hw_accel.h"
38 #include "sysemu/kvm.h"
39 #include "sysemu/hax.h"
40 #include "sysemu/hvf.h"
41 #include "qmp-commands.h"
42 #include "exec/exec-all.h"
43
44 #include "qemu/thread.h"
45 #include "sysemu/cpus.h"
46 #include "sysemu/qtest.h"
47 #include "qemu/main-loop.h"
48 #include "qemu/bitmap.h"
49 #include "qemu/seqlock.h"
50 #include "tcg.h"
51 #include "qapi-event.h"
52 #include "hw/nmi.h"
53 #include "sysemu/replay.h"
54 #include "hw/boards.h"
55
56 #ifdef CONFIG_LINUX
57
58 #include <sys/prctl.h>
59
60 #ifndef PR_MCE_KILL
61 #define PR_MCE_KILL 33
62 #endif
63
64 #ifndef PR_MCE_KILL_SET
65 #define PR_MCE_KILL_SET 1
66 #endif
67
68 #ifndef PR_MCE_KILL_EARLY
69 #define PR_MCE_KILL_EARLY 1
70 #endif
71
72 #endif /* CONFIG_LINUX */
73
74 int64_t max_delay;
75 int64_t max_advance;
76
77 /* vcpu throttling controls */
78 static QEMUTimer *throttle_timer;
79 static unsigned int throttle_percentage;
80
81 #define CPU_THROTTLE_PCT_MIN 1
82 #define CPU_THROTTLE_PCT_MAX 99
83 #define CPU_THROTTLE_TIMESLICE_NS 10000000
84
85 bool cpu_is_stopped(CPUState *cpu)
86 {
87     return cpu->stopped || !runstate_is_running();
88 }
89
90 static bool cpu_thread_is_idle(CPUState *cpu)
91 {
92     if (cpu->stop || cpu->queued_work_first) {
93         return false;
94     }
95     if (cpu_is_stopped(cpu)) {
96         return true;
97     }
98     if (!cpu->halted || cpu_has_work(cpu) ||
99         kvm_halt_in_kernel()) {
100         return false;
101     }
102     return true;
103 }
104
105 static bool all_cpu_threads_idle(void)
106 {
107     CPUState *cpu;
108
109     CPU_FOREACH(cpu) {
110         if (!cpu_thread_is_idle(cpu)) {
111             return false;
112         }
113     }
114     return true;
115 }
116
117 /***********************************************************/
118 /* guest cycle counter */
119
120 /* Protected by TimersState seqlock */
121
122 static bool icount_sleep = true;
123 static int64_t vm_clock_warp_start = -1;
124 /* Conversion factor from emulated instructions to virtual clock ticks.  */
125 static int icount_time_shift;
126 /* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
127 #define MAX_ICOUNT_SHIFT 10
128
129 static QEMUTimer *icount_rt_timer;
130 static QEMUTimer *icount_vm_timer;
131 static QEMUTimer *icount_warp_timer;
132
133 typedef struct TimersState {
134     /* Protected by BQL.  */
135     int64_t cpu_ticks_prev;
136     int64_t cpu_ticks_offset;
137
138     /* cpu_clock_offset can be read out of BQL, so protect it with
139      * this lock.
140      */
141     QemuSeqLock vm_clock_seqlock;
142     int64_t cpu_clock_offset;
143     int32_t cpu_ticks_enabled;
144     int64_t dummy;
145
146     /* Compensate for varying guest execution speed.  */
147     int64_t qemu_icount_bias;
148     /* Only written by TCG thread */
149     int64_t qemu_icount;
150 } TimersState;
151
152 static TimersState timers_state;
153 bool mttcg_enabled;
154
155 /*
156  * We default to false if we know other options have been enabled
157  * which are currently incompatible with MTTCG. Otherwise when each
158  * guest (target) has been updated to support:
159  *   - atomic instructions
160  *   - memory ordering primitives (barriers)
161  * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
162  *
163  * Once a guest architecture has been converted to the new primitives
164  * there are two remaining limitations to check.
165  *
166  * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
167  * - The host must have a stronger memory order than the guest
168  *
169  * It may be possible in future to support strong guests on weak hosts
170  * but that will require tagging all load/stores in a guest with their
171  * implicit memory order requirements which would likely slow things
172  * down a lot.
173  */
174
175 static bool check_tcg_memory_orders_compatible(void)
176 {
177 #if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
178     return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
179 #else
180     return false;
181 #endif
182 }
183
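/* Illustrative sketch (not part of the original file): the check above is a
 * bitmask subset test: every memory ordering the guest relies on must also
 * be enforced by the host backend.  With hypothetical masks:
 *
 *     guest_mo = MO_LD_LD | MO_LD_ST;            // guest needs these
 *     host_mo  = MO_LD_LD | MO_LD_ST | MO_ST_ST; // host guarantees these
 *     ok = (guest_mo & ~host_mo) == 0;           // true -> MTTCG allowed
 *
 * If the guest additionally required MO_ST_LD ordering, the expression
 * would be non-zero and MTTCG would remain disabled by default.
 */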
184 static bool default_mttcg_enabled(void)
185 {
186     if (use_icount || TCG_OVERSIZED_GUEST) {
187         return false;
188     } else {
189 #ifdef TARGET_SUPPORTS_MTTCG
190         return check_tcg_memory_orders_compatible();
191 #else
192         return false;
193 #endif
194     }
195 }
196
197 void qemu_tcg_configure(QemuOpts *opts, Error **errp)
198 {
199     const char *t = qemu_opt_get(opts, "thread");
200     if (t) {
201         if (strcmp(t, "multi") == 0) {
202             if (TCG_OVERSIZED_GUEST) {
 203                 error_setg(errp, "No MTTCG when guest word size > host's");
204             } else if (use_icount) {
205                 error_setg(errp, "No MTTCG when icount is enabled");
206             } else {
207 #ifndef TARGET_SUPPORTS_MTTCG
208                 error_report("Guest not yet converted to MTTCG - "
209                              "you may get unexpected results");
210 #endif
211                 if (!check_tcg_memory_orders_compatible()) {
212                     error_report("Guest expects a stronger memory ordering "
213                                  "than the host provides");
214                     error_printf("This may cause strange/hard to debug errors\n");
215                 }
216                 mttcg_enabled = true;
217             }
218         } else if (strcmp(t, "single") == 0) {
219             mttcg_enabled = false;
220         } else {
221             error_setg(errp, "Invalid 'thread' setting %s", t);
222         }
223     } else {
224         mttcg_enabled = default_mttcg_enabled();
225     }
226 }
227
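/* Usage note (illustrative, not part of the original file): the "thread"
 * option parsed above is supplied on the command line, e.g.:
 *
 *     -accel tcg,thread=multi    # request one thread per vCPU (MTTCG)
 *     -accel tcg,thread=single   # force the round-robin single thread
 *
 * When the option is absent, default_mttcg_enabled() chooses the mode.
 */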
228 /* The current number of executed instructions is based on what we
229  * originally budgeted minus the current state of the decrementing
230  * icount counters in extra/u16.low.
231  */
232 static int64_t cpu_get_icount_executed(CPUState *cpu)
233 {
234     return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
235 }
236
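/* Worked example (illustrative): with an icount_budget of 10000, 200
 * instructions left in icount_decr.u16.low and icount_extra == 3000 still
 * queued, the vCPU has executed 10000 - (200 + 3000) = 6800 instructions.
 */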
237 /*
238  * Update the global shared timer_state.qemu_icount to take into
239  * account executed instructions. This is done by the TCG vCPU
240  * thread so the main-loop can see time has moved forward.
241  */
242 void cpu_update_icount(CPUState *cpu)
243 {
244     int64_t executed = cpu_get_icount_executed(cpu);
245     cpu->icount_budget -= executed;
246
247 #ifdef CONFIG_ATOMIC64
248     atomic_set__nocheck(&timers_state.qemu_icount,
249                         atomic_read__nocheck(&timers_state.qemu_icount) +
250                         executed);
251 #else /* FIXME: we need 64bit atomics to do this safely */
252     timers_state.qemu_icount += executed;
253 #endif
254 }
255
256 int64_t cpu_get_icount_raw(void)
257 {
258     CPUState *cpu = current_cpu;
259
260     if (cpu && cpu->running) {
261         if (!cpu->can_do_io) {
262             fprintf(stderr, "Bad icount read\n");
263             exit(1);
264         }
265         /* Take into account what has run */
266         cpu_update_icount(cpu);
267     }
268 #ifdef CONFIG_ATOMIC64
269     return atomic_read__nocheck(&timers_state.qemu_icount);
270 #else /* FIXME: we need 64bit atomics to do this safely */
271     return timers_state.qemu_icount;
272 #endif
273 }
274
275 /* Return the virtual CPU time, based on the instruction counter.  */
276 static int64_t cpu_get_icount_locked(void)
277 {
278     int64_t icount = cpu_get_icount_raw();
279     return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
280 }
281
282 int64_t cpu_get_icount(void)
283 {
284     int64_t icount;
285     unsigned start;
286
287     do {
288         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
289         icount = cpu_get_icount_locked();
290     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
291
292     return icount;
293 }
294
295 int64_t cpu_icount_to_ns(int64_t icount)
296 {
297     return icount << icount_time_shift;
298 }
299
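/* Worked example (illustrative): with icount_time_shift == 3, each
 * instruction accounts for 1 << 3 = 8 ns of virtual time, i.e. the guest
 * appears to execute at 10^9 / 8 = 125 MIPS (the initial guess chosen in
 * configure_icount() below).
 */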
300 /* return the time elapsed in VM between vm_start and vm_stop.  Unless
301  * icount is active, cpu_get_ticks() uses units of the host CPU cycle
302  * counter.
303  *
304  * Caller must hold the BQL
305  */
306 int64_t cpu_get_ticks(void)
307 {
308     int64_t ticks;
309
310     if (use_icount) {
311         return cpu_get_icount();
312     }
313
314     ticks = timers_state.cpu_ticks_offset;
315     if (timers_state.cpu_ticks_enabled) {
316         ticks += cpu_get_host_ticks();
317     }
318
319     if (timers_state.cpu_ticks_prev > ticks) {
 320         /* Note: non-increasing ticks may happen if the host uses
 321            software suspend.  */
322         timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
323         ticks = timers_state.cpu_ticks_prev;
324     }
325
326     timers_state.cpu_ticks_prev = ticks;
327     return ticks;
328 }
329
330 static int64_t cpu_get_clock_locked(void)
331 {
332     int64_t time;
333
334     time = timers_state.cpu_clock_offset;
335     if (timers_state.cpu_ticks_enabled) {
336         time += get_clock();
337     }
338
339     return time;
340 }
341
342 /* Return the monotonic time elapsed in VM, i.e.,
343  * the time between vm_start and vm_stop
344  */
345 int64_t cpu_get_clock(void)
346 {
347     int64_t ti;
348     unsigned start;
349
350     do {
351         start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
352         ti = cpu_get_clock_locked();
353     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
354
355     return ti;
356 }
357
358 /* enable cpu_get_ticks()
359  * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
360  */
361 void cpu_enable_ticks(void)
362 {
 363     /* Here, what the seqlock really protects is cpu_clock_offset. */
364     seqlock_write_begin(&timers_state.vm_clock_seqlock);
365     if (!timers_state.cpu_ticks_enabled) {
366         timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
367         timers_state.cpu_clock_offset -= get_clock();
368         timers_state.cpu_ticks_enabled = 1;
369     }
370     seqlock_write_end(&timers_state.vm_clock_seqlock);
371 }
372
373 /* disable cpu_get_ticks() : the clock is stopped. You must not call
374  * cpu_get_ticks() after that.
375  * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
376  */
377 void cpu_disable_ticks(void)
378 {
 379     /* Here, what the seqlock really protects is cpu_clock_offset. */
380     seqlock_write_begin(&timers_state.vm_clock_seqlock);
381     if (timers_state.cpu_ticks_enabled) {
382         timers_state.cpu_ticks_offset += cpu_get_host_ticks();
383         timers_state.cpu_clock_offset = cpu_get_clock_locked();
384         timers_state.cpu_ticks_enabled = 0;
385     }
386     seqlock_write_end(&timers_state.vm_clock_seqlock);
387 }
388
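/* Worked example (illustrative): if cpu_get_host_ticks() reads 1000 when
 * cpu_enable_ticks() runs, the offset becomes -1000.  While running,
 * cpu_get_ticks() returns offset + host ticks (3000 when the host counter
 * reads 4000).  If cpu_disable_ticks() runs at host tick 5000, the offset
 * is folded back to -1000 + 5000 = 4000, the total ticks of VM run time.
 */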
389 /* Correlation between real and virtual time is always going to be
390    fairly approximate, so ignore small variation.
391    When the guest is idle real and virtual time will be aligned in
392    the IO wait loop.  */
393 #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
394
395 static void icount_adjust(void)
396 {
397     int64_t cur_time;
398     int64_t cur_icount;
399     int64_t delta;
400
401     /* Protected by TimersState mutex.  */
402     static int64_t last_delta;
403
404     /* If the VM is not running, then do nothing.  */
405     if (!runstate_is_running()) {
406         return;
407     }
408
409     seqlock_write_begin(&timers_state.vm_clock_seqlock);
410     cur_time = cpu_get_clock_locked();
411     cur_icount = cpu_get_icount_locked();
412
413     delta = cur_icount - cur_time;
414     /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
415     if (delta > 0
416         && last_delta + ICOUNT_WOBBLE < delta * 2
417         && icount_time_shift > 0) {
418         /* The guest is getting too far ahead.  Slow time down.  */
419         icount_time_shift--;
420     }
421     if (delta < 0
422         && last_delta - ICOUNT_WOBBLE > delta * 2
423         && icount_time_shift < MAX_ICOUNT_SHIFT) {
424         /* The guest is getting too far behind.  Speed time up.  */
425         icount_time_shift++;
426     }
427     last_delta = delta;
428     timers_state.qemu_icount_bias = cur_icount
429                               - (timers_state.qemu_icount << icount_time_shift);
430     seqlock_write_end(&timers_state.vm_clock_seqlock);
431 }
432
433 static void icount_adjust_rt(void *opaque)
434 {
435     timer_mod(icount_rt_timer,
436               qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
437     icount_adjust();
438 }
439
440 static void icount_adjust_vm(void *opaque)
441 {
442     timer_mod(icount_vm_timer,
443                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
444                    NANOSECONDS_PER_SECOND / 10);
445     icount_adjust();
446 }
447
448 static int64_t qemu_icount_round(int64_t count)
449 {
450     return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
451 }
452
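/* Worked example (illustrative): qemu_icount_round() is a ceiling division
 * by 2^icount_time_shift.  With shift == 3, a deadline of 1000 ns becomes
 * (1000 + 7) >> 3 = 125 instructions of budget, and even a 1 ns deadline
 * still yields at least one instruction.
 */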
453 static void icount_warp_rt(void)
454 {
455     unsigned seq;
456     int64_t warp_start;
457
458     /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
459      * changes from -1 to another value, so the race here is okay.
460      */
461     do {
462         seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
463         warp_start = vm_clock_warp_start;
464     } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
465
466     if (warp_start == -1) {
467         return;
468     }
469
470     seqlock_write_begin(&timers_state.vm_clock_seqlock);
471     if (runstate_is_running()) {
472         int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
473                                      cpu_get_clock_locked());
474         int64_t warp_delta;
475
476         warp_delta = clock - vm_clock_warp_start;
477         if (use_icount == 2) {
478             /*
479              * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
480              * far ahead of real time.
481              */
482             int64_t cur_icount = cpu_get_icount_locked();
483             int64_t delta = clock - cur_icount;
484             warp_delta = MIN(warp_delta, delta);
485         }
486         timers_state.qemu_icount_bias += warp_delta;
487     }
488     vm_clock_warp_start = -1;
489     seqlock_write_end(&timers_state.vm_clock_seqlock);
490
491     if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
492         qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
493     }
494 }
495
496 static void icount_timer_cb(void *opaque)
497 {
498     /* No need for a checkpoint because the timer already synchronizes
499      * with CHECKPOINT_CLOCK_VIRTUAL_RT.
500      */
501     icount_warp_rt();
502 }
503
504 void qtest_clock_warp(int64_t dest)
505 {
506     int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
507     AioContext *aio_context;
508     assert(qtest_enabled());
509     aio_context = qemu_get_aio_context();
510     while (clock < dest) {
511         int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
512         int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
513
514         seqlock_write_begin(&timers_state.vm_clock_seqlock);
515         timers_state.qemu_icount_bias += warp;
516         seqlock_write_end(&timers_state.vm_clock_seqlock);
517
518         qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
519         timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
520         clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
521     }
522     qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
523 }
524
525 void qemu_start_warp_timer(void)
526 {
527     int64_t clock;
528     int64_t deadline;
529
530     if (!use_icount) {
531         return;
532     }
533
534     /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
535      * do not fire, so computing the deadline does not make sense.
536      */
537     if (!runstate_is_running()) {
538         return;
539     }
540
541     /* warp clock deterministically in record/replay mode */
542     if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
543         return;
544     }
545
546     if (!all_cpu_threads_idle()) {
547         return;
548     }
549
550     if (qtest_enabled()) {
551         /* When testing, qtest commands advance icount.  */
552         return;
553     }
554
555     /* We want to use the earliest deadline from ALL vm_clocks */
556     clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
557     deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
558     if (deadline < 0) {
559         static bool notified;
560         if (!icount_sleep && !notified) {
561             warn_report("icount sleep disabled and no active timers");
562             notified = true;
563         }
564         return;
565     }
566
567     if (deadline > 0) {
568         /*
569          * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
570          * sleep.  Otherwise, the CPU might be waiting for a future timer
571          * interrupt to wake it up, but the interrupt never comes because
572          * the vCPU isn't running any insns and thus doesn't advance the
573          * QEMU_CLOCK_VIRTUAL.
574          */
575         if (!icount_sleep) {
576             /*
577              * We never let VCPUs sleep in no sleep icount mode.
578              * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
579              * to the next QEMU_CLOCK_VIRTUAL event and notify it.
580              * It is useful when we want a deterministic execution time,
581              * isolated from host latencies.
582              */
583             seqlock_write_begin(&timers_state.vm_clock_seqlock);
584             timers_state.qemu_icount_bias += deadline;
585             seqlock_write_end(&timers_state.vm_clock_seqlock);
586             qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
587         } else {
588             /*
 589              * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
 590              * "real" time (related to the time left until the next event) has
 591              * passed.  The QEMU_CLOCK_VIRTUAL_RT clock will do this.
 592              * This keeps the warps from being visible externally; for example,
 593              * you will not be sending network packets continuously instead of
 594              * every 100ms.
595              */
596             seqlock_write_begin(&timers_state.vm_clock_seqlock);
597             if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
598                 vm_clock_warp_start = clock;
599             }
600             seqlock_write_end(&timers_state.vm_clock_seqlock);
601             timer_mod_anticipate(icount_warp_timer, clock + deadline);
602         }
603     } else if (deadline == 0) {
604         qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
605     }
606 }
607
608 static void qemu_account_warp_timer(void)
609 {
610     if (!use_icount || !icount_sleep) {
611         return;
612     }
613
614     /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
615      * do not fire, so computing the deadline does not make sense.
616      */
617     if (!runstate_is_running()) {
618         return;
619     }
620
621     /* warp clock deterministically in record/replay mode */
622     if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
623         return;
624     }
625
626     timer_del(icount_warp_timer);
627     icount_warp_rt();
628 }
629
630 static bool icount_state_needed(void *opaque)
631 {
632     return use_icount;
633 }
634
635 /*
636  * This is a subsection for icount migration.
637  */
638 static const VMStateDescription icount_vmstate_timers = {
639     .name = "timer/icount",
640     .version_id = 1,
641     .minimum_version_id = 1,
642     .needed = icount_state_needed,
643     .fields = (VMStateField[]) {
644         VMSTATE_INT64(qemu_icount_bias, TimersState),
645         VMSTATE_INT64(qemu_icount, TimersState),
646         VMSTATE_END_OF_LIST()
647     }
648 };
649
650 static const VMStateDescription vmstate_timers = {
651     .name = "timer",
652     .version_id = 2,
653     .minimum_version_id = 1,
654     .fields = (VMStateField[]) {
655         VMSTATE_INT64(cpu_ticks_offset, TimersState),
656         VMSTATE_INT64(dummy, TimersState),
657         VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
658         VMSTATE_END_OF_LIST()
659     },
660     .subsections = (const VMStateDescription*[]) {
661         &icount_vmstate_timers,
662         NULL
663     }
664 };
665
666 static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
667 {
668     double pct;
669     double throttle_ratio;
670     long sleeptime_ns;
671
672     if (!cpu_throttle_get_percentage()) {
673         return;
674     }
675
676     pct = (double)cpu_throttle_get_percentage()/100;
677     throttle_ratio = pct / (1 - pct);
678     sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
679
680     qemu_mutex_unlock_iothread();
681     g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
682     qemu_mutex_lock_iothread();
683     atomic_set(&cpu->throttle_thread_scheduled, 0);
684 }
685
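/* Worked example (illustrative): at a 75% throttle the ratio is
 * 0.75 / (1 - 0.75) = 3, so the vCPU sleeps 3 * 10 ms = 30 ms for every
 * 10 ms timeslice it runs and receives roughly 25% of wall-clock time.
 */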
686 static void cpu_throttle_timer_tick(void *opaque)
687 {
688     CPUState *cpu;
689     double pct;
690
691     /* Stop the timer if needed */
692     if (!cpu_throttle_get_percentage()) {
693         return;
694     }
695     CPU_FOREACH(cpu) {
696         if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
697             async_run_on_cpu(cpu, cpu_throttle_thread,
698                              RUN_ON_CPU_NULL);
699         }
700     }
701
702     pct = (double)cpu_throttle_get_percentage()/100;
703     timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
704                                    CPU_THROTTLE_TIMESLICE_NS / (1-pct));
705 }
706
707 void cpu_throttle_set(int new_throttle_pct)
708 {
709     /* Ensure throttle percentage is within valid range */
710     new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
711     new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
712
713     atomic_set(&throttle_percentage, new_throttle_pct);
714
715     timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
716                                        CPU_THROTTLE_TIMESLICE_NS);
717 }
718
719 void cpu_throttle_stop(void)
720 {
721     atomic_set(&throttle_percentage, 0);
722 }
723
724 bool cpu_throttle_active(void)
725 {
726     return (cpu_throttle_get_percentage() != 0);
727 }
728
729 int cpu_throttle_get_percentage(void)
730 {
731     return atomic_read(&throttle_percentage);
732 }
733
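/* Usage sketch (illustrative, not part of the original file): a caller such
 * as the migration code ramps the throttle up and releases it when done:
 *
 *     cpu_throttle_set(20);                        // start at 20%
 *     if (cpu_throttle_active()) {
 *         int pct = cpu_throttle_get_percentage();
 *         cpu_throttle_set(MIN(pct + 10, 99));     // tighten further
 *     }
 *     cpu_throttle_stop();                         // finished, stop throttling
 */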
734 void cpu_ticks_init(void)
735 {
736     seqlock_init(&timers_state.vm_clock_seqlock);
737     vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
738     throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
739                                            cpu_throttle_timer_tick, NULL);
740 }
741
742 void configure_icount(QemuOpts *opts, Error **errp)
743 {
744     const char *option;
745     char *rem_str = NULL;
746
747     option = qemu_opt_get(opts, "shift");
748     if (!option) {
749         if (qemu_opt_get(opts, "align") != NULL) {
750             error_setg(errp, "Please specify shift option when using align");
751         }
752         return;
753     }
754
755     icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
756     if (icount_sleep) {
757         icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
758                                          icount_timer_cb, NULL);
759     }
760
761     icount_align_option = qemu_opt_get_bool(opts, "align", false);
762
763     if (icount_align_option && !icount_sleep) {
764         error_setg(errp, "align=on and sleep=off are incompatible");
765     }
766     if (strcmp(option, "auto") != 0) {
767         errno = 0;
768         icount_time_shift = strtol(option, &rem_str, 0);
769         if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
770             error_setg(errp, "icount: Invalid shift value");
771         }
772         use_icount = 1;
773         return;
774     } else if (icount_align_option) {
775         error_setg(errp, "shift=auto and align=on are incompatible");
776     } else if (!icount_sleep) {
777         error_setg(errp, "shift=auto and sleep=off are incompatible");
778     }
779
780     use_icount = 2;
781
782     /* 125MIPS seems a reasonable initial guess at the guest speed.
783        It will be corrected fairly quickly anyway.  */
784     icount_time_shift = 3;
785
786     /* Have both realtime and virtual time triggers for speed adjustment.
787        The realtime trigger catches emulated time passing too slowly,
788        the virtual time trigger catches emulated time passing too fast.
789        Realtime triggers occur even when idle, so use them less frequently
790        than VM triggers.  */
791     icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
792                                    icount_adjust_rt, NULL);
793     timer_mod(icount_rt_timer,
794                    qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
795     icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
796                                         icount_adjust_vm, NULL);
797     timer_mod(icount_vm_timer,
798                    qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
799                    NANOSECONDS_PER_SECOND / 10);
800 }
801
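/* Usage note (illustrative, not part of the original file): the options
 * parsed above come from the -icount command line, for example:
 *
 *     -icount shift=7,align=on     # fixed cost of 2^7 ns per instruction
 *     -icount shift=auto,sleep=on  # adaptive shift, use_icount == 2
 *     -icount shift=auto,sleep=off # rejected as incompatible above
 */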
802 /***********************************************************/
803 /* TCG vCPU kick timer
804  *
 805  * The kick timer is responsible for moving single threaded vCPU
 806  * emulation on to the next vCPU.  If more than one vCPU is running, a
 807  * timer event will force a cpu->exit so the next vCPU can get
 808  * scheduled.
 809  *
 810  * The timer is removed while all vCPUs are idle and restarted again
 811  * once a vCPU becomes runnable.
812  */
813
814 static QEMUTimer *tcg_kick_vcpu_timer;
815 static CPUState *tcg_current_rr_cpu;
816
817 #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
818
819 static inline int64_t qemu_tcg_next_kick(void)
820 {
821     return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
822 }
823
824 /* Kick the currently round-robin scheduled vCPU */
825 static void qemu_cpu_kick_rr_cpu(void)
826 {
827     CPUState *cpu;
828     do {
829         cpu = atomic_mb_read(&tcg_current_rr_cpu);
830         if (cpu) {
831             cpu_exit(cpu);
832         }
833     } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
834 }
835
836 static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
837 {
838 }
839
840 void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
841 {
842     if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
843         qemu_notify_event();
844         return;
845     }
846
847     if (!qemu_in_vcpu_thread() && first_cpu) {
848         /* qemu_cpu_kick is not enough to kick a halted CPU out of
849          * qemu_tcg_wait_io_event.  async_run_on_cpu, instead,
850          * causes cpu_thread_is_idle to return false.  This way,
851          * handle_icount_deadline can run.
852          */
853         async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
854     }
855 }
856
857 static void kick_tcg_thread(void *opaque)
858 {
859     timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
860     qemu_cpu_kick_rr_cpu();
861 }
862
863 static void start_tcg_kick_timer(void)
864 {
865     if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
866         tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
867                                            kick_tcg_thread, NULL);
868         timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
869     }
870 }
871
872 static void stop_tcg_kick_timer(void)
873 {
874     if (tcg_kick_vcpu_timer) {
875         timer_del(tcg_kick_vcpu_timer);
876         tcg_kick_vcpu_timer = NULL;
877     }
878 }
879
880 /***********************************************************/
881 void hw_error(const char *fmt, ...)
882 {
883     va_list ap;
884     CPUState *cpu;
885
886     va_start(ap, fmt);
887     fprintf(stderr, "qemu: hardware error: ");
888     vfprintf(stderr, fmt, ap);
889     fprintf(stderr, "\n");
890     CPU_FOREACH(cpu) {
891         fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
892         cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
893     }
894     va_end(ap);
895     abort();
896 }
897
898 void cpu_synchronize_all_states(void)
899 {
900     CPUState *cpu;
901
902     CPU_FOREACH(cpu) {
903         cpu_synchronize_state(cpu);
904         /* TODO: move to cpu_synchronize_state() */
905         if (hvf_enabled()) {
906             hvf_cpu_synchronize_state(cpu);
907         }
908     }
909 }
910
911 void cpu_synchronize_all_post_reset(void)
912 {
913     CPUState *cpu;
914
915     CPU_FOREACH(cpu) {
916         cpu_synchronize_post_reset(cpu);
917         /* TODO: move to cpu_synchronize_post_reset() */
918         if (hvf_enabled()) {
919             hvf_cpu_synchronize_post_reset(cpu);
920         }
921     }
922 }
923
924 void cpu_synchronize_all_post_init(void)
925 {
926     CPUState *cpu;
927
928     CPU_FOREACH(cpu) {
929         cpu_synchronize_post_init(cpu);
930         /* TODO: move to cpu_synchronize_post_init() */
931         if (hvf_enabled()) {
932             hvf_cpu_synchronize_post_init(cpu);
933         }
934     }
935 }
936
937 void cpu_synchronize_all_pre_loadvm(void)
938 {
939     CPUState *cpu;
940
941     CPU_FOREACH(cpu) {
942         cpu_synchronize_pre_loadvm(cpu);
943     }
944 }
945
946 static int do_vm_stop(RunState state)
947 {
948     int ret = 0;
949
950     if (runstate_is_running()) {
951         cpu_disable_ticks();
952         pause_all_vcpus();
953         runstate_set(state);
954         vm_state_notify(0, state);
955         qapi_event_send_stop(&error_abort);
956     }
957
958     bdrv_drain_all();
959     replay_disable_events();
960     ret = bdrv_flush_all();
961
962     return ret;
963 }
964
965 static bool cpu_can_run(CPUState *cpu)
966 {
967     if (cpu->stop) {
968         return false;
969     }
970     if (cpu_is_stopped(cpu)) {
971         return false;
972     }
973     return true;
974 }
975
976 static void cpu_handle_guest_debug(CPUState *cpu)
977 {
978     gdb_set_stop_cpu(cpu);
979     qemu_system_debug_request();
980     cpu->stopped = true;
981 }
982
983 #ifdef CONFIG_LINUX
984 static void sigbus_reraise(void)
985 {
986     sigset_t set;
987     struct sigaction action;
988
989     memset(&action, 0, sizeof(action));
990     action.sa_handler = SIG_DFL;
991     if (!sigaction(SIGBUS, &action, NULL)) {
992         raise(SIGBUS);
993         sigemptyset(&set);
994         sigaddset(&set, SIGBUS);
995         pthread_sigmask(SIG_UNBLOCK, &set, NULL);
996     }
 997     perror("Failed to re-raise SIGBUS!");
998     abort();
999 }
1000
1001 static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
1002 {
1003     if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
1004         sigbus_reraise();
1005     }
1006
1007     if (current_cpu) {
1008         /* Called asynchronously in VCPU thread.  */
1009         if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
1010             sigbus_reraise();
1011         }
1012     } else {
1013         /* Called synchronously (via signalfd) in main thread.  */
1014         if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
1015             sigbus_reraise();
1016         }
1017     }
1018 }
1019
1020 static void qemu_init_sigbus(void)
1021 {
1022     struct sigaction action;
1023
1024     memset(&action, 0, sizeof(action));
1025     action.sa_flags = SA_SIGINFO;
1026     action.sa_sigaction = sigbus_handler;
1027     sigaction(SIGBUS, &action, NULL);
1028
1029     prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
1030 }
1031 #else /* !CONFIG_LINUX */
1032 static void qemu_init_sigbus(void)
1033 {
1034 }
1035 #endif /* !CONFIG_LINUX */
1036
1037 static QemuMutex qemu_global_mutex;
1038
1039 static QemuThread io_thread;
1040
1041 /* cpu creation */
1042 static QemuCond qemu_cpu_cond;
1043 /* system init */
1044 static QemuCond qemu_pause_cond;
1045
1046 void qemu_init_cpu_loop(void)
1047 {
1048     qemu_init_sigbus();
1049     qemu_cond_init(&qemu_cpu_cond);
1050     qemu_cond_init(&qemu_pause_cond);
1051     qemu_mutex_init(&qemu_global_mutex);
1052
1053     qemu_thread_get_self(&io_thread);
1054 }
1055
1056 void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
1057 {
1058     do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
1059 }
1060
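/* Usage sketch (illustrative, not part of the original file): work that must
 * run on a particular vCPU thread is wrapped in a run_on_cpu_func and
 * dispatched through run_on_cpu() (synchronous) or async_run_on_cpu():
 *
 *     static void set_halted(CPUState *cpu, run_on_cpu_data data)
 *     {
 *         cpu->halted = data.host_int;
 *     }
 *
 *     run_on_cpu(cpu, set_halted, RUN_ON_CPU_HOST_INT(1));
 */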
1061 static void qemu_kvm_destroy_vcpu(CPUState *cpu)
1062 {
1063     if (kvm_destroy_vcpu(cpu) < 0) {
1064         error_report("kvm_destroy_vcpu failed");
1065         exit(EXIT_FAILURE);
1066     }
1067 }
1068
1069 static void qemu_tcg_destroy_vcpu(CPUState *cpu)
1070 {
1071 }
1072
1073 static void qemu_cpu_stop(CPUState *cpu, bool exit)
1074 {
1075     g_assert(qemu_cpu_is_self(cpu));
1076     cpu->stop = false;
1077     cpu->stopped = true;
1078     if (exit) {
1079         cpu_exit(cpu);
1080     }
1081     qemu_cond_broadcast(&qemu_pause_cond);
1082 }
1083
1084 static void qemu_wait_io_event_common(CPUState *cpu)
1085 {
1086     atomic_mb_set(&cpu->thread_kicked, false);
1087     if (cpu->stop) {
1088         qemu_cpu_stop(cpu, false);
1089     }
1090     process_queued_cpu_work(cpu);
1091 }
1092
1093 static bool qemu_tcg_should_sleep(CPUState *cpu)
1094 {
1095     if (mttcg_enabled) {
1096         return cpu_thread_is_idle(cpu);
1097     } else {
1098         return all_cpu_threads_idle();
1099     }
1100 }
1101
1102 static void qemu_tcg_wait_io_event(CPUState *cpu)
1103 {
1104     while (qemu_tcg_should_sleep(cpu)) {
1105         stop_tcg_kick_timer();
1106         qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1107     }
1108
1109     start_tcg_kick_timer();
1110
1111     qemu_wait_io_event_common(cpu);
1112 }
1113
1114 static void qemu_kvm_wait_io_event(CPUState *cpu)
1115 {
1116     while (cpu_thread_is_idle(cpu)) {
1117         qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1118     }
1119
1120     qemu_wait_io_event_common(cpu);
1121 }
1122
1123 static void qemu_hvf_wait_io_event(CPUState *cpu)
1124 {
1125     while (cpu_thread_is_idle(cpu)) {
1126         qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1127     }
1128     qemu_wait_io_event_common(cpu);
1129 }
1130
1131 static void *qemu_kvm_cpu_thread_fn(void *arg)
1132 {
1133     CPUState *cpu = arg;
1134     int r;
1135
1136     rcu_register_thread();
1137
1138     qemu_mutex_lock_iothread();
1139     qemu_thread_get_self(cpu->thread);
1140     cpu->thread_id = qemu_get_thread_id();
1141     cpu->can_do_io = 1;
1142     current_cpu = cpu;
1143
1144     r = kvm_init_vcpu(cpu);
1145     if (r < 0) {
1146         fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
1147         exit(1);
1148     }
1149
1150     kvm_init_cpu_signals(cpu);
1151
1152     /* signal CPU creation */
1153     cpu->created = true;
1154     qemu_cond_signal(&qemu_cpu_cond);
1155
1156     do {
1157         if (cpu_can_run(cpu)) {
1158             r = kvm_cpu_exec(cpu);
1159             if (r == EXCP_DEBUG) {
1160                 cpu_handle_guest_debug(cpu);
1161             }
1162         }
1163         qemu_kvm_wait_io_event(cpu);
1164     } while (!cpu->unplug || cpu_can_run(cpu));
1165
1166     qemu_kvm_destroy_vcpu(cpu);
1167     cpu->created = false;
1168     qemu_cond_signal(&qemu_cpu_cond);
1169     qemu_mutex_unlock_iothread();
1170     return NULL;
1171 }
1172
1173 static void *qemu_dummy_cpu_thread_fn(void *arg)
1174 {
1175 #ifdef _WIN32
1176     fprintf(stderr, "qtest is not supported under Windows\n");
1177     exit(1);
1178 #else
1179     CPUState *cpu = arg;
1180     sigset_t waitset;
1181     int r;
1182
1183     rcu_register_thread();
1184
1185     qemu_mutex_lock_iothread();
1186     qemu_thread_get_self(cpu->thread);
1187     cpu->thread_id = qemu_get_thread_id();
1188     cpu->can_do_io = 1;
1189     current_cpu = cpu;
1190
1191     sigemptyset(&waitset);
1192     sigaddset(&waitset, SIG_IPI);
1193
1194     /* signal CPU creation */
1195     cpu->created = true;
1196     qemu_cond_signal(&qemu_cpu_cond);
1197
1198     while (1) {
1199         qemu_mutex_unlock_iothread();
1200         do {
1201             int sig;
1202             r = sigwait(&waitset, &sig);
1203         } while (r == -1 && (errno == EAGAIN || errno == EINTR));
1204         if (r == -1) {
1205             perror("sigwait");
1206             exit(1);
1207         }
1208         qemu_mutex_lock_iothread();
1209         qemu_wait_io_event_common(cpu);
1210     }
1211
1212     return NULL;
1213 #endif
1214 }
1215
1216 static int64_t tcg_get_icount_limit(void)
1217 {
1218     int64_t deadline;
1219
1220     if (replay_mode != REPLAY_MODE_PLAY) {
1221         deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1222
1223         /* Maintain prior (possibly buggy) behaviour where if no deadline
1224          * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
1225          * INT32_MAX nanoseconds ahead, we still use INT32_MAX
1226          * nanoseconds.
1227          */
1228         if ((deadline < 0) || (deadline > INT32_MAX)) {
1229             deadline = INT32_MAX;
1230         }
1231
1232         return qemu_icount_round(deadline);
1233     } else {
1234         return replay_get_instructions();
1235     }
1236 }
1237
1238 static void handle_icount_deadline(void)
1239 {
1240     assert(qemu_in_vcpu_thread());
1241     if (use_icount) {
1242         int64_t deadline =
1243             qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
1244
1245         if (deadline == 0) {
1246             /* Wake up other AioContexts.  */
1247             qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
1248             qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
1249         }
1250     }
1251 }
1252
1253 static void prepare_icount_for_run(CPUState *cpu)
1254 {
1255     if (use_icount) {
1256         int insns_left;
1257
1258         /* These should always be cleared by process_icount_data after
1259          * each vCPU execution. However u16.high can be raised
1260          * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
1261          */
1262         g_assert(cpu->icount_decr.u16.low == 0);
1263         g_assert(cpu->icount_extra == 0);
1264
1265         cpu->icount_budget = tcg_get_icount_limit();
1266         insns_left = MIN(0xffff, cpu->icount_budget);
1267         cpu->icount_decr.u16.low = insns_left;
1268         cpu->icount_extra = cpu->icount_budget - insns_left;
1269     }
1270 }
1271
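/* Worked example (illustrative): with an icount budget of 100000
 * instructions, the 16-bit decrementer receives insns_left = 0xffff (65535)
 * and icount_extra holds the remaining 34465, which is drawn down by later
 * refills of the decrementer during execution.
 */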
1272 static void process_icount_data(CPUState *cpu)
1273 {
1274     if (use_icount) {
1275         /* Account for executed instructions */
1276         cpu_update_icount(cpu);
1277
1278         /* Reset the counters */
1279         cpu->icount_decr.u16.low = 0;
1280         cpu->icount_extra = 0;
1281         cpu->icount_budget = 0;
1282
1283         replay_account_executed_instructions();
1284     }
1285 }
1286
1287
1288 static int tcg_cpu_exec(CPUState *cpu)
1289 {
1290     int ret;
1291 #ifdef CONFIG_PROFILER
1292     int64_t ti;
1293 #endif
1294
1295 #ifdef CONFIG_PROFILER
1296     ti = profile_getclock();
1297 #endif
1298     qemu_mutex_unlock_iothread();
1299     cpu_exec_start(cpu);
1300     ret = cpu_exec(cpu);
1301     cpu_exec_end(cpu);
1302     qemu_mutex_lock_iothread();
1303 #ifdef CONFIG_PROFILER
1304     tcg_time += profile_getclock() - ti;
1305 #endif
1306     return ret;
1307 }
1308
1309 /* Destroy any remaining vCPUs which have been unplugged and have
1310  * finished running
1311  */
1312 static void deal_with_unplugged_cpus(void)
1313 {
1314     CPUState *cpu;
1315
1316     CPU_FOREACH(cpu) {
1317         if (cpu->unplug && !cpu_can_run(cpu)) {
1318             qemu_tcg_destroy_vcpu(cpu);
1319             cpu->created = false;
1320             qemu_cond_signal(&qemu_cpu_cond);
1321             break;
1322         }
1323     }
1324 }
1325
1326 /* Single-threaded TCG
1327  *
1328  * In the single-threaded case each vCPU is simulated in turn. If
1329  * there is more than a single vCPU we create a simple timer to kick
1330  * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
1331  * This is done explicitly rather than relying on side-effects
1332  * elsewhere.
1333  */
1334
1335 static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
1336 {
1337     CPUState *cpu = arg;
1338
1339     rcu_register_thread();
1340     tcg_register_thread();
1341
1342     qemu_mutex_lock_iothread();
1343     qemu_thread_get_self(cpu->thread);
1344
1345     CPU_FOREACH(cpu) {
1346         cpu->thread_id = qemu_get_thread_id();
1347         cpu->created = true;
1348         cpu->can_do_io = 1;
1349     }
1350     qemu_cond_signal(&qemu_cpu_cond);
1351
1352     /* wait for initial kick-off after machine start */
1353     while (first_cpu->stopped) {
1354         qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
1355
1356         /* process any pending work */
1357         CPU_FOREACH(cpu) {
1358             current_cpu = cpu;
1359             qemu_wait_io_event_common(cpu);
1360         }
1361     }
1362
1363     start_tcg_kick_timer();
1364
1365     cpu = first_cpu;
1366
1367     /* process any pending work */
1368     cpu->exit_request = 1;
1369
1370     while (1) {
1371         /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
1372         qemu_account_warp_timer();
1373
1374         /* Run the timers here.  This is much more efficient than
1375          * waking up the I/O thread and waiting for completion.
1376          */
1377         handle_icount_deadline();
1378
1379         if (!cpu) {
1380             cpu = first_cpu;
1381         }
1382
1383         while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
1384
1385             atomic_mb_set(&tcg_current_rr_cpu, cpu);
1386             current_cpu = cpu;
1387
1388             qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
1389                               (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
1390
1391             if (cpu_can_run(cpu)) {
1392                 int r;
1393
1394                 prepare_icount_for_run(cpu);
1395
1396                 r = tcg_cpu_exec(cpu);
1397
1398                 process_icount_data(cpu);
1399
1400                 if (r == EXCP_DEBUG) {
1401                     cpu_handle_guest_debug(cpu);
1402                     break;
1403                 } else if (r == EXCP_ATOMIC) {
1404                     qemu_mutex_unlock_iothread();
1405                     cpu_exec_step_atomic(cpu);
1406                     qemu_mutex_lock_iothread();
1407                     break;
1408                 }
1409             } else if (cpu->stop) {
1410                 if (cpu->unplug) {
1411                     cpu = CPU_NEXT(cpu);
1412                 }
1413                 break;
1414             }
1415
1416             cpu = CPU_NEXT(cpu);
1417         } /* while (cpu && !cpu->exit_request).. */
1418
1419         /* Does not need atomic_mb_set because a spurious wakeup is okay.  */
1420         atomic_set(&tcg_current_rr_cpu, NULL);
1421
1422         if (cpu && cpu->exit_request) {
1423             atomic_mb_set(&cpu->exit_request, 0);
1424         }
1425
1426         qemu_tcg_wait_io_event(cpu ? cpu : QTAILQ_FIRST(&cpus));
1427         deal_with_unplugged_cpus();
1428     }
1429
1430     return NULL;
1431 }
1432
1433 static void *qemu_hax_cpu_thread_fn(void *arg)
1434 {
1435     CPUState *cpu = arg;
1436     int r;
1437
1438     qemu_mutex_lock_iothread();
1439     qemu_thread_get_self(cpu->thread);
1440
1441     cpu->thread_id = qemu_get_thread_id();
1442     cpu->created = true;
1443     cpu->halted = 0;
1444     current_cpu = cpu;
1445
1446     hax_init_vcpu(cpu);
1447     qemu_cond_signal(&qemu_cpu_cond);
1448
1449     while (1) {
1450         if (cpu_can_run(cpu)) {
1451             r = hax_smp_cpu_exec(cpu);
1452             if (r == EXCP_DEBUG) {
1453                 cpu_handle_guest_debug(cpu);
1454             }
1455         }
1456
1457         while (cpu_thread_is_idle(cpu)) {
1458             qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
1459         }
1460 #ifdef _WIN32
1461         SleepEx(0, TRUE);
1462 #endif
1463         qemu_wait_io_event_common(cpu);
1464     }
1465     return NULL;
1466 }
1467
1468 /* The HVF-specific vCPU thread function. This one should only run when the host
1469  * CPU supports the VMX "unrestricted guest" feature. */
1470 static void *qemu_hvf_cpu_thread_fn(void *arg)
1471 {
1472     CPUState *cpu = arg;
1473
1474     int r;
1475
1476     assert(hvf_enabled());
1477
1478     rcu_register_thread();
1479
1480     qemu_mutex_lock_iothread();
1481     qemu_thread_get_self(cpu->thread);
1482
1483     cpu->thread_id = qemu_get_thread_id();
1484     cpu->can_do_io = 1;
1485     current_cpu = cpu;
1486
1487     hvf_init_vcpu(cpu);
1488
1489     /* signal CPU creation */
1490     cpu->created = true;
1491     qemu_cond_signal(&qemu_cpu_cond);
1492
1493     do {
1494         if (cpu_can_run(cpu)) {
1495             r = hvf_vcpu_exec(cpu);
1496             if (r == EXCP_DEBUG) {
1497                 cpu_handle_guest_debug(cpu);
1498             }
1499         }
1500         qemu_hvf_wait_io_event(cpu);
1501     } while (!cpu->unplug || cpu_can_run(cpu));
1502
1503     hvf_vcpu_destroy(cpu);
1504     cpu->created = false;
1505     qemu_cond_signal(&qemu_cpu_cond);
1506     qemu_mutex_unlock_iothread();
1507     return NULL;
1508 }
1509
1510 #ifdef _WIN32
1511 static void CALLBACK dummy_apc_func(ULONG_PTR unused)
1512 {
1513 }
1514 #endif
1515
1516 /* Multi-threaded TCG
1517  *
1518  * In the multi-threaded case each vCPU has its own thread. The TLS
1519  * variable current_cpu can be used deep in the code to find the
1520  * current CPUState for a given thread.
1521  */
1522
1523 static void *qemu_tcg_cpu_thread_fn(void *arg)
1524 {
1525     CPUState *cpu = arg;
1526
1527     g_assert(!use_icount);
1528
1529     rcu_register_thread();
1530     tcg_register_thread();
1531
1532     qemu_mutex_lock_iothread();
1533     qemu_thread_get_self(cpu->thread);
1534
1535     cpu->thread_id = qemu_get_thread_id();
1536     cpu->created = true;
1537     cpu->can_do_io = 1;
1538     current_cpu = cpu;
1539     qemu_cond_signal(&qemu_cpu_cond);
1540
1541     /* process any pending work */
1542     cpu->exit_request = 1;
1543
1544     while (1) {
1545         if (cpu_can_run(cpu)) {
1546             int r;
1547             r = tcg_cpu_exec(cpu);
1548             switch (r) {
1549             case EXCP_DEBUG:
1550                 cpu_handle_guest_debug(cpu);
1551                 break;
1552             case EXCP_HALTED:
1553                 /* during start-up the vCPU is reset and the thread is
1554                  * kicked several times. If we don't ensure we go back
1555                  * to sleep in the halted state we won't cleanly
1556                  * start up when the vCPU is enabled.
1557                  *
1558                  * cpu->halted should ensure we sleep in wait_io_event
1559                  */
1560                 g_assert(cpu->halted);
1561                 break;
1562             case EXCP_ATOMIC:
1563                 qemu_mutex_unlock_iothread();
1564                 cpu_exec_step_atomic(cpu);
1565                 qemu_mutex_lock_iothread();
1566             default:
1567                 /* Ignore everything else? */
1568                 break;
1569             }
1570         } else if (cpu->unplug) {
1571             qemu_tcg_destroy_vcpu(cpu);
1572             cpu->created = false;
1573             qemu_cond_signal(&qemu_cpu_cond);
1574             qemu_mutex_unlock_iothread();
1575             return NULL;
1576         }
1577
1578         atomic_mb_set(&cpu->exit_request, 0);
1579         qemu_tcg_wait_io_event(cpu);
1580     }
1581
1582     return NULL;
1583 }
1584
1585 static void qemu_cpu_kick_thread(CPUState *cpu)
1586 {
1587 #ifndef _WIN32
1588     int err;
1589
1590     if (cpu->thread_kicked) {
1591         return;
1592     }
1593     cpu->thread_kicked = true;
1594     err = pthread_kill(cpu->thread->thread, SIG_IPI);
1595     if (err) {
1596         fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
1597         exit(1);
1598     }
1599 #else /* _WIN32 */
1600     if (!qemu_cpu_is_self(cpu)) {
1601         if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
1602             fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
1603                     __func__, GetLastError());
1604             exit(1);
1605         }
1606     }
1607 #endif
1608 }
1609
1610 void qemu_cpu_kick(CPUState *cpu)
1611 {
1612     qemu_cond_broadcast(cpu->halt_cond);
1613     if (tcg_enabled()) {
1614         cpu_exit(cpu);
1615         /* NOP unless doing single-thread RR */
1616         qemu_cpu_kick_rr_cpu();
1617     } else {
1618         if (hax_enabled()) {
1619             /*
1620              * FIXME: race condition with the exit_request check in
1621              * hax_vcpu_hax_exec
1622              */
1623             cpu->exit_request = 1;
1624         }
1625         qemu_cpu_kick_thread(cpu);
1626     }
1627 }
1628
1629 void qemu_cpu_kick_self(void)
1630 {
1631     assert(current_cpu);
1632     qemu_cpu_kick_thread(current_cpu);
1633 }
1634
1635 bool qemu_cpu_is_self(CPUState *cpu)
1636 {
1637     return qemu_thread_is_self(cpu->thread);
1638 }
1639
1640 bool qemu_in_vcpu_thread(void)
1641 {
1642     return current_cpu && qemu_cpu_is_self(current_cpu);
1643 }
1644
1645 static __thread bool iothread_locked = false;
1646
1647 bool qemu_mutex_iothread_locked(void)
1648 {
1649     return iothread_locked;
1650 }
1651
1652 void qemu_mutex_lock_iothread(void)
1653 {
1654     g_assert(!qemu_mutex_iothread_locked());
1655     qemu_mutex_lock(&qemu_global_mutex);
1656     iothread_locked = true;
1657 }
1658
1659 void qemu_mutex_unlock_iothread(void)
1660 {
1661     g_assert(qemu_mutex_iothread_locked());
1662     iothread_locked = false;
1663     qemu_mutex_unlock(&qemu_global_mutex);
1664 }
1665
1666 static bool all_vcpus_paused(void)
1667 {
1668     CPUState *cpu;
1669
1670     CPU_FOREACH(cpu) {
1671         if (!cpu->stopped) {
1672             return false;
1673         }
1674     }
1675
1676     return true;
1677 }
1678
1679 void pause_all_vcpus(void)
1680 {
1681     CPUState *cpu;
1682
1683     qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
1684     CPU_FOREACH(cpu) {
1685         if (qemu_cpu_is_self(cpu)) {
1686             qemu_cpu_stop(cpu, true);
1687         } else {
1688             cpu->stop = true;
1689             qemu_cpu_kick(cpu);
1690         }
1691     }
1692
1693     while (!all_vcpus_paused()) {
1694         qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
1695         CPU_FOREACH(cpu) {
1696             qemu_cpu_kick(cpu);
1697         }
1698     }
1699 }
1700
1701 void cpu_resume(CPUState *cpu)
1702 {
1703     cpu->stop = false;
1704     cpu->stopped = false;
1705     qemu_cpu_kick(cpu);
1706 }
1707
1708 void resume_all_vcpus(void)
1709 {
1710     CPUState *cpu;
1711
1712     qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
1713     CPU_FOREACH(cpu) {
1714         cpu_resume(cpu);
1715     }
1716 }
1717
1718 void cpu_remove(CPUState *cpu)
1719 {
1720     cpu->stop = true;
1721     cpu->unplug = true;
1722     qemu_cpu_kick(cpu);
1723 }
1724
1725 void cpu_remove_sync(CPUState *cpu)
1726 {
1727     cpu_remove(cpu);
1728     while (cpu->created) {
1729         qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1730     }
1731 }
1732
1733 /* For temporary buffers for forming a name */
1734 #define VCPU_THREAD_NAME_SIZE 16
1735
1736 static void qemu_tcg_init_vcpu(CPUState *cpu)
1737 {
1738     char thread_name[VCPU_THREAD_NAME_SIZE];
1739     static QemuCond *single_tcg_halt_cond;
1740     static QemuThread *single_tcg_cpu_thread;
1741     static int tcg_region_inited;
1742
1743     /*
1744      * Initialize TCG regions--once. Now is a good time, because:
1745      * (1) TCG's init context, prologue and target globals have been set up.
1746      * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
1747      *     -accel flag is processed, so the check doesn't work then).
1748      */
1749     if (!tcg_region_inited) {
1750         tcg_region_inited = 1;
1751         tcg_region_init();
1752     }
1753
1754     if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
1755         cpu->thread = g_malloc0(sizeof(QemuThread));
1756         cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1757         qemu_cond_init(cpu->halt_cond);
1758
1759         if (qemu_tcg_mttcg_enabled()) {
1760             /* create a thread per vCPU with TCG (MTTCG) */
1761             parallel_cpus = true;
1762             snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
1763                  cpu->cpu_index);
1764
1765             qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
1766                                cpu, QEMU_THREAD_JOINABLE);
1767
1768         } else {
1769             /* share a single thread for all cpus with TCG */
1770             snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
1771             qemu_thread_create(cpu->thread, thread_name,
1772                                qemu_tcg_rr_cpu_thread_fn,
1773                                cpu, QEMU_THREAD_JOINABLE);
1774
1775             single_tcg_halt_cond = cpu->halt_cond;
1776             single_tcg_cpu_thread = cpu->thread;
1777         }
1778 #ifdef _WIN32
1779         cpu->hThread = qemu_thread_get_handle(cpu->thread);
1780 #endif
1781         while (!cpu->created) {
1782             qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1783         }
1784     } else {
1785         /* For non-MTTCG cases we share the thread */
1786         cpu->thread = single_tcg_cpu_thread;
1787         cpu->halt_cond = single_tcg_halt_cond;
1788     }
1789 }
1790
1791 static void qemu_hax_start_vcpu(CPUState *cpu)
1792 {
1793     char thread_name[VCPU_THREAD_NAME_SIZE];
1794
1795     cpu->thread = g_malloc0(sizeof(QemuThread));
1796     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1797     qemu_cond_init(cpu->halt_cond);
1798
1799     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
1800              cpu->cpu_index);
1801     qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
1802                        cpu, QEMU_THREAD_JOINABLE);
1803 #ifdef _WIN32
1804     cpu->hThread = qemu_thread_get_handle(cpu->thread);
1805 #endif
1806     while (!cpu->created) {
1807         qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1808     }
1809 }
1810
1811 static void qemu_kvm_start_vcpu(CPUState *cpu)
1812 {
1813     char thread_name[VCPU_THREAD_NAME_SIZE];
1814
1815     cpu->thread = g_malloc0(sizeof(QemuThread));
1816     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1817     qemu_cond_init(cpu->halt_cond);
1818     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
1819              cpu->cpu_index);
1820     qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
1821                        cpu, QEMU_THREAD_JOINABLE);
1822     while (!cpu->created) {
1823         qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1824     }
1825 }
1826
1827 static void qemu_hvf_start_vcpu(CPUState *cpu)
1828 {
1829     char thread_name[VCPU_THREAD_NAME_SIZE];
1830
1831     /* HVF currently does not support TCG, and only runs in
1832      * unrestricted-guest mode. */
1833     assert(hvf_enabled());
1834
1835     cpu->thread = g_malloc0(sizeof(QemuThread));
1836     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1837     qemu_cond_init(cpu->halt_cond);
1838
1839     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
1840              cpu->cpu_index);
1841     qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
1842                        cpu, QEMU_THREAD_JOINABLE);
1843     while (!cpu->created) {
1844         qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1845     }
1846 }
1847
1848 static void qemu_dummy_start_vcpu(CPUState *cpu)
1849 {
1850     char thread_name[VCPU_THREAD_NAME_SIZE];
1851
1852     cpu->thread = g_malloc0(sizeof(QemuThread));
1853     cpu->halt_cond = g_malloc0(sizeof(QemuCond));
1854     qemu_cond_init(cpu->halt_cond);
1855     snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
1856              cpu->cpu_index);
1857     qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
1858                        QEMU_THREAD_JOINABLE);
1859     while (!cpu->created) {
1860         qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
1861     }
1862 }
1863
1864 void qemu_init_vcpu(CPUState *cpu)
1865 {
1866     cpu->nr_cores = smp_cores;
1867     cpu->nr_threads = smp_threads;
1868     cpu->stopped = true;
1869
1870     if (!cpu->as) {
1871         /* If the target cpu hasn't set up any address spaces itself,
1872          * give it the default one.
1873          */
1874         cpu->num_ases = 1;
1875         cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
1876     }
1877
1878     if (kvm_enabled()) {
1879         qemu_kvm_start_vcpu(cpu);
1880     } else if (hax_enabled()) {
1881         qemu_hax_start_vcpu(cpu);
1882     } else if (hvf_enabled()) {
1883         qemu_hvf_start_vcpu(cpu);
1884     } else if (tcg_enabled()) {
1885         qemu_tcg_init_vcpu(cpu);
1886     } else {
1887         qemu_dummy_start_vcpu(cpu);
1888     }
1889 }
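/*
 * Illustrative sketch: how a target typically reaches qemu_init_vcpu().
 * A CPU device's realize function calls it once the CPU object is set up;
 * the accelerator checks above then pick the matching *_start_vcpu()
 * helper, which spawns the vCPU thread and waits on qemu_cpu_cond until
 * that thread has set cpu->created.  The "mycpu" names below are
 * hypothetical and for illustration only:
 *
 *     static void mycpu_realizefn(DeviceState *dev, Error **errp)
 *     {
 *         CPUState *cs = CPU(dev);
 *
 *         // target-specific feature and register setup would go here
 *         qemu_init_vcpu(cs);
 *         cpu_reset(cs);
 *     }
 */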
1890
1891 void cpu_stop_current(void)
1892 {
1893     if (current_cpu) {
1894         qemu_cpu_stop(current_cpu, true);
1895     }
1896 }
1897
1898 int vm_stop(RunState state)
1899 {
1900     if (qemu_in_vcpu_thread()) {
1901         qemu_system_vmstop_request_prepare();
1902         qemu_system_vmstop_request(state);
1903         /*
1904          * FIXME: should not return to device code when
1905          * vm_stop() has been requested.
1906          */
1907         cpu_stop_current();
1908         return 0;
1909     }
1910
1911     return do_vm_stop(state);
1912 }
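/*
 * Example (assumed QMP flow): the monitor's "stop" command ends up here,
 * typically as vm_stop(RUN_STATE_PAUSED), and a STOP event is emitted once
 * the vCPUs have been paused.  A minimal session might look like:
 *
 *     -> { "execute": "stop" }
 *     <- { "return": {} }
 *     <- { "event": "STOP", ... }
 *
 * Event timestamp fields are omitted above.
 */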
1913
1914 /**
1915  * Prepare for (re)starting the VM.
1916  * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
1917  * running or in case of an error condition), 0 otherwise.
1918  */
1919 int vm_prepare_start(void)
1920 {
1921     RunState requested;
1922     int res = 0;
1923
1924     qemu_vmstop_requested(&requested);
1925     if (runstate_is_running() && requested == RUN_STATE__MAX) {
1926         return -1;
1927     }
1928
1929     /* Ensure that a STOP/RESUME pair of events is emitted if a
1930      * vmstop request was pending.  The BLOCK_IO_ERROR event, for
1931      * example, is documented as always being followed by the
1932      * STOP event.
1933      */
1934     if (runstate_is_running()) {
1935         qapi_event_send_stop(&error_abort);
1936         res = -1;
1937     } else {
1938         replay_enable_events();
1939         cpu_enable_ticks();
1940         runstate_set(RUN_STATE_RUNNING);
1941         vm_state_notify(1, RUN_STATE_RUNNING);
1942     }
1943
1944     /* We are sending this now, but the CPUs will be resumed shortly afterwards */
1945     qapi_event_send_resume(&error_abort);
1946     return res;
1947 }
1948
1949 void vm_start(void)
1950 {
1951     if (!vm_prepare_start()) {
1952         resume_all_vcpus();
1953     }
1954 }
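/*
 * Example (assumed QMP flow): the monitor's "cont" command resumes a
 * stopped guest through vm_start() above; vm_prepare_start() emits the
 * RESUME event before the vCPUs actually run again.  A minimal session
 * might look like:
 *
 *     -> { "execute": "cont" }
 *     <- { "return": {} }
 *     <- { "event": "RESUME", ... }
 */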
1955
1956 /* Does a state transition even if the VM is already stopped;
1957    the current state is forgotten forever. */
1958 int vm_stop_force_state(RunState state)
1959 {
1960     if (runstate_is_running()) {
1961         return vm_stop(state);
1962     } else {
1963         runstate_set(state);
1964
1965         bdrv_drain_all();
1966         /* Make sure to return an error if the flush in a previous vm_stop()
1967          * failed. */
1968         return bdrv_flush_all();
1969     }
1970 }
1971
1972 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
1973 {
1974     /* XXX: implement xxx_cpu_list for targets that still lack it */
1975 #if defined(cpu_list)
1976     cpu_list(f, cpu_fprintf);
1977 #endif
1978 }
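/*
 * Usage note (assumption): list_cpus() backs the "-cpu help" command-line
 * option on targets that define cpu_list(), printing the supported CPU
 * models and exiting, e.g.:
 *
 *     qemu-system-x86_64 -cpu help
 */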
1979
1980 CpuInfoList *qmp_query_cpus(Error **errp)
1981 {
1982     MachineState *ms = MACHINE(qdev_get_machine());
1983     MachineClass *mc = MACHINE_GET_CLASS(ms);
1984     CpuInfoList *head = NULL, *cur_item = NULL;
1985     CPUState *cpu;
1986
1987     CPU_FOREACH(cpu) {
1988         CpuInfoList *info;
1989 #if defined(TARGET_I386)
1990         X86CPU *x86_cpu = X86_CPU(cpu);
1991         CPUX86State *env = &x86_cpu->env;
1992 #elif defined(TARGET_PPC)
1993         PowerPCCPU *ppc_cpu = POWERPC_CPU(cpu);
1994         CPUPPCState *env = &ppc_cpu->env;
1995 #elif defined(TARGET_SPARC)
1996         SPARCCPU *sparc_cpu = SPARC_CPU(cpu);
1997         CPUSPARCState *env = &sparc_cpu->env;
1998 #elif defined(TARGET_MIPS)
1999         MIPSCPU *mips_cpu = MIPS_CPU(cpu);
2000         CPUMIPSState *env = &mips_cpu->env;
2001 #elif defined(TARGET_TRICORE)
2002         TriCoreCPU *tricore_cpu = TRICORE_CPU(cpu);
2003         CPUTriCoreState *env = &tricore_cpu->env;
2004 #endif
2005
2006         cpu_synchronize_state(cpu);
2007
2008         info = g_malloc0(sizeof(*info));
2009         info->value = g_malloc0(sizeof(*info->value));
2010         info->value->CPU = cpu->cpu_index;
2011         info->value->current = (cpu == first_cpu);
2012         info->value->halted = cpu->halted;
2013         info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
2014         info->value->thread_id = cpu->thread_id;
2015 #if defined(TARGET_I386)
2016         info->value->arch = CPU_INFO_ARCH_X86;
2017         info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
2018 #elif defined(TARGET_PPC)
2019         info->value->arch = CPU_INFO_ARCH_PPC;
2020         info->value->u.ppc.nip = env->nip;
2021 #elif defined(TARGET_SPARC)
2022         info->value->arch = CPU_INFO_ARCH_SPARC;
2023         info->value->u.q_sparc.pc = env->pc;
2024         info->value->u.q_sparc.npc = env->npc;
2025 #elif defined(TARGET_MIPS)
2026         info->value->arch = CPU_INFO_ARCH_MIPS;
2027         info->value->u.q_mips.PC = env->active_tc.PC;
2028 #elif defined(TARGET_TRICORE)
2029         info->value->arch = CPU_INFO_ARCH_TRICORE;
2030         info->value->u.tricore.PC = env->PC;
2031 #else
2032         info->value->arch = CPU_INFO_ARCH_OTHER;
2033 #endif
2034         info->value->has_props = !!mc->cpu_index_to_instance_props;
2035         if (info->value->has_props) {
2036             CpuInstanceProperties *props;
2037             props = g_malloc0(sizeof(*props));
2038             *props = mc->cpu_index_to_instance_props(ms, cpu->cpu_index);
2039             info->value->props = props;
2040         }
2041
2042         /* XXX: waiting for QAPI to support GSList */
2043         if (!cur_item) {
2044             head = cur_item = info;
2045         } else {
2046             cur_item->next = info;
2047             cur_item = info;
2048         }
2049     }
2050
2051     return head;
2052 }
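/*
 * Example (all values are illustrative only): a QMP "query-cpus" exchange
 * for a single-vCPU x86 guest.  The arch-specific "pc" field comes from
 * the TARGET_I386 branch above; qom_path and thread_id depend on the
 * machine type and the host:
 *
 *     -> { "execute": "query-cpus" }
 *     <- { "return": [
 *              { "CPU": 0, "current": true, "halted": false,
 *                "arch": "x86", "pc": 1048560,
 *                "qom_path": "/machine/unattached/device[0]",
 *                "thread_id": 3134 } ] }
 */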
2053
2054 void qmp_memsave(int64_t addr, int64_t size, const char *filename,
2055                  bool has_cpu, int64_t cpu_index, Error **errp)
2056 {
2057     FILE *f;
2058     uint32_t l;
2059     CPUState *cpu;
2060     uint8_t buf[1024];
2061     int64_t orig_addr = addr, orig_size = size;
2062
2063     if (!has_cpu) {
2064         cpu_index = 0;
2065     }
2066
2067     cpu = qemu_get_cpu(cpu_index);
2068     if (cpu == NULL) {
2069         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
2070                    "a CPU number");
2071         return;
2072     }
2073
2074     f = fopen(filename, "wb");
2075     if (!f) {
2076         error_setg_file_open(errp, errno, filename);
2077         return;
2078     }
2079
2080     while (size != 0) {
2081         l = sizeof(buf);
2082         if (l > size)
2083             l = size;
2084         if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
2085             error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
2086                              " specified", orig_addr, orig_size);
2087             goto exit;
2088         }
2089         if (fwrite(buf, 1, l, f) != l) {
2090             error_setg(errp, QERR_IO_ERROR);
2091             goto exit;
2092         }
2093         addr += l;
2094         size -= l;
2095     }
2096
2097 exit:
2098     fclose(f);
2099 }
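/*
 * Example (path and values are illustrative): the QMP "memsave" command
 * maps onto qmp_memsave() above; "val" is the guest virtual address to
 * start from, read in the context of the optional "cpu-index":
 *
 *     -> { "execute": "memsave",
 *          "arguments": { "val": 10, "size": 100,
 *                         "filename": "/tmp/virtual-mem-dump" } }
 *     <- { "return": {} }
 */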
2100
2101 void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
2102                   Error **errp)
2103 {
2104     FILE *f;
2105     uint32_t l;
2106     uint8_t buf[1024];
2107
2108     f = fopen(filename, "wb");
2109     if (!f) {
2110         error_setg_file_open(errp, errno, filename);
2111         return;
2112     }
2113
2114     while (size != 0) {
2115         l = sizeof(buf);
2116         if (l > size)
2117             l = size;
2118         cpu_physical_memory_read(addr, buf, l);
2119         if (fwrite(buf, 1, l, f) != l) {
2120             error_setg(errp, QERR_IO_ERROR);
2121             goto exit;
2122         }
2123         addr += l;
2124         size -= l;
2125     }
2126
2127 exit:
2128     fclose(f);
2129 }
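/*
 * Example (path and values are illustrative): the physical-memory
 * counterpart, QMP "pmemsave", maps onto qmp_pmemsave() above and takes a
 * guest physical address instead of a virtual one:
 *
 *     -> { "execute": "pmemsave",
 *          "arguments": { "val": 10, "size": 100,
 *                         "filename": "/tmp/physical-mem-dump" } }
 *     <- { "return": {} }
 */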
2130
2131 void qmp_inject_nmi(Error **errp)
2132 {
2133     nmi_monitor_handle(monitor_get_cpu_index(), errp);
2134 }
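/*
 * Example: the QMP "inject-nmi" command takes no arguments; it asks the
 * machine's NMI handler to inject an NMI (or the closest target-specific
 * equivalent), using the current monitor CPU where that matters:
 *
 *     -> { "execute": "inject-nmi" }
 *     <- { "return": {} }
 */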
2135
2136 void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
2137 {
2138     if (!use_icount) {
2139         return;
2140     }
2141
2142     cpu_fprintf(f, "Host - Guest clock  %"PRIi64" ms\n",
2143                 (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
2144     if (icount_align_option) {
2145         cpu_fprintf(f, "Max guest delay     %"PRIi64" ms\n", -max_delay/SCALE_MS);
2146         cpu_fprintf(f, "Max guest advance   %"PRIi64" ms\n", max_advance/SCALE_MS);
2147     } else {
2148         cpu_fprintf(f, "Max guest delay     NA\n");
2149         cpu_fprintf(f, "Max guest advance   NA\n");
2150     }
2151 }
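/*
 * Usage note (assumption): dump_drift_info() is reached from the HMP
 * "info jit" command and only prints when icount is in use (e.g. when the
 * guest was started with "-icount shift=auto,align=on").  Typical output
 * might look like the following; the numbers are illustrative only:
 *
 *     Host - Guest clock  12 ms
 *     Max guest delay     5 ms
 *     Max guest advance   2 ms
 */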