#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
+#include "qemu/error-report.h"
#include "sysemu/sysemu.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
/* Protected by TimersState seqlock */
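+/* When sleep is disabled, QEMU_CLOCK_VIRTUAL jumps straight to the next
+ * timer deadline instead of letting the VCPUs sleep (-icount sleep=off). */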
+static bool icount_sleep = true;
static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks. */
static int icount_time_shift;
icount = timers_state.qemu_icount;
if (cpu) {
- if (!cpu_can_do_io(cpu)) {
+ if (!cpu->can_do_io) {
fprintf(stderr, "Bad icount read\n");
exit(1);
}
return;
}
- /*
- * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
- * This ensures that the deadline for the timer is computed correctly below.
- * This also makes sure that the insn counter is synchronized before the
- * CPU starts running, in case the CPU is woken by an event other than
- * the earliest QEMU_CLOCK_VIRTUAL timer.
- */
- icount_warp_rt(NULL);
- timer_del(icount_warp_timer);
+ if (icount_sleep) {
+ /*
+ * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
+ * This ensures that the deadline for the timer is computed correctly
+ * below.
+ * This also makes sure that the insn counter is synchronized before
+ * the CPU starts running, in case the CPU is woken by an event other
+ * than the earliest QEMU_CLOCK_VIRTUAL timer.
+ */
+ icount_warp_rt(NULL);
+ timer_del(icount_warp_timer);
+ }
if (!all_cpu_threads_idle()) {
return;
}
clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
if (deadline < 0) {
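+ /* warn only once: there is no QEMU_CLOCK_VIRTUAL timer to advance to,
+ * so with sleep disabled the virtual clock would stall here */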
+ static bool notified;
+ if (!icount_sleep && !notified) {
+ error_report("WARNING: icount sleep disabled and no active timers");
+ notified = true;
+ }
return;
}
* interrupt to wake it up, but the interrupt never comes because
* the vCPU isn't running any insns and thus doesn't advance the
* QEMU_CLOCK_VIRTUAL.
- *
- * An extreme solution for this problem would be to never let VCPUs
- * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
- * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
- * event. Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
- * after some "real" time, (related to the time left until the next
- * event) has passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
- * This avoids that the warps are visible externally; for example,
- * you will not be sending network packets continuously instead of
- * every 100ms.
*/
- seqlock_write_lock(&timers_state.vm_clock_seqlock);
- if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
- vm_clock_warp_start = clock;
+ if (!icount_sleep) {
+ /*
+ * When icount sleep is disabled, we never let the VCPUs sleep.
+ * If there is a pending QEMU_CLOCK_VIRTUAL timer, we just advance the
+ * virtual clock to the next QEMU_CLOCK_VIRTUAL event and notify it.
+ * This is useful when we want deterministic execution time, isolated
+ * from host latencies.
+ */
+ seqlock_write_lock(&timers_state.vm_clock_seqlock);
+ timers_state.qemu_icount_bias += deadline;
+ seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+ qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
+ } else {
+ /*
+ * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
+ * "real" time, (related to the time left until the next event) has
+ * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
+ * This avoids that the warps are visible externally; for example,
+ * you will not be sending network packets continuously instead of
+ * every 100ms.
+ */
+ seqlock_write_lock(&timers_state.vm_clock_seqlock);
+ if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
+ vm_clock_warp_start = clock;
+ }
+ seqlock_write_unlock(&timers_state.vm_clock_seqlock);
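+ /* timer_mod_anticipate() only ever moves the expiry earlier, so the
+ * soonest requested warp deadline wins */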
+ timer_mod_anticipate(icount_warp_timer, clock + deadline);
}
- seqlock_write_unlock(&timers_state.vm_clock_seqlock);
- timer_mod_anticipate(icount_warp_timer, clock + deadline);
} else if (deadline == 0) {
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}
.name = "timer/icount",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = icount_state_needed,
.fields = (VMStateField[]) {
VMSTATE_INT64(qemu_icount_bias, TimersState),
VMSTATE_INT64(qemu_icount, TimersState),
VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection[]) {
- {
- .vmsd = &icount_vmstate_timers,
- .needed = icount_state_needed,
- }, {
- /* empty */
- }
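+ /* subsections are now a NULL-terminated array of VMStateDescription
+ * pointers; each subsection carries its own .needed callback */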
+ .subsections = (const VMStateDescription*[]) {
+ &icount_vmstate_timers,
+ NULL
}
};
}
return;
}
+
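+ /* "sleep" defaults to on; sleep=off selects the deterministic
+ * no-sleep warp mode implemented in qemu_clock_warp() above */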
+ icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
+ if (icount_sleep) {
+ icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
+ icount_warp_rt, NULL);
+ }
+
icount_align_option = qemu_opt_get_bool(opts, "align", false);
- icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
- icount_warp_rt, NULL);
+
+ if (icount_align_option && !icount_sleep) {
+ error_setg(errp, "align=on and sleep=off are incompatible");
+ }
if (strcmp(option, "auto") != 0) {
errno = 0;
icount_time_shift = strtol(option, &rem_str, 0);
return;
} else if (icount_align_option) {
error_setg(errp, "shift=auto and align=on are incompatible");
+ } else if (!icount_sleep) {
+ error_setg(errp, "shift=auto and sleep=off are incompatible");
}
use_icount = 2;
cpu->stopped = true;
}
-static void cpu_signal(int sig)
-{
- if (current_cpu) {
- cpu_exit(current_cpu);
- }
- exit_request = 1;
-}
-
#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
}
}
-static void qemu_tcg_init_cpu_signals(void)
-{
- sigset_t set;
- struct sigaction sigact;
-
- memset(&sigact, 0, sizeof(sigact));
- sigact.sa_handler = cpu_signal;
- sigaction(SIG_IPI, &sigact, NULL);
-
- sigemptyset(&set);
- sigaddset(&set, SIG_IPI);
- pthread_sigmask(SIG_UNBLOCK, &set, NULL);
-}
-
#else /* _WIN32 */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
abort();
}
-
-static void qemu_tcg_init_cpu_signals(void)
-{
-}
#endif /* _WIN32 */
static QemuMutex qemu_global_mutex;
static QemuThread io_thread;
-static QemuThread *tcg_cpu_thread;
-static QemuCond *tcg_halt_cond;
-
/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
wi.func = func;
wi.data = data;
wi.free = false;
+
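+ /* cpu->work_mutex protects the queued_work list; the VCPU thread
+ * drains it in flush_queued_work() */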
+ qemu_mutex_lock(&cpu->work_mutex);
if (cpu->queued_work_first == NULL) {
cpu->queued_work_first = &wi;
} else {
cpu->queued_work_last = &wi;
wi.next = NULL;
wi.done = false;
+ qemu_mutex_unlock(&cpu->work_mutex);
qemu_cpu_kick(cpu);
- while (!wi.done) {
+ while (!atomic_mb_read(&wi.done)) {
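+ /* wait until the VCPU thread has run the item; completion is
+ * signalled with atomic_mb_set() on the done flag */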
CPUState *self_cpu = current_cpu;
qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
wi->func = func;
wi->data = data;
wi->free = true;
+
+ qemu_mutex_lock(&cpu->work_mutex);
if (cpu->queued_work_first == NULL) {
cpu->queued_work_first = wi;
} else {
cpu->queued_work_last = wi;
wi->next = NULL;
wi->done = false;
+ qemu_mutex_unlock(&cpu->work_mutex);
qemu_cpu_kick(cpu);
}
return;
}
- while ((wi = cpu->queued_work_first)) {
+ qemu_mutex_lock(&cpu->work_mutex);
+ while (cpu->queued_work_first != NULL) {
+ wi = cpu->queued_work_first;
cpu->queued_work_first = wi->next;
+ if (!cpu->queued_work_first) {
+ cpu->queued_work_last = NULL;
+ }
+ qemu_mutex_unlock(&cpu->work_mutex);
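+ /* run the work item with work_mutex dropped, so wi->func() may take
+ * other locks or queue further work without deadlocking */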
wi->func(wi->data);
- wi->done = true;
+ qemu_mutex_lock(&cpu->work_mutex);
if (wi->free) {
g_free(wi);
+ } else {
+ atomic_mb_set(&wi->done, true);
}
}
- cpu->queued_work_last = NULL;
+ qemu_mutex_unlock(&cpu->work_mutex);
qemu_cond_broadcast(&qemu_work_cond);
}
cpu->thread_kicked = false;
}
-static void qemu_tcg_wait_io_event(void)
+static void qemu_tcg_wait_io_event(CPUState *cpu)
{
- CPUState *cpu;
-
while (all_cpu_threads_idle()) {
/* Start accounting real time to the virtual clock if the CPUs
are idle. */
qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
- qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
+ qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
}
while (iothread_requesting_mutex) {
CPUState *cpu = arg;
int r;
- qemu_mutex_lock(&qemu_global_mutex);
+ rcu_register_thread();
+
+ qemu_mutex_lock_iothread();
qemu_thread_get_self(cpu->thread);
cpu->thread_id = qemu_get_thread_id();
cpu->can_do_io = 1;
sigset_t waitset;
int r;
+ rcu_register_thread();
+
qemu_mutex_lock_iothread();
qemu_thread_get_self(cpu->thread);
cpu->thread_id = qemu_get_thread_id();
{
CPUState *cpu = arg;
- qemu_tcg_init_cpu_signals();
+ rcu_register_thread();
+
+ qemu_mutex_lock_iothread();
qemu_thread_get_self(cpu->thread);
- qemu_mutex_lock(&qemu_global_mutex);
CPU_FOREACH(cpu) {
cpu->thread_id = qemu_get_thread_id();
cpu->created = true;
qemu_cond_signal(&qemu_cpu_cond);
/* wait for initial kick-off after machine start */
- while (QTAILQ_FIRST(&cpus)->stopped) {
- qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
+ while (first_cpu->stopped) {
+ qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
/* process any pending work */
CPU_FOREACH(cpu) {
}
/* process any pending work */
- exit_request = 1;
+ atomic_mb_set(&exit_request, 1);
while (1) {
tcg_exec_all();
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}
}
- qemu_tcg_wait_io_event();
+ qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
}
return NULL;
#ifndef _WIN32
int err;
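+ /* coalesce kicks: thread_kicked stays set until the target thread
+ * clears it in qemu_wait_io_event_common() */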
+ if (cpu->thread_kicked) {
+ return;
+ }
+ cpu->thread_kicked = true;
err = pthread_kill(cpu->thread->thread, SIG_IPI);
if (err) {
fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
exit(1);
}
#else /* _WIN32 */
- if (!qemu_cpu_is_self(cpu)) {
- CONTEXT tcgContext;
-
- if (SuspendThread(cpu->hThread) == (DWORD)-1) {
- fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
- GetLastError());
- exit(1);
- }
-
- /* On multi-core systems, we are not sure that the thread is actually
- * suspended until we can get the context.
- */
- tcgContext.ContextFlags = CONTEXT_CONTROL;
- while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
- continue;
- }
-
- cpu_signal(0);
+ abort();
+#endif
+}
- if (ResumeThread(cpu->hThread) == (DWORD)-1) {
- fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
- GetLastError());
- exit(1);
- }
+static void qemu_cpu_kick_no_halt(void)
+{
+ CPUState *cpu;
+ /* Ensure whatever caused the exit has reached the CPU threads before
+ * writing exit_request.
+ */
+ atomic_mb_set(&exit_request, 1);
+ cpu = atomic_mb_read(&tcg_current_cpu);
+ if (cpu) {
+ cpu_exit(cpu);
}
-#endif
}
void qemu_cpu_kick(CPUState *cpu)
{
qemu_cond_broadcast(cpu->halt_cond);
- if (!tcg_enabled() && !cpu->thread_kicked) {
+ if (tcg_enabled()) {
+ qemu_cpu_kick_no_halt();
+ } else {
qemu_cpu_kick_thread(cpu);
- cpu->thread_kicked = true;
}
}
void qemu_cpu_kick_self(void)
{
-#ifndef _WIN32
assert(current_cpu);
-
- if (!current_cpu->thread_kicked) {
- qemu_cpu_kick_thread(current_cpu);
- current_cpu->thread_kicked = true;
- }
-#else
- abort();
-#endif
+ qemu_cpu_kick_thread(current_cpu);
}
bool qemu_cpu_is_self(CPUState *cpu)
return current_cpu && qemu_cpu_is_self(current_cpu);
}
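+/* per-thread flag: true while this thread holds the iothread lock (BQL) */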
+static __thread bool iothread_locked = false;
+
+bool qemu_mutex_iothread_locked(void)
+{
+ return iothread_locked;
+}
+
void qemu_mutex_lock_iothread(void)
{
atomic_inc(&iothread_requesting_mutex);
- if (!tcg_enabled() || !first_cpu) {
+ /* In the simple case there is no need to bump the VCPU thread out of
+ * TCG code execution.
+ */
+ if (!tcg_enabled() || qemu_in_vcpu_thread() ||
+ !first_cpu || !first_cpu->created) {
qemu_mutex_lock(&qemu_global_mutex);
atomic_dec(&iothread_requesting_mutex);
} else {
if (qemu_mutex_trylock(&qemu_global_mutex)) {
- qemu_cpu_kick_thread(first_cpu);
+ qemu_cpu_kick_no_halt();
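+ /* the kick forces the VCPU out of cpu_exec(), so the current holder
+ * will release the BQL and the lock below can be taken */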
qemu_mutex_lock(&qemu_global_mutex);
}
atomic_dec(&iothread_requesting_mutex);
qemu_cond_broadcast(&qemu_io_proceeded_cond);
}
+ iothread_locked = true;
}
void qemu_mutex_unlock_iothread(void)
{
+ iothread_locked = false;
qemu_mutex_unlock(&qemu_global_mutex);
}
static void qemu_tcg_init_vcpu(CPUState *cpu)
{
char thread_name[VCPU_THREAD_NAME_SIZE];
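+ /* a single TCG thread runs all VCPUs round-robin, so the thread and
+ * halt condition are created once and shared */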
+ static QemuCond *tcg_halt_cond;
+ static QemuThread *tcg_cpu_thread;
tcg_cpu_address_space_init(cpu, cpu->as);
}
}
-static int tcg_cpu_exec(CPUArchState *env)
+static int tcg_cpu_exec(CPUState *cpu)
{
- CPUState *cpu = ENV_GET_CPU(env);
int ret;
#ifdef CONFIG_PROFILER
int64_t ti;
cpu->icount_decr.u16.low = decr;
cpu->icount_extra = count;
}
- ret = cpu_exec(env);
+ ret = cpu_exec(cpu);
#ifdef CONFIG_PROFILER
tcg_time += profile_getclock() - ti;
#endif
}
for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
CPUState *cpu = next_cpu;
- CPUArchState *env = cpu->env_ptr;
qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
(cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
if (cpu_can_run(cpu)) {
- r = tcg_cpu_exec(env);
+ r = tcg_cpu_exec(cpu);
if (r == EXCP_DEBUG) {
cpu_handle_guest_debug(cpu);
break;
break;
}
}
- exit_request = 0;
+
+ /* Pairs with smp_wmb in qemu_cpu_kick. */
+ atomic_mb_set(&exit_request, 0);
}
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
info->value->CPU = cpu->cpu_index;
info->value->current = (cpu == first_cpu);
info->value->halted = cpu->halted;
+ info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
info->value->has_pc = true;
cpu = qemu_get_cpu(cpu_index);
if (cpu == NULL) {
- error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
- "a CPU number");
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
+ "a CPU number");
return;
}
goto exit;
}
if (fwrite(buf, 1, l, f) != l) {
- error_set(errp, QERR_IO_ERROR);
+ error_setg(errp, QERR_IO_ERROR);
goto exit;
}
addr += l;
l = size;
cpu_physical_memory_read(addr, buf, l);
if (fwrite(buf, 1, l, f) != l) {
- error_set(errp, QERR_IO_ERROR);
+ error_setg(errp, QERR_IO_ERROR);
goto exit;
}
addr += l;