#define V_L2_BITS 10
#define V_L2_SIZE (1 << V_L2_BITS)
-/* The bits remaining after N lower levels of page tables. */
-#define V_L1_BITS_REM \
- ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS)
-
-#if V_L1_BITS_REM < 4
-#define V_L1_BITS (V_L1_BITS_REM + V_L2_BITS)
-#else
-#define V_L1_BITS V_L1_BITS_REM
-#endif
-
-#define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
-
-#define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
-
uintptr_t qemu_host_page_size;
intptr_t qemu_host_page_mask;
-/* The bottom level has pointers to PageDesc */
-static void *l1_map[V_L1_SIZE];
+/*
+ * L1 Mapping properties
+ */
+static int v_l1_size;
+static int v_l1_shift;
+static int v_l2_levels;
+
+/* The bottom level has pointers to PageDesc, and is indexed by
+ * anything from 4 to (V_L2_BITS + 3) bits, depending on target page size.
+ */
+#define V_L1_MIN_BITS 4
+#define V_L1_MAX_BITS (V_L2_BITS + 3)
+#define V_L1_MAX_SIZE (1 << V_L1_MAX_BITS)
+
+static void *l1_map[V_L1_MAX_SIZE];
/* code generation context */
TCGContext tcg_ctx;
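+/* True when more than one vCPU thread may be executing translated code
+ * concurrently.
+ */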
+bool parallel_cpus;
/* translation block context */
#ifdef CONFIG_USER_ONLY
__thread int have_tb_lock;
#endif
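+/* Compute the page-table geometry at run time: the index bits
+ * (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) are split into one L1 level
+ * of at least V_L1_MIN_BITS bits plus (v_l2_levels + 1) further levels of
+ * V_L2_BITS bits each, the last of which holds the PageDesc pointers.
+ */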
+static void page_table_config_init(void)
+{
+ uint32_t v_l1_bits;
+
+ assert(TARGET_PAGE_BITS);
+ /* The bits remaining after N lower levels of page tables. */
+ v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
+ if (v_l1_bits < V_L1_MIN_BITS) {
+ v_l1_bits += V_L2_BITS;
+ }
+
+ v_l1_size = 1 << v_l1_bits;
+ v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
+ v_l2_levels = v_l1_shift / V_L2_BITS - 1;
+
+ assert(v_l1_bits <= V_L1_MAX_BITS);
+ assert(v_l1_shift % V_L2_BITS == 0);
+ assert(v_l2_levels >= 0);
+}
+
void tb_lock(void)
{
#ifdef CONFIG_USER_ONLY
int64_t ti = profile_getclock();
#endif
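+ /* searched_pc is a host return address; pull it back into the calling
+ * instruction itself before searching (see GETPC_ADJ).
+ */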
+ searched_pc -= GETPC_ADJ;
+
if (searched_pc < host_pc) {
return -1;
}
static void page_init(void)
{
page_size_init();
+ page_table_config_init();
+
#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
{
#ifdef HAVE_KINFO_GETVMMAP
int i;
/* Level 1. Always allocated. */
- lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
+ lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));
/* Level 2..N-1. */
- for (i = V_L1_SHIFT / V_L2_BITS - 1; i > 0; i--) {
+ for (i = v_l2_levels; i > 0; i--) {
void **p = atomic_rcu_read(lp);
if (p == NULL) {
tb = &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs++];
tb->pc = pc;
tb->cflags = 0;
+ tb->invalid = false;
return tb;
}
static void page_flush_tb(void)
{
- int i;
+ int i, l1_sz = v_l1_size;
- for (i = 0; i < V_L1_SIZE; i++) {
- page_flush_tb_1(V_L1_SHIFT / V_L2_BITS - 1, l1_map + i);
+ for (i = 0; i < l1_sz; i++) {
+ page_flush_tb_1(v_l2_levels, l1_map + i);
}
}
/* flush all the translation blocks */
-/* XXX: tb_flush is currently not thread safe */
-void tb_flush(CPUState *cpu)
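+/* Flush the whole code cache.  Runs as "safe work" scheduled by tb_flush()
+ * via async_safe_run_on_cpu(), with the other vCPUs quiescent; @data is the
+ * tb_flush_count that the requesting CPU observed.
+ */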
+static void do_tb_flush(CPUState *cpu, void *data)
{
+ unsigned tb_flush_req = (unsigned) (uintptr_t) data;
+
+ tb_lock();
+
+ /* If the flush has already been done on request of another CPU
+ * since this request was posted, there is nothing left to do.
+ */
+ if (tcg_ctx.tb_ctx.tb_flush_count != tb_flush_req) {
+ goto done;
+ }
+
#if defined(DEBUG_FLUSH)
printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
(unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer),
> tcg_ctx.code_gen_buffer_size) {
cpu_abort(cpu, "Internal error: code buffer overflow\n");
}
- tcg_ctx.tb_ctx.nb_tbs = 0;
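+ /* Clear the per-CPU lookup caches entry by entry: tb_jmp_cache is read
+ * without tb_lock, so use atomic_set() to avoid torn updates.
+ */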
CPU_FOREACH(cpu) {
- memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
- cpu->tb_flushed = true;
+ int i;
+
+ for (i = 0; i < TB_JMP_CACHE_SIZE; ++i) {
+ atomic_set(&cpu->tb_jmp_cache[i], NULL);
+ }
}
+ tcg_ctx.tb_ctx.nb_tbs = 0;
qht_reset_size(&tcg_ctx.tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
page_flush_tb();
tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer;
/* XXX: flush processor icache at this point if cache flush is
expensive */
- tcg_ctx.tb_ctx.tb_flush_count++;
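+ /* The increment pairs with the atomic_mb_read() in tb_flush(), so that
+ * a racing flush request observes the new count.
+ */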
+ atomic_mb_set(&tcg_ctx.tb_ctx.tb_flush_count,
+ tcg_ctx.tb_ctx.tb_flush_count + 1);
+
+done:
+ tb_unlock();
+}
+
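+/* Request a flush of the code cache.  The flush itself is deferred to
+ * do_tb_flush() via async_safe_run_on_cpu(), so callers must not assume
+ * the cache has already been emptied when this returns.
+ */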
+void tb_flush(CPUState *cpu)
+{
+ if (tcg_enabled()) {
+ uintptr_t tb_flush_req = atomic_mb_read(&tcg_ctx.tb_ctx.tb_flush_count);
+ async_safe_run_on_cpu(cpu, do_tb_flush, (void *) tb_flush_req);
+ }
}
#ifdef DEBUG_TB_CHECK
uint32_t h;
tb_page_addr_t phys_pc;
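+ /* Mark the TB invalid first, so that lockless lookups which still find
+ * it in the hash table or jump caches refuse to execute it.
+ */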
+ atomic_set(&tb->invalid, true);
+
/* remove the TB from the hash list */
phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
h = tb_hash_func(phys_pc, tb->pc, tb->flags);
/* remove the TB from each CPU's tb_jmp_cache */
h = tb_jmp_cache_hash_func(tb->pc);
CPU_FOREACH(cpu) {
- if (cpu->tb_jmp_cache[h] == tb) {
- cpu->tb_jmp_cache[h] = NULL;
+ if (atomic_read(&cpu->tb_jmp_cache[h]) == tb) {
+ atomic_set(&cpu->tb_jmp_cache[h], NULL);
}
}
{
uint32_t h;
- /* add in the hash table */
- h = tb_hash_func(phys_pc, tb->pc, tb->flags);
- qht_insert(&tcg_ctx.tb_ctx.htable, tb, h);
-
/* add in the page list */
tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
if (phys_page2 != -1) {
tb->page_addr[1] = -1;
}
+ /* add in the hash table */
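+ /* The hash-table insert is done last: once the TB is visible to
+ * lockless lookups it must already be fully linked into the page lists
+ * above.
+ */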
+ h = tb_hash_func(phys_pc, tb->pc, tb->flags);
+ qht_insert(&tcg_ctx.tb_ctx.htable, tb, h);
+
#ifdef DEBUG_TB_CHECK
tb_page_check();
#endif
buffer_overflow:
/* flush must be done */
tb_flush(cpu);
- /* cannot fail at this point */
- tb = tb_alloc(pc);
- assert(tb != NULL);
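+ /* The flush is queued as safe work, so a TB cannot be allocated here;
+ * unlock and restart the CPU loop to retranslate.
+ */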
+ mmap_unlock();
+ cpu_loop_exit(cpu);
}
gen_code_buf = tcg_ctx.code_gen_ptr;
TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
}
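+/* Helper for dump_exec_info(): pretty-print the TB hash table statistics
+ * gathered by qht_statistics_init() (bucket usage, occupancy and chain
+ * length histograms).
+ */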
+static void print_qht_statistics(FILE *f, fprintf_function cpu_fprintf,
+ struct qht_stats hst)
+{
+ uint32_t hgram_opts;
+ size_t hgram_bins;
+ char *hgram;
+
+ if (!hst.head_buckets) {
+ return;
+ }
+ cpu_fprintf(f, "TB hash buckets %zu/%zu (%0.2f%% head buckets used)\n",
+ hst.used_head_buckets, hst.head_buckets,
+ (double)hst.used_head_buckets / hst.head_buckets * 100);
+
+ hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
+ hgram_opts |= QDIST_PR_100X | QDIST_PR_PERCENT;
+ if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
+ hgram_opts |= QDIST_PR_NODECIMAL;
+ }
+ hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
+ cpu_fprintf(f, "TB hash occupancy %0.2f%% avg chain occ. Histogram: %s\n",
+ qdist_avg(&hst.occupancy) * 100, hgram);
+ g_free(hgram);
+
+ hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
+ hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
+ if (hgram_bins > 10) {
+ hgram_bins = 10;
+ } else {
+ hgram_bins = 0;
+ hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
+ }
+ hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
+ cpu_fprintf(f, "TB hash avg chain %0.3f buckets. Histogram: %s\n",
+ qdist_avg(&hst.chain), hgram);
+ g_free(hgram);
+}
+
void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
{
int i, target_code_size, max_target_code_size;
int direct_jmp_count, direct_jmp2_count, cross_page;
TranslationBlock *tb;
struct qht_stats hst;
- uint32_t hgram_opts;
- size_t hgram_bins;
- char *hgram;
target_code_size = 0;
max_target_code_size = 0;
tcg_ctx.tb_ctx.nb_tbs : 0);
qht_statistics_init(&tcg_ctx.tb_ctx.htable, &hst);
-
- cpu_fprintf(f, "TB hash buckets %zu/%zu (%0.2f%% head buckets used)\n",
- hst.used_head_buckets, hst.head_buckets,
- (double)hst.used_head_buckets / hst.head_buckets * 100);
-
- hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
- hgram_opts |= QDIST_PR_100X | QDIST_PR_PERCENT;
- if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
- hgram_opts |= QDIST_PR_NODECIMAL;
- }
- hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
- cpu_fprintf(f, "TB hash occupancy %0.2f%% avg chain occ. Histogram: %s\n",
- qdist_avg(&hst.occupancy) * 100, hgram);
- g_free(hgram);
-
- hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
- hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
- if (hgram_bins > 10) {
- hgram_bins = 10;
- } else {
- hgram_bins = 0;
- hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
- }
- hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
- cpu_fprintf(f, "TB hash avg chain %0.3f buckets. Histogram: %s\n",
- qdist_avg(&hst.chain), hgram);
- g_free(hgram);
-
+ print_qht_statistics(f, cpu_fprintf, hst);
qht_statistics_destroy(&hst);
cpu_fprintf(f, "\nStatistics:\n");
- cpu_fprintf(f, "TB flush count %d\n", tcg_ctx.tb_ctx.tb_flush_count);
+ cpu_fprintf(f, "TB flush count %u\n",
+ atomic_read(&tcg_ctx.tb_ctx.tb_flush_count));
cpu_fprintf(f, "TB invalidate count %d\n",
tcg_ctx.tb_ctx.tb_phys_invalidate_count);
cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
{
struct walk_memory_regions_data data;
- uintptr_t i;
+ uintptr_t i, l1_sz = v_l1_size;
data.fn = fn;
data.priv = priv;
data.start = -1u;
data.prot = 0;
- for (i = 0; i < V_L1_SIZE; i++) {
- int rc = walk_memory_regions_1(&data, (target_ulong)i << (V_L1_SHIFT + TARGET_PAGE_BITS),
- V_L1_SHIFT / V_L2_BITS - 1, l1_map + i);
+ for (i = 0; i < l1_sz; i++) {
+ target_ulong base = i << (v_l1_shift + TARGET_PAGE_BITS);
+ int rc = walk_memory_regions_1(&data, base, v_l2_levels, l1_map + i);
if (rc != 0) {
return rc;
}
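+/* Called from the signal handler: invalidate the code and unprotect the
+ * page.  Returns 0 if the fault was not handled here, 1 if it was handled,
+ * and 2 if it was handled and the current TB was invalidated (the caller
+ * must then return to the main loop).
+ */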
int page_unprotect(target_ulong address, uintptr_t pc)
{
unsigned int prot;
+ bool current_tb_invalidated;
PageDesc *p;
target_ulong host_start, host_end, addr;
host_end = host_start + qemu_host_page_size;
prot = 0;
+ current_tb_invalidated = false;
for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
p = page_find(addr >> TARGET_PAGE_BITS);
p->flags |= PAGE_WRITE;
/* and since the content will be modified, we must invalidate
the corresponding translated code. */
- if (tb_invalidate_phys_page(addr, pc)) {
- mmap_unlock();
- return 2;
- }
+ current_tb_invalidated |= tb_invalidate_phys_page(addr, pc);
#ifdef DEBUG_TB_CHECK
tb_invalidate_check(addr);
#endif
prot & PAGE_BITS);
mmap_unlock();
- return 1;
+ /* If the current TB was invalidated, the caller must exit it and
+ * return to the main loop.
+ */
+ return current_tb_invalidated ? 2 : 1;
}
mmap_unlock();
return 0;