target-arm: Add arm_boot_info secure_boot control

[qemu.git] / translate-all.c
diff --git a/translate-all.c b/translate-all.c

index d367fc4d11d3cfc966ef6537da21fbc502f177e7..c24cfe865b2b3fc7582782b0fe34201b070605ef 100644 (file)
--- a/translate-all.c
+++ b/translate-all.c
@@ -33,11 +33,9 @@
  #include "qemu-common.h"
  #define NO_CPU_IO_DEFS
  #include "cpu.h"
+#include "trace.h"
  #include "disas/disas.h"
  #include "tcg.h"
-#include "qemu/timer.h"
-#include "exec/memory.h"
-#include "exec/address-spaces.h"
  #if defined(CONFIG_USER_ONLY)
  #include "qemu.h"
  #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
@@ -55,10 +53,13 @@
  #include <libutil.h>
  #endif
  #endif
+#else
+#include "exec/address-spaces.h"
  #endif
  
  #include "exec/cputlb.h"
  #include "translate-all.h"
+#include "qemu/timer.h"
  
  //#define DEBUG_TB_INVALIDATE
  //#define DEBUG_FLUSH
@@ -72,21 +73,6 @@
  
  #define SMC_BITMAP_USE_THRESHOLD 10
  
-/* Code generation and translation blocks */
-static TranslationBlock *tbs;
-static int code_gen_max_blocks;
-TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
-static int nb_tbs;
-/* any access to the tbs or the page table must use this lock */
-spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
-
-uint8_t *code_gen_prologue;
-static uint8_t *code_gen_buffer;
-static size_t code_gen_buffer_size;
-/* threshold to flush the translated code buffer */
-static size_t code_gen_buffer_max_size;
-static uint8_t *code_gen_ptr;
-
  typedef struct PageDesc {
      /* list of TBs intersecting this ram page */
      TranslationBlock *first_tb;
@@ -111,12 +97,16 @@ typedef struct PageDesc {
  # define L1_MAP_ADDR_SPACE_BITS  TARGET_VIRT_ADDR_SPACE_BITS
  #endif
  
+/* Size of the L2 (and L3, etc) page tables.  */
+#define V_L2_BITS 10
+#define V_L2_SIZE (1 << V_L2_BITS)
+
  /* The bits remaining after N lower levels of page tables.  */
  #define V_L1_BITS_REM \
-    ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
+    ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS)
  
  #if V_L1_BITS_REM < 4
-#define V_L1_BITS  (V_L1_BITS_REM + L2_BITS)
+#define V_L1_BITS  (V_L1_BITS_REM + V_L2_BITS)
  #else
  #define V_L1_BITS  V_L1_BITS_REM
  #endif
@@ -133,10 +123,6 @@ uintptr_t qemu_host_page_mask;
     The bottom level has pointers to PageDesc.  */
  static void *l1_map[V_L1_SIZE];
  
-/* statistics */
-static int tb_flush_count;
-static int tb_phys_invalidate_count;
-
  /* code generation context */
  TCGContext tcg_ctx;
  
@@ -158,7 +144,7 @@ void cpu_gen_init(void)
  int cpu_gen_code(CPUArchState *env, TranslationBlock *tb, int *gen_code_size_ptr)
  {
      TCGContext *s = &tcg_ctx;
-    uint8_t *gen_code_buf;
+    tcg_insn_unit *gen_code_buf;
      int gen_code_size;
  #ifdef CONFIG_PROFILER
      int64_t ti;
@@ -173,6 +159,8 @@ int cpu_gen_code(CPUArchState *env, TranslationBlock *tb, int *gen_code_size_ptr
  
      gen_intermediate_code(env, tb);
  
+    trace_translate_block(tb, tb->pc, tb->tc_ptr);
+
      /* generate machine code */
      gen_code_buf = tb->tc_ptr;
      tb->tb_next_offset[0] = 0xffff;
@@ -201,8 +189,8 @@ int cpu_gen_code(CPUArchState *env, TranslationBlock *tb, int *gen_code_size_ptr
  
  #ifdef DEBUG_DISAS
      if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
-        qemu_log("OUT: [size=%d]\n", *gen_code_size_ptr);
-        log_disas(tb->tc_ptr, *gen_code_size_ptr);
+        qemu_log("OUT: [size=%d]\n", gen_code_size);
+        log_disas(tb->tc_ptr, gen_code_size);
          qemu_log("\n");
          qemu_log_flush();
      }
@@ -212,9 +200,10 @@ int cpu_gen_code(CPUArchState *env, TranslationBlock *tb, int *gen_code_size_ptr
  
  /* The cpu state corresponding to 'searched_pc' is restored.
   */
-static int cpu_restore_state_from_tb(TranslationBlock *tb, CPUArchState *env,
+static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
                                       uintptr_t searched_pc)
  {
+    CPUArchState *env = cpu->env_ptr;
      TCGContext *s = &tcg_ctx;
      int j;
      uintptr_t tc_ptr;
@@ -231,9 +220,9 @@ static int cpu_restore_state_from_tb(TranslationBlock *tb, CPUArchState *env,
  
      if (use_icount) {
          /* Reset the cycle counter to the start of the block.  */
-        env->icount_decr.u16.low += tb->icount;
+        cpu->icount_decr.u16.low += tb->icount;
          /* Clear the IO flag.  */
-        env->can_do_io = 0;
+        cpu->can_do_io = 0;
      }
  
      /* find opc index corresponding to search_pc */
@@ -249,14 +238,15 @@ static int cpu_restore_state_from_tb(TranslationBlock *tb, CPUArchState *env,
      s->tb_jmp_offset = NULL;
      s->tb_next = tb->tb_next;
  #endif
-    j = tcg_gen_code_search_pc(s, (uint8_t *)tc_ptr, searched_pc - tc_ptr);
+    j = tcg_gen_code_search_pc(s, (tcg_insn_unit *)tc_ptr,
+                               searched_pc - tc_ptr);
      if (j < 0)
          return -1;
      /* now find start of instruction before */
      while (s->gen_opc_instr_start[j] == 0) {
          j--;
      }
-    env->icount_decr.u16.low -= s->gen_opc_icount[j];
+    cpu->icount_decr.u16.low -= s->gen_opc_icount[j];
  
      restore_state_to_opc(env, tb, j);
  
@@ -267,13 +257,19 @@ static int cpu_restore_state_from_tb(TranslationBlock *tb, CPUArchState *env,
      return 0;
  }
  
-bool cpu_restore_state(CPUArchState *env, uintptr_t retaddr)
+bool cpu_restore_state(CPUState *cpu, uintptr_t retaddr)
  {
      TranslationBlock *tb;
  
      tb = tb_find_pc(retaddr);
      if (tb) {
-        cpu_restore_state_from_tb(tb, env, retaddr);
+        cpu_restore_state_from_tb(cpu, tb, retaddr);
+        if (tb->cflags & CF_NOCACHE) {
+            /* one-shot translation, invalidate it immediately */
+            cpu->current_tb = NULL;
+            tb_phys_invalidate(tb, -1);
+            tb_free(tb);
+        }
          return true;
      }
      return false;
@@ -304,20 +300,11 @@ static inline void map_exec(void *addr, long size)
  }
  #endif
  
-static void page_init(void)
+void page_size_init(void)
  {
      /* NOTE: we can always suppose that qemu_host_page_size >=
         TARGET_PAGE_SIZE */
-#ifdef _WIN32
-    {
-        SYSTEM_INFO system_info;
-
-        GetSystemInfo(&system_info);
-        qemu_real_host_page_size = system_info.dwPageSize;
-    }
-#else
      qemu_real_host_page_size = getpagesize();
-#endif
      if (qemu_host_page_size == 0) {
          qemu_host_page_size = qemu_real_host_page_size;
      }
@@ -325,7 +312,11 @@ static void page_init(void)
          qemu_host_page_size = TARGET_PAGE_SIZE;
      }
      qemu_host_page_mask = ~(qemu_host_page_size - 1);
+}
  
+static void page_init(void)
+{
+    page_size_init();
  #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
      {
  #ifdef HAVE_KINFO_GETVMMAP
@@ -414,18 +405,18 @@ static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
      lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
  
      /* Level 2..N-1.  */
-    for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
+    for (i = V_L1_SHIFT / V_L2_BITS - 1; i > 0; i--) {
          void **p = *lp;
  
          if (p == NULL) {
              if (!alloc) {
                  return NULL;
              }
-            ALLOC(p, sizeof(void *) * L2_SIZE);
+            ALLOC(p, sizeof(void *) * V_L2_SIZE);
              *lp = p;
          }
  
-        lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
+        lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
      }
  
      pd = *lp;
@@ -433,13 +424,13 @@ static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
          if (!alloc) {
              return NULL;
          }
-        ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
+        ALLOC(pd, sizeof(PageDesc) * V_L2_SIZE);
          *lp = pd;
      }
  
  #undef ALLOC
  
-    return pd + (index & (L2_SIZE - 1));
+    return pd + (index & (V_L2_SIZE - 1));
  }
  
  static inline PageDesc *page_find(tb_page_addr_t index)
@@ -479,11 +470,17 @@ static inline PageDesc *page_find(tb_page_addr_t index)
  # define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
  #elif defined(__sparc__)
  # define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
+#elif defined(__aarch64__)
+# define MAX_CODE_GEN_BUFFER_SIZE  (128ul * 1024 * 1024)
  #elif defined(__arm__)
  # define MAX_CODE_GEN_BUFFER_SIZE  (16u * 1024 * 1024)
  #elif defined(__s390x__)
    /* We have a +- 4GB range on the branches; leave some slop.  */
  # define MAX_CODE_GEN_BUFFER_SIZE  (3ul * 1024 * 1024 * 1024)
+#elif defined(__mips__)
+  /* We have a 256MB branch region, but leave room to make sure the
+     main executable is also within that region.  */
+# define MAX_CODE_GEN_BUFFER_SIZE  (128ul * 1024 * 1024)
  #else
  # define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
  #endif
@@ -514,18 +511,51 @@ static inline size_t size_code_gen_buffer(size_t tb_size)
      if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
          tb_size = MAX_CODE_GEN_BUFFER_SIZE;
      }
-    code_gen_buffer_size = tb_size;
+    tcg_ctx.code_gen_buffer_size = tb_size;
      return tb_size;
  }
  
+#ifdef __mips__
+/* To use J and JAL within the code_gen_buffer it must not cross a 256MB
+   boundary: compare every address bit from bit 28 up, not just 28..31.  */
+static inline bool cross_256mb(void *addr, size_t size)
+{
+    return (((uintptr_t)addr ^ ((uintptr_t)addr + size)) >> 28) != 0;
+}
+
+/* The buffer [buf1, buf1 + size1) crosses a 256MB boundary: keep only the
+   larger of the two pieces on either side of that boundary.  Returns the
+   base of the kept piece and stores its size in code_gen_buffer_size.  */
+static inline void *split_cross_256mb(void *buf1, size_t size1)
+{
+    /* Clear only the low 28 bits so 64-bit addresses are not truncated.  */
+    uintptr_t split = ((uintptr_t)buf1 + size1) & ~(uintptr_t)0x0fffffff;
+    size_t size2 = (uintptr_t)buf1 + size1 - split;
+
+    size1 = split - (uintptr_t)buf1;
+    if (size1 < size2) {
+        size1 = size2;
+        buf1 = (void *)split;
+    }
+    tcg_ctx.code_gen_buffer_size = size1;
+    return buf1;
+}
+#endif
+
  #ifdef USE_STATIC_CODE_GEN_BUFFER
  static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
      __attribute__((aligned(CODE_GEN_ALIGN)));
  
  static inline void *alloc_code_gen_buffer(void)
  {
-    map_exec(static_code_gen_buffer, code_gen_buffer_size);
-    return static_code_gen_buffer;
+    void *buf = static_code_gen_buffer; /* start from the static array */
+#ifdef __mips__
+    if (cross_256mb(buf, tcg_ctx.code_gen_buffer_size)) { /* keep one side */
+        buf = split_cross_256mb(buf, tcg_ctx.code_gen_buffer_size);
+    }
+#endif
+    map_exec(buf, tcg_ctx.code_gen_buffer_size); /* size reflects any split */
+    return buf;
  }
  #elif defined(USE_MMAP)
  static inline void *alloc_code_gen_buffer(void)
@@ -547,54 +577,114 @@ static inline void *alloc_code_gen_buffer(void)
         Leave the choice of exact location with the kernel.  */
      flags |= MAP_32BIT;
      /* Cannot expect to map more than 800MB in low memory.  */
-    if (code_gen_buffer_size > 800u * 1024 * 1024) {
-        code_gen_buffer_size = 800u * 1024 * 1024;
+    if (tcg_ctx.code_gen_buffer_size > 800u * 1024 * 1024) {
+        tcg_ctx.code_gen_buffer_size = 800u * 1024 * 1024;
      }
  # elif defined(__sparc__)
      start = 0x40000000ul;
  # elif defined(__s390x__)
      start = 0x90000000ul;
+# elif defined(__mips__)
+    /* ??? We ought to more explicitly manage layout for softmmu too.  */
+#  ifdef CONFIG_USER_ONLY
+    start = 0x68000000ul;
+#  elif _MIPS_SIM == _ABI64
+    start = 0x128000000ul;
+#  else
+    start = 0x08000000ul;
+#  endif
  # endif
  
-    buf = mmap((void *)start, code_gen_buffer_size,
+    buf = mmap((void *)start, tcg_ctx.code_gen_buffer_size,
                 PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
-    return buf == MAP_FAILED ? NULL : buf;
+    if (buf == MAP_FAILED) {
+        return NULL;
+    }
+
+#ifdef __mips__
+    if (cross_256mb(buf, tcg_ctx.code_gen_buffer_size)) {
+        /* Try again, with the original still mapped, to avoid re-acquiring
+           that 256mb crossing.  This time don't specify an address.  */
+        size_t size2, size1 = tcg_ctx.code_gen_buffer_size;
+        void *buf2 = mmap(NULL, size1, PROT_WRITE | PROT_READ | PROT_EXEC,
+                          flags, -1, 0);
+        if (buf2 != MAP_FAILED) {
+            if (!cross_256mb(buf2, size1)) {
+                /* Success!  Use the new buffer.  */
+                munmap(buf, size1);
+                return buf2;
+            }
+            /* Failure.  Work with what we had.  */
+            munmap(buf2, size1);
+        }
+
+        /* Split the original buffer.  Free the smaller half.  */
+        buf2 = split_cross_256mb(buf, size1);
+        size2 = tcg_ctx.code_gen_buffer_size;
+        munmap(buf + (buf == buf2 ? size2 : 0), size1 - size2);
+        return buf2;
+    }
+#endif
+
+    return buf;
  }
  #else
  static inline void *alloc_code_gen_buffer(void)
  {
-    void *buf = g_malloc(code_gen_buffer_size);
+    void *buf = g_malloc(tcg_ctx.code_gen_buffer_size);
  
-    if (buf) {
-        map_exec(buf, code_gen_buffer_size);
+    if (buf == NULL) {
+        return NULL;
      }
+
+#ifdef __mips__
+    if (cross_256mb(buf, tcg_ctx.code_gen_buffer_size)) {
+        void *buf2 = g_malloc(tcg_ctx.code_gen_buffer_size);
+        if (buf2 != NULL && !cross_256mb(buf2, size1)) {
+            /* Success!  Use the new buffer.  */
+            free(buf);
+            buf = buf2;
+        } else {
+            /* Failure.  Work with what we had.  Since this is malloc
+               and not mmap, we can't free the other half.  */
+            free(buf2);
+            buf = split_cross_256mb(buf, tcg_ctx.code_gen_buffer_size);
+        }
+    }
+#endif
+
+    map_exec(buf, tcg_ctx.code_gen_buffer_size);
      return buf;
  }
  #endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
  
  static inline void code_gen_alloc(size_t tb_size)
  {
-    code_gen_buffer_size = size_code_gen_buffer(tb_size);
-    code_gen_buffer = alloc_code_gen_buffer();
-    if (code_gen_buffer == NULL) {
+    tcg_ctx.code_gen_buffer_size = size_code_gen_buffer(tb_size);
+    tcg_ctx.code_gen_buffer = alloc_code_gen_buffer(); /* may reduce the size */
+    if (tcg_ctx.code_gen_buffer == NULL) {
          fprintf(stderr, "Could not allocate dynamic translator buffer\n");
          exit(1);
      }
  
-    qemu_madvise(code_gen_buffer, code_gen_buffer_size, QEMU_MADV_HUGEPAGE);
+    qemu_madvise(tcg_ctx.code_gen_buffer, tcg_ctx.code_gen_buffer_size,
+            QEMU_MADV_HUGEPAGE);
  
      /* Steal room for the prologue at the end of the buffer.  This ensures
         (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
         from TB's to the prologue are going to be in range.  It also means
         that we don't need to mark (additional) portions of the data segment
         as executable.  */
-    code_gen_prologue = code_gen_buffer + code_gen_buffer_size - 1024;
-    code_gen_buffer_size -= 1024;
+    tcg_ctx.code_gen_prologue = tcg_ctx.code_gen_buffer +
+            tcg_ctx.code_gen_buffer_size - 1024;
+    tcg_ctx.code_gen_buffer_size -= 1024; /* the prologue's 1024 bytes */
  
-    code_gen_buffer_max_size = code_gen_buffer_size -
+    tcg_ctx.code_gen_buffer_max_size = tcg_ctx.code_gen_buffer_size -
          (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
-    code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
-    tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
+    tcg_ctx.code_gen_max_blocks = tcg_ctx.code_gen_buffer_size /
+            CODE_GEN_AVG_BLOCK_SIZE; /* element count of tbs[] below */
+    tcg_ctx.tb_ctx.tbs =
+            g_malloc(tcg_ctx.code_gen_max_blocks * sizeof(TranslationBlock));
  }
  
  /* Must be called before using the QEMU cpus. 'tb_size' is the size
@@ -604,8 +694,8 @@ void tcg_exec_init(unsigned long tb_size)
  {
      cpu_gen_init();
      code_gen_alloc(tb_size);
-    code_gen_ptr = code_gen_buffer;
-    tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
+    tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer;
+    tcg_register_jit(tcg_ctx.code_gen_buffer, tcg_ctx.code_gen_buffer_size);
      page_init();
  #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
      /* There's no guest base to take into account, so go ahead and
@@ -616,7 +706,7 @@ void tcg_exec_init(unsigned long tb_size)
  
  bool tcg_enabled(void)
  {
-    return code_gen_buffer != NULL;
+    return tcg_ctx.code_gen_buffer != NULL; /* assigned in code_gen_alloc() */
  }
  
  /* Allocate a new translation block. Flush the translation buffer if
@@ -625,11 +715,12 @@ static TranslationBlock *tb_alloc(target_ulong pc)
  {
      TranslationBlock *tb;
  
-    if (nb_tbs >= code_gen_max_blocks ||
-        (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size) {
+    if (tcg_ctx.tb_ctx.nb_tbs >= tcg_ctx.code_gen_max_blocks ||
+        (tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer) >=
+         tcg_ctx.code_gen_buffer_max_size) {
          return NULL;
      }
-    tb = &tbs[nb_tbs++];
+    tb = &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs++];
      tb->pc = pc;
      tb->cflags = 0;
      return tb;
@@ -640,9 +731,10 @@ void tb_free(TranslationBlock *tb)
      /* In practice this is mostly used for single use temporary TB
         Ignore the hard cases and just back up if this TB happens to
         be the last one generated.  */
-    if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
-        code_gen_ptr = tb->tc_ptr;
-        nb_tbs--;
+    if (tcg_ctx.tb_ctx.nb_tbs > 0 &&
+            tb == &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs - 1]) {
+        tcg_ctx.code_gen_ptr = tb->tc_ptr;
+        tcg_ctx.tb_ctx.nb_tbs--;
      }
  }
  
@@ -666,14 +758,14 @@ static void page_flush_tb_1(int level, void **lp)
      if (level == 0) {
          PageDesc *pd = *lp;
  
-        for (i = 0; i < L2_SIZE; ++i) {
+        for (i = 0; i < V_L2_SIZE; ++i) {
              pd[i].first_tb = NULL;
              invalidate_page_bitmap(pd + i);
          }
      } else {
          void **pp = *lp;
  
-        for (i = 0; i < L2_SIZE; ++i) {
+        for (i = 0; i < V_L2_SIZE; ++i) {
              page_flush_tb_1(level - 1, pp + i);
          }
      }
@@ -684,7 +776,7 @@ static void page_flush_tb(void)
      int i;
  
      for (i = 0; i < V_L1_SIZE; i++) {
-        page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
+        page_flush_tb_1(V_L1_SHIFT / V_L2_BITS - 1, l1_map + i);
      }
  }
  
@@ -692,31 +784,32 @@ static void page_flush_tb(void)
  /* XXX: tb_flush is currently not thread safe */
  void tb_flush(CPUArchState *env1)
  {
-    CPUArchState *env;
+    CPUState *cpu = ENV_GET_CPU(env1); /* also the CPU_FOREACH cursor below */
  
  #if defined(DEBUG_FLUSH)
      printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
-           (unsigned long)(code_gen_ptr - code_gen_buffer),
-           nb_tbs, nb_tbs > 0 ?
-           ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
+           (unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer),
+           tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.tb_ctx.nb_tbs > 0 ?
+           ((unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer)) /
+           tcg_ctx.tb_ctx.nb_tbs : 0);
  #endif
-    if ((unsigned long)(code_gen_ptr - code_gen_buffer)
-        > code_gen_buffer_size) {
-        cpu_abort(env1, "Internal error: code buffer overflow\n");
+    if ((unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer)
+        > tcg_ctx.code_gen_buffer_size) {
+        cpu_abort(cpu, "Internal error: code buffer overflow\n");
      }
-    nb_tbs = 0;
+    tcg_ctx.tb_ctx.nb_tbs = 0; /* tbs[] is handed out again from slot 0 */
  
-    for (env = first_cpu; env != NULL; env = env->next_cpu) {
-        memset(env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *));
+    CPU_FOREACH(cpu) { /* clear each CPU's tb_jmp_cache */
+        memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
      }
  
-    memset(tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof(void *));
+    memset(tcg_ctx.tb_ctx.tb_phys_hash, 0, sizeof(tcg_ctx.tb_ctx.tb_phys_hash));
      page_flush_tb();
  
-    code_gen_ptr = code_gen_buffer;
+    tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer; /* restart at buffer base */
      /* XXX: flush processor icache at this point if cache flush is
         expensive */
-    tb_flush_count++;
+    tcg_ctx.tb_ctx.tb_flush_count++;
  }
  
  #ifdef DEBUG_TB_CHECK
@@ -728,7 +821,7 @@ static void tb_invalidate_check(target_ulong address)
  
      address &= TARGET_PAGE_MASK;
      for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
-        for (tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
+        for (tb = tcg_ctx.tb_ctx.tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
              if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
                    address >= tb->pc + tb->size)) {
                  printf("ERROR invalidate: address=" TARGET_FMT_lx
@@ -746,7 +839,8 @@ static void tb_page_check(void)
      int i, flags1, flags2;
  
      for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
-        for (tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
+        for (tb = tcg_ctx.tb_ctx.tb_phys_hash[i]; tb != NULL;
+                tb = tb->phys_hash_next) {
              flags1 = page_get_flags(tb->pc);
              flags2 = page_get_flags(tb->pc + tb->size - 1);
              if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
@@ -829,7 +923,7 @@ static inline void tb_reset_jump(TranslationBlock *tb, int n)
  /* invalidate one TB */
  void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
  {
-    CPUArchState *env;
+    CPUState *cpu;
      PageDesc *p;
      unsigned int h, n1;
      tb_page_addr_t phys_pc;
@@ -838,7 +932,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
      /* remove the TB from the hash list */
      phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
      h = tb_phys_hash_func(phys_pc);
-    tb_hash_remove(&tb_phys_hash[h], tb);
+    tb_hash_remove(&tcg_ctx.tb_ctx.tb_phys_hash[h], tb);
  
      /* remove the TB from the page list */
      if (tb->page_addr[0] != page_addr) {
@@ -852,13 +946,13 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
          invalidate_page_bitmap(p);
      }
  
-    tb_invalidated_flag = 1;
+    tcg_ctx.tb_ctx.tb_invalidated_flag = 1;
  
      /* remove the TB from the hash list */
      h = tb_jmp_cache_hash_func(tb->pc);
-    for (env = first_cpu; env != NULL; env = env->next_cpu) {
-        if (env->tb_jmp_cache[h] == tb) {
-            env->tb_jmp_cache[h] = NULL;
+    CPU_FOREACH(cpu) {
+        if (cpu->tb_jmp_cache[h] == tb) {
+            cpu->tb_jmp_cache[h] = NULL;
          }
      }
  
@@ -881,7 +975,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
      }
      tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
  
-    tb_phys_invalidate_count++;
+    tcg_ctx.tb_ctx.tb_phys_invalidate_count++;
  }
  
  static inline void set_bits(uint8_t *tab, int start, int len)
@@ -940,12 +1034,12 @@ static void build_page_bitmap(PageDesc *p)
      }
  }
  
-TranslationBlock *tb_gen_code(CPUArchState *env,
+TranslationBlock *tb_gen_code(CPUState *cpu,
                                target_ulong pc, target_ulong cs_base,
                                int flags, int cflags)
  {
+    CPUArchState *env = cpu->env_ptr;
      TranslationBlock *tb;
-    uint8_t *tc_ptr;
      tb_page_addr_t phys_pc, phys_page2;
      target_ulong virt_page2;
      int code_gen_size;
@@ -958,16 +1052,15 @@ TranslationBlock *tb_gen_code(CPUArchState *env,
          /* cannot fail at this point */
          tb = tb_alloc(pc);
          /* Don't forget to invalidate previous TB info.  */
-        tb_invalidated_flag = 1;
+        tcg_ctx.tb_ctx.tb_invalidated_flag = 1;
      }
-    tc_ptr = code_gen_ptr;
-    tb->tc_ptr = tc_ptr;
+    tb->tc_ptr = tcg_ctx.code_gen_ptr;
      tb->cs_base = cs_base;
      tb->flags = flags;
      tb->cflags = cflags;
      cpu_gen_code(env, tb, &code_gen_size);
-    code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
-                             CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
+    tcg_ctx.code_gen_ptr = (void *)(((uintptr_t)tcg_ctx.code_gen_ptr +
+            code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
  
      /* check next page if needed */
      virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
@@ -1007,7 +1100,10 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
                                     int is_cpu_write_access)
  {
      TranslationBlock *tb, *tb_next, *saved_tb;
-    CPUArchState *env = cpu_single_env;
+    CPUState *cpu = current_cpu;
+#if defined(TARGET_HAS_PRECISE_SMC)
+    CPUArchState *env = NULL;
+#endif
      tb_page_addr_t tb_start, tb_end;
      PageDesc *p;
      int n;
@@ -1030,6 +1126,11 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
          /* build code bitmap */
          build_page_bitmap(p);
      }
+#if defined(TARGET_HAS_PRECISE_SMC)
+    if (cpu != NULL) {
+        env = cpu->env_ptr;
+    }
+#endif
  
      /* we remove all the TBs in the range [start, end[ */
      /* XXX: see if in some cases it could be faster to invalidate all
@@ -1054,9 +1155,9 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
              if (current_tb_not_found) {
                  current_tb_not_found = 0;
                  current_tb = NULL;
-                if (env->mem_io_pc) {
+                if (cpu->mem_io_pc) {
                      /* now we have a real cpu fault */
-                    current_tb = tb_find_pc(env->mem_io_pc);
+                    current_tb = tb_find_pc(cpu->mem_io_pc);
                  }
              }
              if (current_tb == tb &&
@@ -1068,7 +1169,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
                  restore the CPU state */
  
                  current_tb_modified = 1;
-                cpu_restore_state_from_tb(current_tb, env, env->mem_io_pc);
+                cpu_restore_state_from_tb(cpu, current_tb, cpu->mem_io_pc);
                  cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
                                       &current_flags);
              }
@@ -1076,15 +1177,15 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
              /* we need to do that to handle the case where a signal
                 occurs while doing tb_phys_invalidate() */
              saved_tb = NULL;
-            if (env) {
-                saved_tb = env->current_tb;
-                env->current_tb = NULL;
+            if (cpu != NULL) {
+                saved_tb = cpu->current_tb;
+                cpu->current_tb = NULL;
              }
              tb_phys_invalidate(tb, -1);
-            if (env) {
-                env->current_tb = saved_tb;
-                if (env->interrupt_request && env->current_tb) {
-                    cpu_interrupt(env, env->interrupt_request);
+            if (cpu != NULL) {
+                cpu->current_tb = saved_tb;
+                if (cpu->interrupt_request && cpu->current_tb) {
+                    cpu_interrupt(cpu, cpu->interrupt_request);
                  }
              }
          }
@@ -1095,7 +1196,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
      if (!p->first_tb) {
          invalidate_page_bitmap(p);
          if (is_cpu_write_access) {
-            tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
+            tlb_unprotect_code_phys(cpu, start, cpu->mem_io_vaddr);
          }
      }
  #endif
@@ -1104,9 +1205,9 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
          /* we generate a block containing just the instruction
             modifying the memory. It will ensure that it cannot modify
             itself */
-        env->current_tb = NULL;
-        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
-        cpu_resume_from_signal(env, NULL);
+        cpu->current_tb = NULL;
+        tb_gen_code(cpu, current_pc, current_cs_base, current_flags, 1);
+        cpu_resume_from_signal(cpu, NULL);
      }
  #endif
  }
@@ -1144,14 +1245,16 @@ void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
  
  #if !defined(CONFIG_SOFTMMU)
  static void tb_invalidate_phys_page(tb_page_addr_t addr,
-                                    uintptr_t pc, void *puc)
+                                    uintptr_t pc, void *puc,
+                                    bool locked)
  {
      TranslationBlock *tb;
      PageDesc *p;
      int n;
  #ifdef TARGET_HAS_PRECISE_SMC
      TranslationBlock *current_tb = NULL;
-    CPUArchState *env = cpu_single_env;
+    CPUState *cpu = current_cpu;
+    CPUArchState *env = NULL;
      int current_tb_modified = 0;
      target_ulong current_pc = 0;
      target_ulong current_cs_base = 0;
@@ -1168,6 +1271,9 @@ static void tb_invalidate_phys_page(tb_page_addr_t addr,
      if (tb && pc != 0) {
          current_tb = tb_find_pc(pc);
      }
+    if (cpu != NULL) {
+        env = cpu->env_ptr;
+    }
  #endif
      while (tb != NULL) {
          n = (uintptr_t)tb & 3;
@@ -1182,7 +1288,7 @@ static void tb_invalidate_phys_page(tb_page_addr_t addr,
                     restore the CPU state */
  
              current_tb_modified = 1;
-            cpu_restore_state_from_tb(current_tb, env, pc);
+            cpu_restore_state_from_tb(cpu, current_tb, pc);
              cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
                                   &current_flags);
          }
@@ -1196,9 +1302,12 @@ static void tb_invalidate_phys_page(tb_page_addr_t addr,
          /* we generate a block containing just the instruction
             modifying the memory. It will ensure that it cannot modify
             itself */
-        env->current_tb = NULL;
-        tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
-        cpu_resume_from_signal(env, puc);
+        cpu->current_tb = NULL;
+        tb_gen_code(cpu, current_pc, current_cs_base, current_flags, 1);
+        if (locked) {
+            mmap_unlock();
+        }
+        cpu_resume_from_signal(cpu, puc);
      }
  #endif
  }
@@ -1276,7 +1385,7 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
      mmap_lock();
      /* add in the physical hash table */
      h = tb_phys_hash_func(phys_pc);
-    ptb = &tb_phys_hash[h];
+    ptb = &tcg_ctx.tb_ctx.tb_phys_hash[h];
      tb->phys_hash_next = *ptb;
      *ptb = tb;
  
@@ -1306,17 +1415,6 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
      mmap_unlock();
  }
  
-#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
-/* check whether the given addr is in TCG generated code buffer or not */
-bool is_tcg_gen_code(uintptr_t tc_ptr)
-{
-    /* This can be called during code generation, code_gen_buffer_max_size
-       is used instead of code_gen_ptr for upper boundary checking */
-    return (tc_ptr >= (uintptr_t)code_gen_buffer &&
-            tc_ptr < (uintptr_t)(code_gen_buffer + code_gen_buffer_max_size));
-}
-#endif
-
  /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
     tb[1].tc_ptr. Return NULL if not found */
  static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
@@ -1325,19 +1423,19 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
      uintptr_t v;
      TranslationBlock *tb;
  
-    if (nb_tbs <= 0) {
+    if (tcg_ctx.tb_ctx.nb_tbs <= 0) {
          return NULL;
      }
-    if (tc_ptr < (uintptr_t)code_gen_buffer ||
-        tc_ptr >= (uintptr_t)code_gen_ptr) {
+    if (tc_ptr < (uintptr_t)tcg_ctx.code_gen_buffer ||
+        tc_ptr >= (uintptr_t)tcg_ctx.code_gen_ptr) {
          return NULL;
      }
      /* binary search (cf Knuth) */
      m_min = 0;
-    m_max = nb_tbs - 1;
+    m_max = tcg_ctx.tb_ctx.nb_tbs - 1;
      while (m_min <= m_max) {
          m = (m_min + m_max) >> 1;
-        tb = &tbs[m];
+        tb = &tcg_ctx.tb_ctx.tbs[m];
          v = (uintptr_t)tb->tc_ptr;
          if (v == tc_ptr) {
              return tb;
@@ -1347,118 +1445,48 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
              m_min = m + 1;
          }
      }
-    return &tbs[m_max];
-}
-
-static void tb_reset_jump_recursive(TranslationBlock *tb);
-
-static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
-{
-    TranslationBlock *tb1, *tb_next, **ptb;
-    unsigned int n1;
-
-    tb1 = tb->jmp_next[n];
-    if (tb1 != NULL) {
-        /* find head of list */
-        for (;;) {
-            n1 = (uintptr_t)tb1 & 3;
-            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
-            if (n1 == 2) {
-                break;
-            }
-            tb1 = tb1->jmp_next[n1];
-        }
-        /* we are now sure now that tb jumps to tb1 */
-        tb_next = tb1;
-
-        /* remove tb from the jmp_first list */
-        ptb = &tb_next->jmp_first;
-        for (;;) {
-            tb1 = *ptb;
-            n1 = (uintptr_t)tb1 & 3;
-            tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
-            if (n1 == n && tb1 == tb) {
-                break;
-            }
-            ptb = &tb1->jmp_next[n1];
-        }
-        *ptb = tb->jmp_next[n];
-        tb->jmp_next[n] = NULL;
-
-        /* suppress the jump to next tb in generated code */
-        tb_reset_jump(tb, n);
-
-        /* suppress jumps in the tb on which we could have jumped */
-        tb_reset_jump_recursive(tb_next);
-    }
-}
-
-static void tb_reset_jump_recursive(TranslationBlock *tb)
-{
-    tb_reset_jump_recursive2(tb, 0);
-    tb_reset_jump_recursive2(tb, 1);
+    return &tcg_ctx.tb_ctx.tbs[m_max];
  }
  
  #if defined(TARGET_HAS_ICE) && !defined(CONFIG_USER_ONLY)
-void tb_invalidate_phys_addr(hwaddr addr)
+void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr)
  {
      ram_addr_t ram_addr;
-    MemoryRegionSection *section;
+    MemoryRegion *mr;
+    hwaddr l = 1;
  
-    section = phys_page_find(address_space_memory.dispatch,
-                             addr >> TARGET_PAGE_BITS);
-    if (!(memory_region_is_ram(section->mr)
-          || (section->mr->rom_device && section->mr->readable))) {
+    mr = address_space_translate(as, addr, &addr, &l, false);
+    if (!(memory_region_is_ram(mr)
+          || memory_region_is_romd(mr))) {
          return;
      }
-    ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
-        + memory_region_section_addr(section, addr);
+    ram_addr = (memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK)
+        + addr;
      tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
  }
  #endif /* TARGET_HAS_ICE && !defined(CONFIG_USER_ONLY) */
  
-void cpu_unlink_tb(CPUArchState *env)
+void tb_check_watchpoint(CPUState *cpu)
  {
-    /* FIXME: TB unchaining isn't SMP safe.  For now just ignore the
-       problem and hope the cpu will stop of its own accord.  For userspace
-       emulation this often isn't actually as bad as it sounds.  Often
-       signals are used primarily to interrupt blocking syscalls.  */
      TranslationBlock *tb;
-    static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
  
-    spin_lock(&interrupt_lock);
-    tb = env->current_tb;
-    /* if the cpu is currently executing code, we must unlink it and
-       all the potentially executing TB */
-    if (tb) {
-        env->current_tb = NULL;
-        tb_reset_jump_recursive(tb);
-    }
-    spin_unlock(&interrupt_lock);
-}
-
-void tb_check_watchpoint(CPUArchState *env)
-{
-    TranslationBlock *tb;
-
-    tb = tb_find_pc(env->mem_io_pc);
+    tb = tb_find_pc(cpu->mem_io_pc);
      if (!tb) {
-        cpu_abort(env, "check_watchpoint: could not find TB for pc=%p",
-                  (void *)env->mem_io_pc);
+        cpu_abort(cpu, "check_watchpoint: could not find TB for pc=%p",
+                  (void *)cpu->mem_io_pc);
      }
-    cpu_restore_state_from_tb(tb, env, env->mem_io_pc);
+    cpu_restore_state_from_tb(cpu, tb, cpu->mem_io_pc);
      tb_phys_invalidate(tb, -1);
  }
  
  #ifndef CONFIG_USER_ONLY
  /* mask must never be zero, except for A20 change call */
-static void tcg_handle_interrupt(CPUArchState *env, int mask)
+static void tcg_handle_interrupt(CPUState *cpu, int mask)
  {
-    CPUState *cpu = ENV_GET_CPU(env);
      int old_mask;
  
-    old_mask = env->interrupt_request;
-    env->interrupt_request |= mask;
+    old_mask = cpu->interrupt_request;
+    cpu->interrupt_request |= mask;
  
      /*
       * If called from iothread context, wake the target cpu in
@@ -1470,13 +1498,13 @@ static void tcg_handle_interrupt(CPUArchState *env, int mask)
      }
  
      if (use_icount) {
-        env->icount_decr.u16.high = 0xffff;
-        if (!can_do_io(env)
+        cpu->icount_decr.u16.high = 0xffff;
+        if (!cpu_can_do_io(cpu)
              && (mask & ~old_mask) != 0) {
-            cpu_abort(env, "Raised interrupt while not in I/O function");
+            cpu_abort(cpu, "Raised interrupt while not in I/O function");
          }
      } else {
-        cpu_unlink_tb(env);
+        cpu->tcg_exit_req = 1;
      }
  }
  
@@ -1484,8 +1512,11 @@ CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
  
  /* in deterministic execution mode, instructions doing device I/Os
     must be at the end of the TB */
-void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
+void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
  {
+#if defined(TARGET_MIPS) || defined(TARGET_SH4)
+    CPUArchState *env = cpu->env_ptr;
+#endif
      TranslationBlock *tb;
      uint32_t n, cflags;
      target_ulong pc, cs_base;
@@ -1493,14 +1524,14 @@ void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
  
      tb = tb_find_pc(retaddr);
      if (!tb) {
-        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
+        cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
                    (void *)retaddr);
      }
-    n = env->icount_decr.u16.low + tb->icount;
-    cpu_restore_state_from_tb(tb, env, retaddr);
+    n = cpu->icount_decr.u16.low + tb->icount;
+    cpu_restore_state_from_tb(cpu, tb, retaddr);
      /* Calculate how many instructions had been executed before the fault
         occurred.  */
-    n = n - env->icount_decr.u16.low;
+    n = n - cpu->icount_decr.u16.low;
      /* Generate a new TB ending on the I/O insn.  */
      n++;
      /* On MIPS and SH, delay slot instructions can only be restarted if
@@ -1509,21 +1540,21 @@ void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
         branch.  */
  #if defined(TARGET_MIPS)
      if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
-        env->active_tc.PC -= 4;
-        env->icount_decr.u16.low++;
+        env->active_tc.PC -= (env->hflags & MIPS_HFLAG_B16 ? 2 : 4);
+        cpu->icount_decr.u16.low++;
          env->hflags &= ~MIPS_HFLAG_BMASK;
      }
  #elif defined(TARGET_SH4)
      if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
              && n > 1) {
          env->pc -= 2;
-        env->icount_decr.u16.low++;
+        cpu->icount_decr.u16.low++;
          env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
      }
  #endif
      /* This should never happen.  */
      if (n > CF_COUNT_MASK) {
-        cpu_abort(env, "TB too big during recompile");
+        cpu_abort(cpu, "TB too big during recompile");
      }
  
      cflags = n | CF_LAST_IO;
@@ -1533,27 +1564,27 @@ void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
      tb_phys_invalidate(tb, -1);
      /* FIXME: In theory this could raise an exception.  In practice
         we have already translated the block once so it's probably ok.  */
-    tb_gen_code(env, pc, cs_base, flags, cflags);
+    tb_gen_code(cpu, pc, cs_base, flags, cflags);
      /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
         the first in the TB) then we end up generating a whole new TB and
         repeating the fault, which is horribly inefficient.
         Better would be to execute just this insn uncached, or generate a
         second new TB.  */
-    cpu_resume_from_signal(env, NULL);
+    cpu_resume_from_signal(cpu, NULL);
  }
  
-void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
+void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr)
  {
      unsigned int i;
  
      /* Discard jump cache entries for any tb which might potentially
         overlap the flushed page.  */
      i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
-    memset(&env->tb_jmp_cache[i], 0,
+    memset(&cpu->tb_jmp_cache[i], 0,
             TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
  
      i = tb_jmp_cache_hash_page(addr);
-    memset(&env->tb_jmp_cache[i], 0,
+    memset(&cpu->tb_jmp_cache[i], 0,
             TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
  }
  
@@ -1568,8 +1599,8 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
      cross_page = 0;
      direct_jmp_count = 0;
      direct_jmp2_count = 0;
-    for (i = 0; i < nb_tbs; i++) {
-        tb = &tbs[i];
+    for (i = 0; i < tcg_ctx.tb_ctx.nb_tbs; i++) {
+        tb = &tcg_ctx.tb_ctx.tbs[i];
          target_code_size += tb->size;
          if (tb->size > max_target_code_size) {
              max_target_code_size = tb->size;
@@ -1587,37 +1618,50 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
      /* XXX: avoid using doubles ? */
      cpu_fprintf(f, "Translation buffer state:\n");
      cpu_fprintf(f, "gen code size       %td/%zd\n",
-                code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
+                tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer,
+                tcg_ctx.code_gen_buffer_max_size);
      cpu_fprintf(f, "TB count            %d/%d\n",
-                nb_tbs, code_gen_max_blocks);
+            tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.code_gen_max_blocks);
      cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
-                nb_tbs ? target_code_size / nb_tbs : 0,
-                max_target_code_size);
+            tcg_ctx.tb_ctx.nb_tbs ? target_code_size /
+                    tcg_ctx.tb_ctx.nb_tbs : 0,
+            max_target_code_size);
      cpu_fprintf(f, "TB avg host size    %td bytes (expansion ratio: %0.1f)\n",
-                nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
-                target_code_size ? (double) (code_gen_ptr - code_gen_buffer)
-                / target_code_size : 0);
-    cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
-            cross_page,
-            nb_tbs ? (cross_page * 100) / nb_tbs : 0);
+            tcg_ctx.tb_ctx.nb_tbs ? (tcg_ctx.code_gen_ptr -
+                                     tcg_ctx.code_gen_buffer) /
+                                     tcg_ctx.tb_ctx.nb_tbs : 0,
+                target_code_size ? (double) (tcg_ctx.code_gen_ptr -
+                                             tcg_ctx.code_gen_buffer) /
+                                             target_code_size : 0);
+    cpu_fprintf(f, "cross page TB count %d (%d%%)\n", cross_page,
+            tcg_ctx.tb_ctx.nb_tbs ? (cross_page * 100) /
+                                    tcg_ctx.tb_ctx.nb_tbs : 0);
      cpu_fprintf(f, "direct jump count   %d (%d%%) (2 jumps=%d %d%%)\n",
                  direct_jmp_count,
-                nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
+                tcg_ctx.tb_ctx.nb_tbs ? (direct_jmp_count * 100) /
+                        tcg_ctx.tb_ctx.nb_tbs : 0,
                  direct_jmp2_count,
-                nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
+                tcg_ctx.tb_ctx.nb_tbs ? (direct_jmp2_count * 100) /
+                        tcg_ctx.tb_ctx.nb_tbs : 0);
      cpu_fprintf(f, "\nStatistics:\n");
-    cpu_fprintf(f, "TB flush count      %d\n", tb_flush_count);
-    cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
+    cpu_fprintf(f, "TB flush count      %d\n", tcg_ctx.tb_ctx.tb_flush_count);
+    cpu_fprintf(f, "TB invalidate count %d\n",
+            tcg_ctx.tb_ctx.tb_phys_invalidate_count);
      cpu_fprintf(f, "TLB flush count     %d\n", tlb_flush_count);
      tcg_dump_info(f, cpu_fprintf);
  }
  
+void dump_opcount_info(FILE *f, fprintf_function cpu_fprintf)
+{
+    tcg_dump_op_count(f, cpu_fprintf);
+}
+
  #else /* CONFIG_USER_ONLY */
  
-void cpu_interrupt(CPUArchState *env, int mask)
+void cpu_interrupt(CPUState *cpu, int mask)
  {
-    env->interrupt_request |= mask;
-    cpu_unlink_tb(env);
+    cpu->interrupt_request |= mask;
+    cpu->tcg_exit_req = 1;
  }
  
  /*
@@ -1627,30 +1671,30 @@ void cpu_interrupt(CPUArchState *env, int mask)
  struct walk_memory_regions_data {
      walk_memory_regions_fn fn;
      void *priv;
-    uintptr_t start;
+    target_ulong start;
      int prot;
  };
  
  static int walk_memory_regions_end(struct walk_memory_regions_data *data,
-                                   abi_ulong end, int new_prot)
+                                   target_ulong end, int new_prot)
  {
-    if (data->start != -1ul) {
+    if (data->start != -1u) {
          int rc = data->fn(data->priv, data->start, end, data->prot);
          if (rc != 0) {
              return rc;
          }
      }
  
-    data->start = (new_prot ? end : -1ul);
+    data->start = (new_prot ? end : -1u);
      data->prot = new_prot;
  
      return 0;
  }
  
  static int walk_memory_regions_1(struct walk_memory_regions_data *data,
-                                 abi_ulong base, int level, void **lp)
+                                 target_ulong base, int level, void **lp)
  {
-    abi_ulong pa;
+    target_ulong pa;
      int i, rc;
  
      if (*lp == NULL) {
@@ -1660,7 +1704,7 @@ static int walk_memory_regions_1(struct walk_memory_regions_data *data,
      if (level == 0) {
          PageDesc *pd = *lp;
  
-        for (i = 0; i < L2_SIZE; ++i) {
+        for (i = 0; i < V_L2_SIZE; ++i) {
              int prot = pd[i].flags;
  
              pa = base | (i << TARGET_PAGE_BITS);
@@ -1674,9 +1718,9 @@ static int walk_memory_regions_1(struct walk_memory_regions_data *data,
      } else {
          void **pp = *lp;
  
-        for (i = 0; i < L2_SIZE; ++i) {
-            pa = base | ((abi_ulong)i <<
-                (TARGET_PAGE_BITS + L2_BITS * level));
+        for (i = 0; i < V_L2_SIZE; ++i) {
+            pa = base | ((target_ulong)i <<
+                (TARGET_PAGE_BITS + V_L2_BITS * level));
              rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
              if (rc != 0) {
                  return rc;
@@ -1694,13 +1738,12 @@ int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
  
      data.fn = fn;
      data.priv = priv;
-    data.start = -1ul;
+    data.start = -1u;
      data.prot = 0;
  
      for (i = 0; i < V_L1_SIZE; i++) {
-        int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
-                                       V_L1_SHIFT / L2_BITS - 1, l1_map + i);
-
+        int rc = walk_memory_regions_1(&data, (target_ulong)i << (V_L1_SHIFT + TARGET_PAGE_BITS),
+                                       V_L1_SHIFT / V_L2_BITS - 1, l1_map + i);
          if (rc != 0) {
              return rc;
          }
@@ -1709,13 +1752,13 @@ int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
      return walk_memory_regions_end(&data, 0, 0);
  }
  
-static int dump_region(void *priv, abi_ulong start,
-    abi_ulong end, unsigned long prot)
+static int dump_region(void *priv, target_ulong start,
+    target_ulong end, unsigned long prot)
  {
      FILE *f = (FILE *)priv;
  
-    (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
-        " "TARGET_ABI_FMT_lx" %c%c%c\n",
+    (void) fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx
+        " "TARGET_FMT_lx" %c%c%c\n",
          start, end, end - start,
          ((prot & PAGE_READ) ? 'r' : '-'),
          ((prot & PAGE_WRITE) ? 'w' : '-'),
@@ -1727,8 +1770,9 @@ static int dump_region(void *priv, abi_ulong start,
  /* dump memory mappings */
  void page_dump(FILE *f)
  {
-    (void) fprintf(f, "%-8s %-8s %-8s %s\n",
-            "start", "end", "size", "prot");
+    const int length = sizeof(target_ulong) * 2;
+    (void) fprintf(f, "%-*s %-*s %-*s %s\n",
+            length, "start", length, "end", length, "size", "prot");
      walk_memory_regions(f, dump_region);
  }
  
@@ -1754,7 +1798,7 @@ void page_set_flags(target_ulong start, target_ulong end, int flags)
         guest address space.  If this assert fires, it probably indicates
         a missing call to h2g_valid.  */
  #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
-    assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
+    assert(end < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
  #endif
      assert(start < end);
  
@@ -1775,7 +1819,7 @@ void page_set_flags(target_ulong start, target_ulong end, int flags)
          if (!(p->flags & PAGE_WRITE) &&
              (flags & PAGE_WRITE) &&
              p->first_tb) {
-            tb_invalidate_phys_page(addr, 0, NULL);
+            tb_invalidate_phys_page(addr, 0, NULL, false);
          }
          p->flags = flags;
      }
@@ -1791,7 +1835,7 @@ int page_check_range(target_ulong start, target_ulong len, int flags)
         guest address space.  If this assert fires, it probably indicates
         a missing call to h2g_valid.  */
  #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
-    assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
+    assert(start < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
  #endif
  
      if (len == 0) {
@@ -1831,7 +1875,6 @@ int page_check_range(target_ulong start, target_ulong len, int flags)
                      return -1;
                  }
              }
-            return 0;
          }
      }
      return 0;
@@ -1870,7 +1913,7 @@ int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
  
              /* and since the content will be modified, we must invalidate
                 the corresponding translated code. */
-            tb_invalidate_phys_page(addr, pc, puc);
+            tb_invalidate_phys_page(addr, pc, puc, true);
  #ifdef DEBUG_TB_CHECK
              tb_invalidate_check(addr);
  #endif