Git Repo - qemu.git/blobdiff - tcg/tcg.c
gen-icount: fold exitreq_label into TCGContext
[qemu.git] / tcg / tcg.c
index b46bf1acdff21933fb51eb655aae39e8bcfcd046..62f418ac8a6846a0f3183142c64a5844e2e02b3d 100644 (file)
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -23,7 +23,6 @@
  */
 
 /* define it to use liveness analysis (better code) */
-#define USE_LIVENESS_ANALYSIS
 #define USE_TCG_OPTIMIZATIONS
 
 #include "qemu/osdep.h"
 /* Define to dump the ELF file used to communicate with GDB.  */
 #undef DEBUG_JIT
 
-#if !defined(CONFIG_DEBUG_TCG) && !defined(NDEBUG)
-/* define it to suppress various consistency checks (faster) */
-#define NDEBUG
-#endif
-
 #include "qemu/cutils.h"
 #include "qemu/host-utils.h"
 #include "qemu/timer.h"
@@ -46,6 +40,9 @@
 #define NO_CPU_IO_DEFS
 #include "cpu.h"
 
+#include "exec/cpu-common.h"
+#include "exec/exec-all.h"
+
 #include "tcg-op.h"
 
 #if UINTPTR_MAX == UINT32_MAX
@@ -65,6 +62,7 @@
 /* Forward declarations for functions declared in tcg-target.inc.c and
    used here. */
 static void tcg_target_init(TCGContext *s);
+static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
 static void tcg_target_qemu_prologue(TCGContext *s);
 static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                         intptr_t value, intptr_t addend);
@@ -98,7 +96,8 @@ static void tcg_register_jit_int(void *buf, size_t size,
     __attribute__((unused));
 
 /* Forward declarations for functions declared and used in tcg-target.inc.c. */
-static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str);
+static const char *target_parse_constraint(TCGArgConstraint *ct,
+                                           const char *ct_str, TCGType type);
 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                        intptr_t arg2);
 static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
@@ -108,13 +107,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                        const int *const_args);
 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                        intptr_t arg2);
+static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
+                        TCGReg base, intptr_t ofs);
 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
 static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                   const TCGArgConstraint *arg_ct);
-static void tcg_out_tb_init(TCGContext *s);
-static bool tcg_out_tb_finalize(TCGContext *s);
-
+#ifdef TCG_TARGET_NEED_LDST_LABELS
+static bool tcg_out_ldst_finalize(TCGContext *s);
+#endif
 
+#define TCG_HIGHWATER 1024
 
 static TCGRegSet tcg_target_available_regs[2];
 static TCGRegSet tcg_target_call_clobber_regs;
@@ -229,7 +231,7 @@ static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
     intptr_t value = (intptr_t)ptr;
     TCGRelocation *r;
 
-    assert(!l->has_value);
+    tcg_debug_assert(!l->has_value);
 
     for (r = l->u.first_reloc; r != NULL; r = r->next) {
         patch_reloc(r->ptr, r->type, value, r->addend);
@@ -241,7 +243,7 @@ static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
 
 TCGLabel *gen_new_label(void)
 {
-    TCGContext *s = &tcg_ctx;
+    TCGContext *s = tcg_ctx;
     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
 
     *l = (TCGLabel){
@@ -318,8 +320,10 @@ typedef struct TCGHelperInfo {
 static const TCGHelperInfo all_helpers[] = {
 #include "exec/helper-tcg.h"
 };
+static GHashTable *helper_table;
 
 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
+static void process_op_defs(TCGContext *s);
 
 void tcg_context_init(TCGContext *s)
 {
@@ -327,11 +331,10 @@ void tcg_context_init(TCGContext *s)
     TCGOpDef *def;
     TCGArgConstraint *args_ct;
     int *sorted_args;
-    GHashTable *helper_table;
 
     memset(s, 0, sizeof(*s));
     s->nb_globals = 0;
-    
+
     /* Count total number of arguments and allocate the corresponding
        space */
     total_args = 0;
@@ -355,7 +358,7 @@ void tcg_context_init(TCGContext *s)
 
     /* Register helpers.  */
     /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
-    s->helpers = helper_table = g_hash_table_new(NULL, NULL);
+    helper_table = g_hash_table_new(NULL, NULL);
 
     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
         g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
@@ -363,6 +366,7 @@ void tcg_context_init(TCGContext *s)
     }
 
     tcg_target_init(s);
+    process_op_defs(s);
 
     /* Reverse the order of the saved registers, assuming they're all at
        the start of tcg_target_reg_alloc_order.  */
@@ -378,6 +382,29 @@ void tcg_context_init(TCGContext *s)
     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
     }
+
+    tcg_ctx = s;
+}
+
+/*
+ * Allocate TBs right before their corresponding translated code, making
+ * sure that TBs and code are on different cache lines.
+ */
+TranslationBlock *tcg_tb_alloc(TCGContext *s)
+{
+    uintptr_t align = qemu_icache_linesize;
+    TranslationBlock *tb;
+    void *next;
+
+    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
+    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
+
+    if (unlikely(next > s->code_gen_highwater)) {
+        return NULL;
+    }
+    s->code_gen_ptr = next;
+    s->data_gen_ptr = NULL;
+    return tb;
 }
 
 void tcg_prologue_init(TCGContext *s)
@@ -407,18 +434,25 @@ void tcg_prologue_init(TCGContext *s)
     /* Compute a high-water mark, at which we voluntarily flush the buffer
        and start over.  The size here is arbitrary, significantly larger
        than we expect the code generation for any one opcode to require.  */
-    s->code_gen_highwater = s->code_gen_buffer + (total_size - 1024);
+    s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
 
     tcg_register_jit(s->code_gen_buffer, total_size);
 
 #ifdef DEBUG_DISAS
     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
+        qemu_log_lock();
         qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
         log_disas(buf0, prologue_size);
         qemu_log("\n");
         qemu_log_flush();
+        qemu_log_unlock();
     }
 #endif
+
+    /* Assert that goto_ptr is implemented completely.  */
+    if (TCG_TARGET_HAS_goto_ptr) {
+        tcg_debug_assert(s->code_gen_epilogue != NULL);
+    }
 }
 
 void tcg_func_start(TCGContext *s)
@@ -436,19 +470,9 @@ void tcg_func_start(TCGContext *s)
     s->goto_tb_issue_mask = 0;
 #endif
 
-    s->gen_first_op_idx = 0;
-    s->gen_last_op_idx = -1;
-    s->gen_next_op_idx = 0;
-    s->gen_next_parm_idx = 0;
-
-    s->be = tcg_malloc(sizeof(TCGBackendData));
-}
-
-static inline int temp_idx(TCGContext *s, TCGTemp *ts)
-{
-    ptrdiff_t n = ts - s->temps;
-    tcg_debug_assert(n >= 0 && n < s->nb_temps);
-    return n;
+    s->gen_op_buf[0].next = 1;
+    s->gen_op_buf[0].prev = 0;
+    s->gen_next_op_idx = 1;
 }
 
 static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
@@ -460,13 +484,18 @@ static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
 
 static inline TCGTemp *tcg_global_alloc(TCGContext *s)
 {
+    TCGTemp *ts;
+
     tcg_debug_assert(s->nb_globals == s->nb_temps);
     s->nb_globals++;
-    return tcg_temp_alloc(s);
+    ts = tcg_temp_alloc(s);
+    ts->temp_global = 1;
+
+    return ts;
 }
 
-static int tcg_global_reg_new_internal(TCGContext *s, TCGType type,
-                                       TCGReg reg, const char *name)
+static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
+                                            TCGReg reg, const char *name)
 {
     TCGTemp *ts;
 
@@ -482,47 +511,46 @@ static int tcg_global_reg_new_internal(TCGContext *s, TCGType type,
     ts->name = name;
     tcg_regset_set_reg(s->reserved_regs, reg);
 
-    return temp_idx(s, ts);
+    return ts;
 }
 
 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
 {
-    int idx;
     s->frame_start = start;
     s->frame_end = start + size;
-    idx = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
-    s->frame_temp = &s->temps[idx];
+    s->frame_temp
+        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
 }
 
 TCGv_i32 tcg_global_reg_new_i32(TCGReg reg, const char *name)
 {
-    TCGContext *s = &tcg_ctx;
-    int idx;
+    TCGContext *s = tcg_ctx;
+    TCGTemp *t;
 
     if (tcg_regset_test_reg(s->reserved_regs, reg)) {
         tcg_abort();
     }
-    idx = tcg_global_reg_new_internal(s, TCG_TYPE_I32, reg, name);
-    return MAKE_TCGV_I32(idx);
+    t = tcg_global_reg_new_internal(s, TCG_TYPE_I32, reg, name);
+    return temp_tcgv_i32(t);
 }
 
 TCGv_i64 tcg_global_reg_new_i64(TCGReg reg, const char *name)
 {
-    TCGContext *s = &tcg_ctx;
-    int idx;
+    TCGContext *s = tcg_ctx;
+    TCGTemp *t;
 
     if (tcg_regset_test_reg(s->reserved_regs, reg)) {
         tcg_abort();
     }
-    idx = tcg_global_reg_new_internal(s, TCG_TYPE_I64, reg, name);
-    return MAKE_TCGV_I64(idx);
+    t = tcg_global_reg_new_internal(s, TCG_TYPE_I64, reg, name);
+    return temp_tcgv_i64(t);
 }
 
-int tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
-                                intptr_t offset, const char *name)
+TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
+                                     intptr_t offset, const char *name)
 {
-    TCGContext *s = &tcg_ctx;
-    TCGTemp *base_ts = &s->temps[GET_TCGV_PTR(base)];
+    TCGContext *s = tcg_ctx;
+    TCGTemp *base_ts = tcgv_ptr_temp(base);
     TCGTemp *ts = tcg_global_alloc(s);
     int indirect_reg = 0, bigendian = 0;
 #ifdef HOST_WORDS_BIGENDIAN
@@ -530,8 +558,12 @@ int tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
 #endif
 
     if (!base_ts->fixed_reg) {
-        indirect_reg = 1;
+        /* We do not support double-indirect registers.  */
+        tcg_debug_assert(!base_ts->indirect_reg);
         base_ts->indirect_base = 1;
+        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
+                            ? 2 : 1);
+        indirect_reg = 1;
     }
 
     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
@@ -557,7 +589,7 @@ int tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
         ts2->mem_offset = offset + (1 - bigendian) * 4;
         pstrcpy(buf, sizeof(buf), name);
         pstrcat(buf, sizeof(buf), "_1");
-        ts->name = strdup(buf);
+        ts2->name = strdup(buf);
     } else {
         ts->base_type = type;
         ts->type = type;
@@ -567,12 +599,12 @@ int tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
         ts->mem_offset = offset;
         ts->name = name;
     }
-    return temp_idx(s, ts);
+    return ts;
 }
 
-static int tcg_temp_new_internal(TCGType type, int temp_local)
+static TCGTemp *tcg_temp_new_internal(TCGType type, int temp_local)
 {
-    TCGContext *s = &tcg_ctx;
+    TCGContext *s = tcg_ctx;
     TCGTemp *ts;
     int idx, k;
 
@@ -607,36 +639,30 @@ static int tcg_temp_new_internal(TCGType type, int temp_local)
             ts->temp_allocated = 1;
             ts->temp_local = temp_local;
         }
-        idx = temp_idx(s, ts);
     }
 
 #if defined(CONFIG_DEBUG_TCG)
     s->temps_in_use++;
 #endif
-    return idx;
+    return ts;
 }
 
 TCGv_i32 tcg_temp_new_internal_i32(int temp_local)
 {
-    int idx;
-
-    idx = tcg_temp_new_internal(TCG_TYPE_I32, temp_local);
-    return MAKE_TCGV_I32(idx);
+    TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I32, temp_local);
+    return temp_tcgv_i32(t);
 }
 
 TCGv_i64 tcg_temp_new_internal_i64(int temp_local)
 {
-    int idx;
-
-    idx = tcg_temp_new_internal(TCG_TYPE_I64, temp_local);
-    return MAKE_TCGV_I64(idx);
+    TCGTemp *t = tcg_temp_new_internal(TCG_TYPE_I64, temp_local);
+    return temp_tcgv_i64(t);
 }
 
-static void tcg_temp_free_internal(int idx)
+static void tcg_temp_free_internal(TCGTemp *ts)
 {
-    TCGContext *s = &tcg_ctx;
-    TCGTemp *ts;
-    int k;
+    TCGContext *s = tcg_ctx;
+    int k, idx;
 
 #if defined(CONFIG_DEBUG_TCG)
     s->temps_in_use--;
@@ -645,23 +671,23 @@ static void tcg_temp_free_internal(int idx)
     }
 #endif
 
-    assert(idx >= s->nb_globals && idx < s->nb_temps);
-    ts = &s->temps[idx];
-    assert(ts->temp_allocated != 0);
+    tcg_debug_assert(ts->temp_global == 0);
+    tcg_debug_assert(ts->temp_allocated != 0);
     ts->temp_allocated = 0;
 
+    idx = temp_idx(ts);
     k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
     set_bit(idx, s->free_temps[k].l);
 }
 
 void tcg_temp_free_i32(TCGv_i32 arg)
 {
-    tcg_temp_free_internal(GET_TCGV_I32(arg));
+    tcg_temp_free_internal(tcgv_i32_temp(arg));
 }
 
 void tcg_temp_free_i64(TCGv_i64 arg)
 {
-    tcg_temp_free_internal(GET_TCGV_I64(arg));
+    tcg_temp_free_internal(tcgv_i64_temp(arg));
 }
 
 TCGv_i32 tcg_const_i32(int32_t val)
@@ -699,13 +725,13 @@ TCGv_i64 tcg_const_local_i64(int64_t val)
 #if defined(CONFIG_DEBUG_TCG)
 void tcg_clear_temp_count(void)
 {
-    TCGContext *s = &tcg_ctx;
+    TCGContext *s = tcg_ctx;
     s->temps_in_use = 0;
 }
 
 int tcg_check_temp_count(void)
 {
-    TCGContext *s = &tcg_ctx;
+    TCGContext *s = tcg_ctx;
     if (s->temps_in_use) {
         /* Clear the count so that we don't give another
          * warning immediately next time around.
@@ -717,17 +743,241 @@ int tcg_check_temp_count(void)
 }
 #endif
 
+/* Return true if OP may appear in the opcode stream.
+   Test the runtime variable that controls each opcode.  */
+bool tcg_op_supported(TCGOpcode op)
+{
+    switch (op) {
+    case INDEX_op_discard:
+    case INDEX_op_set_label:
+    case INDEX_op_call:
+    case INDEX_op_br:
+    case INDEX_op_mb:
+    case INDEX_op_insn_start:
+    case INDEX_op_exit_tb:
+    case INDEX_op_goto_tb:
+    case INDEX_op_qemu_ld_i32:
+    case INDEX_op_qemu_st_i32:
+    case INDEX_op_qemu_ld_i64:
+    case INDEX_op_qemu_st_i64:
+        return true;
+
+    case INDEX_op_goto_ptr:
+        return TCG_TARGET_HAS_goto_ptr;
+
+    case INDEX_op_mov_i32:
+    case INDEX_op_movi_i32:
+    case INDEX_op_setcond_i32:
+    case INDEX_op_brcond_i32:
+    case INDEX_op_ld8u_i32:
+    case INDEX_op_ld8s_i32:
+    case INDEX_op_ld16u_i32:
+    case INDEX_op_ld16s_i32:
+    case INDEX_op_ld_i32:
+    case INDEX_op_st8_i32:
+    case INDEX_op_st16_i32:
+    case INDEX_op_st_i32:
+    case INDEX_op_add_i32:
+    case INDEX_op_sub_i32:
+    case INDEX_op_mul_i32:
+    case INDEX_op_and_i32:
+    case INDEX_op_or_i32:
+    case INDEX_op_xor_i32:
+    case INDEX_op_shl_i32:
+    case INDEX_op_shr_i32:
+    case INDEX_op_sar_i32:
+        return true;
+
+    case INDEX_op_movcond_i32:
+        return TCG_TARGET_HAS_movcond_i32;
+    case INDEX_op_div_i32:
+    case INDEX_op_divu_i32:
+        return TCG_TARGET_HAS_div_i32;
+    case INDEX_op_rem_i32:
+    case INDEX_op_remu_i32:
+        return TCG_TARGET_HAS_rem_i32;
+    case INDEX_op_div2_i32:
+    case INDEX_op_divu2_i32:
+        return TCG_TARGET_HAS_div2_i32;
+    case INDEX_op_rotl_i32:
+    case INDEX_op_rotr_i32:
+        return TCG_TARGET_HAS_rot_i32;
+    case INDEX_op_deposit_i32:
+        return TCG_TARGET_HAS_deposit_i32;
+    case INDEX_op_extract_i32:
+        return TCG_TARGET_HAS_extract_i32;
+    case INDEX_op_sextract_i32:
+        return TCG_TARGET_HAS_sextract_i32;
+    case INDEX_op_add2_i32:
+        return TCG_TARGET_HAS_add2_i32;
+    case INDEX_op_sub2_i32:
+        return TCG_TARGET_HAS_sub2_i32;
+    case INDEX_op_mulu2_i32:
+        return TCG_TARGET_HAS_mulu2_i32;
+    case INDEX_op_muls2_i32:
+        return TCG_TARGET_HAS_muls2_i32;
+    case INDEX_op_muluh_i32:
+        return TCG_TARGET_HAS_muluh_i32;
+    case INDEX_op_mulsh_i32:
+        return TCG_TARGET_HAS_mulsh_i32;
+    case INDEX_op_ext8s_i32:
+        return TCG_TARGET_HAS_ext8s_i32;
+    case INDEX_op_ext16s_i32:
+        return TCG_TARGET_HAS_ext16s_i32;
+    case INDEX_op_ext8u_i32:
+        return TCG_TARGET_HAS_ext8u_i32;
+    case INDEX_op_ext16u_i32:
+        return TCG_TARGET_HAS_ext16u_i32;
+    case INDEX_op_bswap16_i32:
+        return TCG_TARGET_HAS_bswap16_i32;
+    case INDEX_op_bswap32_i32:
+        return TCG_TARGET_HAS_bswap32_i32;
+    case INDEX_op_not_i32:
+        return TCG_TARGET_HAS_not_i32;
+    case INDEX_op_neg_i32:
+        return TCG_TARGET_HAS_neg_i32;
+    case INDEX_op_andc_i32:
+        return TCG_TARGET_HAS_andc_i32;
+    case INDEX_op_orc_i32:
+        return TCG_TARGET_HAS_orc_i32;
+    case INDEX_op_eqv_i32:
+        return TCG_TARGET_HAS_eqv_i32;
+    case INDEX_op_nand_i32:
+        return TCG_TARGET_HAS_nand_i32;
+    case INDEX_op_nor_i32:
+        return TCG_TARGET_HAS_nor_i32;
+    case INDEX_op_clz_i32:
+        return TCG_TARGET_HAS_clz_i32;
+    case INDEX_op_ctz_i32:
+        return TCG_TARGET_HAS_ctz_i32;
+    case INDEX_op_ctpop_i32:
+        return TCG_TARGET_HAS_ctpop_i32;
+
+    case INDEX_op_brcond2_i32:
+    case INDEX_op_setcond2_i32:
+        return TCG_TARGET_REG_BITS == 32;
+
+    case INDEX_op_mov_i64:
+    case INDEX_op_movi_i64:
+    case INDEX_op_setcond_i64:
+    case INDEX_op_brcond_i64:
+    case INDEX_op_ld8u_i64:
+    case INDEX_op_ld8s_i64:
+    case INDEX_op_ld16u_i64:
+    case INDEX_op_ld16s_i64:
+    case INDEX_op_ld32u_i64:
+    case INDEX_op_ld32s_i64:
+    case INDEX_op_ld_i64:
+    case INDEX_op_st8_i64:
+    case INDEX_op_st16_i64:
+    case INDEX_op_st32_i64:
+    case INDEX_op_st_i64:
+    case INDEX_op_add_i64:
+    case INDEX_op_sub_i64:
+    case INDEX_op_mul_i64:
+    case INDEX_op_and_i64:
+    case INDEX_op_or_i64:
+    case INDEX_op_xor_i64:
+    case INDEX_op_shl_i64:
+    case INDEX_op_shr_i64:
+    case INDEX_op_sar_i64:
+    case INDEX_op_ext_i32_i64:
+    case INDEX_op_extu_i32_i64:
+        return TCG_TARGET_REG_BITS == 64;
+
+    case INDEX_op_movcond_i64:
+        return TCG_TARGET_HAS_movcond_i64;
+    case INDEX_op_div_i64:
+    case INDEX_op_divu_i64:
+        return TCG_TARGET_HAS_div_i64;
+    case INDEX_op_rem_i64:
+    case INDEX_op_remu_i64:
+        return TCG_TARGET_HAS_rem_i64;
+    case INDEX_op_div2_i64:
+    case INDEX_op_divu2_i64:
+        return TCG_TARGET_HAS_div2_i64;
+    case INDEX_op_rotl_i64:
+    case INDEX_op_rotr_i64:
+        return TCG_TARGET_HAS_rot_i64;
+    case INDEX_op_deposit_i64:
+        return TCG_TARGET_HAS_deposit_i64;
+    case INDEX_op_extract_i64:
+        return TCG_TARGET_HAS_extract_i64;
+    case INDEX_op_sextract_i64:
+        return TCG_TARGET_HAS_sextract_i64;
+    case INDEX_op_extrl_i64_i32:
+        return TCG_TARGET_HAS_extrl_i64_i32;
+    case INDEX_op_extrh_i64_i32:
+        return TCG_TARGET_HAS_extrh_i64_i32;
+    case INDEX_op_ext8s_i64:
+        return TCG_TARGET_HAS_ext8s_i64;
+    case INDEX_op_ext16s_i64:
+        return TCG_TARGET_HAS_ext16s_i64;
+    case INDEX_op_ext32s_i64:
+        return TCG_TARGET_HAS_ext32s_i64;
+    case INDEX_op_ext8u_i64:
+        return TCG_TARGET_HAS_ext8u_i64;
+    case INDEX_op_ext16u_i64:
+        return TCG_TARGET_HAS_ext16u_i64;
+    case INDEX_op_ext32u_i64:
+        return TCG_TARGET_HAS_ext32u_i64;
+    case INDEX_op_bswap16_i64:
+        return TCG_TARGET_HAS_bswap16_i64;
+    case INDEX_op_bswap32_i64:
+        return TCG_TARGET_HAS_bswap32_i64;
+    case INDEX_op_bswap64_i64:
+        return TCG_TARGET_HAS_bswap64_i64;
+    case INDEX_op_not_i64:
+        return TCG_TARGET_HAS_not_i64;
+    case INDEX_op_neg_i64:
+        return TCG_TARGET_HAS_neg_i64;
+    case INDEX_op_andc_i64:
+        return TCG_TARGET_HAS_andc_i64;
+    case INDEX_op_orc_i64:
+        return TCG_TARGET_HAS_orc_i64;
+    case INDEX_op_eqv_i64:
+        return TCG_TARGET_HAS_eqv_i64;
+    case INDEX_op_nand_i64:
+        return TCG_TARGET_HAS_nand_i64;
+    case INDEX_op_nor_i64:
+        return TCG_TARGET_HAS_nor_i64;
+    case INDEX_op_clz_i64:
+        return TCG_TARGET_HAS_clz_i64;
+    case INDEX_op_ctz_i64:
+        return TCG_TARGET_HAS_ctz_i64;
+    case INDEX_op_ctpop_i64:
+        return TCG_TARGET_HAS_ctpop_i64;
+    case INDEX_op_add2_i64:
+        return TCG_TARGET_HAS_add2_i64;
+    case INDEX_op_sub2_i64:
+        return TCG_TARGET_HAS_sub2_i64;
+    case INDEX_op_mulu2_i64:
+        return TCG_TARGET_HAS_mulu2_i64;
+    case INDEX_op_muls2_i64:
+        return TCG_TARGET_HAS_muls2_i64;
+    case INDEX_op_muluh_i64:
+        return TCG_TARGET_HAS_muluh_i64;
+    case INDEX_op_mulsh_i64:
+        return TCG_TARGET_HAS_mulsh_i64;
+
+    case NB_OPS:
+        break;
+    }
+    g_assert_not_reached();
+}
+
 /* Note: we convert the 64 bit args to 32 bit and do some alignment
    and endian swap. Maybe it would be better to do the alignment
    and endian swap in tcg_reg_alloc_call(). */
-void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
-                   int nargs, TCGArg *args)
+void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
 {
-    int i, real_args, nb_rets, pi, pi_first;
+    TCGContext *s = tcg_ctx;
+    int i, real_args, nb_rets, pi;
     unsigned sizemask, flags;
     TCGHelperInfo *info;
+    TCGOp *op;
 
-    info = g_hash_table_lookup(s->helpers, (gpointer)func);
+    info = g_hash_table_lookup(helper_table, (gpointer)func);
     flags = info->flags;
     sizemask = info->sizemask;
 
@@ -738,20 +988,20 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
     int orig_sizemask = sizemask;
     int orig_nargs = nargs;
     TCGv_i64 retl, reth;
+    TCGTemp *split_args[MAX_OPC_PARAM];
 
     TCGV_UNUSED_I64(retl);
     TCGV_UNUSED_I64(reth);
     if (sizemask != 0) {
-        TCGArg *split_args = __builtin_alloca(sizeof(TCGArg) * nargs * 2);
         for (i = real_args = 0; i < nargs; ++i) {
             int is_64bit = sizemask & (1 << (i+1)*2);
             if (is_64bit) {
-                TCGv_i64 orig = MAKE_TCGV_I64(args[i]);
+                TCGv_i64 orig = temp_tcgv_i64(args[i]);
                 TCGv_i32 h = tcg_temp_new_i32();
                 TCGv_i32 l = tcg_temp_new_i32();
                 tcg_gen_extr_i64_i32(l, h, orig);
-                split_args[real_args++] = GET_TCGV_I32(h);
-                split_args[real_args++] = GET_TCGV_I32(l);
+                split_args[real_args++] = tcgv_i32_temp(h);
+                split_args[real_args++] = tcgv_i32_temp(l);
             } else {
                 split_args[real_args++] = args[i];
             }
@@ -766,19 +1016,31 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
         int is_signed = sizemask & (2 << (i+1)*2);
         if (!is_64bit) {
             TCGv_i64 temp = tcg_temp_new_i64();
-            TCGv_i64 orig = MAKE_TCGV_I64(args[i]);
+            TCGv_i64 orig = temp_tcgv_i64(args[i]);
             if (is_signed) {
                 tcg_gen_ext32s_i64(temp, orig);
             } else {
                 tcg_gen_ext32u_i64(temp, orig);
             }
-            args[i] = GET_TCGV_I64(temp);
+            args[i] = tcgv_i64_temp(temp);
         }
     }
 #endif /* TCG_TARGET_EXTEND_ARGS */
 
-    pi_first = pi = s->gen_next_parm_idx;
-    if (ret != TCG_CALL_DUMMY_ARG) {
+    i = s->gen_next_op_idx;
+    tcg_debug_assert(i < OPC_BUF_SIZE);
+    s->gen_op_buf[0].prev = i;
+    s->gen_next_op_idx = i + 1;
+    op = &s->gen_op_buf[i];
+
+    /* Set links for sequential allocation during translation.  */
+    memset(op, 0, offsetof(TCGOp, args));
+    op->opc = INDEX_op_call;
+    op->prev = i - 1;
+    op->next = i + 1;
+
+    pi = 0;
+    if (ret != NULL) {
 #if defined(__sparc__) && !defined(__arch64__) \
     && !defined(CONFIG_TCG_INTERPRETER)
         if (orig_sizemask & 1) {
@@ -787,31 +1049,33 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
                two return temporaries, and reassemble below.  */
             retl = tcg_temp_new_i64();
             reth = tcg_temp_new_i64();
-            s->gen_opparam_buf[pi++] = GET_TCGV_I64(reth);
-            s->gen_opparam_buf[pi++] = GET_TCGV_I64(retl);
+            op->args[pi++] = tcgv_i64_arg(reth);
+            op->args[pi++] = tcgv_i64_arg(retl);
             nb_rets = 2;
         } else {
-            s->gen_opparam_buf[pi++] = ret;
+            op->args[pi++] = temp_arg(ret);
             nb_rets = 1;
         }
 #else
         if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
 #ifdef HOST_WORDS_BIGENDIAN
-            s->gen_opparam_buf[pi++] = ret + 1;
-            s->gen_opparam_buf[pi++] = ret;
+            op->args[pi++] = temp_arg(ret + 1);
+            op->args[pi++] = temp_arg(ret);
 #else
-            s->gen_opparam_buf[pi++] = ret;
-            s->gen_opparam_buf[pi++] = ret + 1;
+            op->args[pi++] = temp_arg(ret);
+            op->args[pi++] = temp_arg(ret + 1);
 #endif
             nb_rets = 2;
         } else {
-            s->gen_opparam_buf[pi++] = ret;
+            op->args[pi++] = temp_arg(ret);
             nb_rets = 1;
         }
 #endif
     } else {
         nb_rets = 0;
     }
+    op->callo = nb_rets;
+
     real_args = 0;
     for (i = 0; i < nargs; i++) {
         int is_64bit = sizemask & (1 << (i+1)*2);
@@ -819,57 +1083,41 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
             /* some targets want aligned 64 bit args */
             if (real_args & 1) {
-                s->gen_opparam_buf[pi++] = TCG_CALL_DUMMY_ARG;
+                op->args[pi++] = TCG_CALL_DUMMY_ARG;
                 real_args++;
             }
 #endif
-           /* If stack grows up, then we will be placing successive
-              arguments at lower addresses, which means we need to
-              reverse the order compared to how we would normally
-              treat either big or little-endian.  For those arguments
-              that will wind up in registers, this still works for
-              HPPA (the only current STACK_GROWSUP target) since the
-              argument registers are *also* allocated in decreasing
-              order.  If another such target is added, this logic may
-              have to get more complicated to differentiate between
-              stack arguments and register arguments.  */
+           /* If stack grows up, then we will be placing successive
+              arguments at lower addresses, which means we need to
+              reverse the order compared to how we would normally
+              treat either big or little-endian.  For those arguments
+              that will wind up in registers, this still works for
+              HPPA (the only current STACK_GROWSUP target) since the
+              argument registers are *also* allocated in decreasing
+              order.  If another such target is added, this logic may
+              have to get more complicated to differentiate between
+              stack arguments and register arguments.  */
 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
-            s->gen_opparam_buf[pi++] = args[i] + 1;
-            s->gen_opparam_buf[pi++] = args[i];
+            op->args[pi++] = temp_arg(args[i] + 1);
+            op->args[pi++] = temp_arg(args[i]);
 #else
-            s->gen_opparam_buf[pi++] = args[i];
-            s->gen_opparam_buf[pi++] = args[i] + 1;
+            op->args[pi++] = temp_arg(args[i]);
+            op->args[pi++] = temp_arg(args[i] + 1);
 #endif
             real_args += 2;
             continue;
         }
 
-        s->gen_opparam_buf[pi++] = args[i];
+        op->args[pi++] = temp_arg(args[i]);
         real_args++;
     }
-    s->gen_opparam_buf[pi++] = (uintptr_t)func;
-    s->gen_opparam_buf[pi++] = flags;
-
-    i = s->gen_next_op_idx;
-    tcg_debug_assert(i < OPC_BUF_SIZE);
-    tcg_debug_assert(pi <= OPPARAM_BUF_SIZE);
+    op->args[pi++] = (uintptr_t)func;
+    op->args[pi++] = flags;
+    op->calli = real_args;
 
-    /* Set links for sequential allocation during translation.  */
-    s->gen_op_buf[i] = (TCGOp){
-        .opc = INDEX_op_call,
-        .callo = nb_rets,
-        .calli = real_args,
-        .args = pi_first,
-        .prev = i - 1,
-        .next = i + 1
-    };
-
-    /* Make sure the calli field didn't overflow.  */
-    tcg_debug_assert(s->gen_op_buf[i].calli == real_args);
-
-    s->gen_last_op_idx = i;
-    s->gen_next_op_idx = i + 1;
-    s->gen_next_parm_idx = pi;
+    /* Make sure the fields didn't overflow.  */
+    tcg_debug_assert(op->calli == real_args);
+    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
 
 #if defined(__sparc__) && !defined(__arch64__) \
     && !defined(CONFIG_TCG_INTERPRETER)
@@ -877,10 +1125,8 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
     for (i = real_args = 0; i < orig_nargs; ++i) {
         int is_64bit = orig_sizemask & (1 << (i+1)*2);
         if (is_64bit) {
-            TCGv_i32 h = MAKE_TCGV_I32(args[real_args++]);
-            TCGv_i32 l = MAKE_TCGV_I32(args[real_args++]);
-            tcg_temp_free_i32(h);
-            tcg_temp_free_i32(l);
+            tcg_temp_free_internal(args[real_args++]);
+            tcg_temp_free_internal(args[real_args++]);
         } else {
             real_args++;
         }
@@ -889,7 +1135,7 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
         /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
            Note that describing these as TCGv_i64 eliminates an unnecessary
            zero-extension that tcg_gen_concat_i32_i64 would create.  */
-        tcg_gen_concat32_i64(MAKE_TCGV_I64(ret), retl, reth);
+        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
         tcg_temp_free_i64(retl);
         tcg_temp_free_i64(reth);
     }
@@ -897,8 +1143,7 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
     for (i = 0; i < nargs; ++i) {
         int is_64bit = sizemask & (1 << (i+1)*2);
         if (!is_64bit) {
-            TCGv_i64 temp = MAKE_TCGV_I64(args[i]);
-            tcg_temp_free_i64(temp);
+            tcg_temp_free_internal(args[i]);
         }
     }
 #endif /* TCG_TARGET_EXTEND_ARGS */
@@ -906,23 +1151,16 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
 
 static void tcg_reg_alloc_start(TCGContext *s)
 {
-    int i;
+    int i, n;
     TCGTemp *ts;
-    for(i = 0; i < s->nb_globals; i++) {
+
+    for (i = 0, n = s->nb_globals; i < n; i++) {
         ts = &s->temps[i];
-        if (ts->fixed_reg) {
-            ts->val_type = TEMP_VAL_REG;
-        } else {
-            ts->val_type = TEMP_VAL_MEM;
-        }
+        ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
     }
-    for(i = s->nb_globals; i < s->nb_temps; i++) {
+    for (n = s->nb_temps; i < n; i++) {
         ts = &s->temps[i];
-        if (ts->temp_local) {
-            ts->val_type = TEMP_VAL_MEM;
-        } else {
-            ts->val_type = TEMP_VAL_DEAD;
-        }
+        ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
         ts->mem_allocated = 0;
         ts->fixed_reg = 0;
     }
@@ -933,9 +1171,9 @@ static void tcg_reg_alloc_start(TCGContext *s)
 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                  TCGTemp *ts)
 {
-    int idx = temp_idx(s, ts);
+    int idx = temp_idx(ts);
 
-    if (idx < s->nb_globals) {
+    if (ts->temp_global) {
         pstrcpy(buf, buf_size, ts->name);
     } else if (ts->temp_local) {
         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
@@ -945,19 +1183,18 @@ static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
     return buf;
 }
 
-static char *tcg_get_arg_str_idx(TCGContext *s, char *buf,
-                                 int buf_size, int idx)
+static char *tcg_get_arg_str(TCGContext *s, char *buf,
+                             int buf_size, TCGArg arg)
 {
-    assert(idx >= 0 && idx < s->nb_temps);
-    return tcg_get_arg_str_ptr(s, buf, buf_size, &s->temps[idx]);
+    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
 }
 
 /* Find helper name.  */
 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
 {
     const char *ret = NULL;
-    if (s->helpers) {
-        TCGHelperInfo *info = g_hash_table_lookup(s->helpers, (gpointer)val);
+    if (helper_table) {
+        TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
         if (info) {
             ret = info->name;
         }
@@ -997,34 +1234,49 @@ static const char * const ldst_name[] =
     [MO_BEQ]  = "beq",
 };
 
+static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
+#ifdef ALIGNED_ONLY
+    [MO_UNALN >> MO_ASHIFT]    = "un+",
+    [MO_ALIGN >> MO_ASHIFT]    = "",
+#else
+    [MO_UNALN >> MO_ASHIFT]    = "",
+    [MO_ALIGN >> MO_ASHIFT]    = "al+",
+#endif
+    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
+    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
+    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
+    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
+    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
+    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
+};
+
 void tcg_dump_ops(TCGContext *s)
 {
     char buf[128];
     TCGOp *op;
     int oi;
 
-    for (oi = s->gen_first_op_idx; oi >= 0; oi = op->next) {
+    for (oi = s->gen_op_buf[0].next; oi != 0; oi = op->next) {
         int i, k, nb_oargs, nb_iargs, nb_cargs;
         const TCGOpDef *def;
-        const TCGArg *args;
         TCGOpcode c;
+        int col = 0;
 
         op = &s->gen_op_buf[oi];
         c = op->opc;
         def = &tcg_op_defs[c];
-        args = &s->gen_opparam_buf[op->args];
 
         if (c == INDEX_op_insn_start) {
-            qemu_log("%s ----", oi != s->gen_first_op_idx ? "\n" : "");
+            col += qemu_log("%s ----", oi != s->gen_op_buf[0].next ? "\n" : "");
 
             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                 target_ulong a;
 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-                a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2];
+                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
 #else
-                a = args[i];
+                a = op->args[i];
 #endif
-                qemu_log(" " TARGET_FMT_lx, a);
+                col += qemu_log(" " TARGET_FMT_lx, a);
             }
         } else if (c == INDEX_op_call) {
             /* variable number of arguments */
@@ -1033,23 +1285,23 @@ void tcg_dump_ops(TCGContext *s)
             nb_cargs = def->nb_cargs;
 
             /* function name, flags, out args */
-            qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
-                     tcg_find_helper(s, args[nb_oargs + nb_iargs]),
-                     args[nb_oargs + nb_iargs + 1], nb_oargs);
+            col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
+                            tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
+                            op->args[nb_oargs + nb_iargs + 1], nb_oargs);
             for (i = 0; i < nb_oargs; i++) {
-                qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
-                                                   args[i]));
+                col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
+                                                       op->args[i]));
             }
             for (i = 0; i < nb_iargs; i++) {
-                TCGArg arg = args[nb_oargs + i];
+                TCGArg arg = op->args[nb_oargs + i];
                 const char *t = "<dummy>";
                 if (arg != TCG_CALL_DUMMY_ARG) {
-                    t = tcg_get_arg_str_idx(s, buf, sizeof(buf), arg);
+                    t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                 }
-                qemu_log(",%s", t);
+                col += qemu_log(",%s", t);
             }
         } else {
-            qemu_log(" %s ", def->name);
+            col += qemu_log(" %s ", def->name);
 
             nb_oargs = def->nb_oargs;
             nb_iargs = def->nb_iargs;
@@ -1058,17 +1310,17 @@ void tcg_dump_ops(TCGContext *s)
             k = 0;
             for (i = 0; i < nb_oargs; i++) {
                 if (k != 0) {
-                    qemu_log(",");
+                    col += qemu_log(",");
                 }
-                qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
-                                                   args[k++]));
+                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
+                                                      op->args[k++]));
             }
             for (i = 0; i < nb_iargs; i++) {
                 if (k != 0) {
-                    qemu_log(",");
+                    col += qemu_log(",");
                 }
-                qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
-                                                   args[k++]));
+                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
+                                                      op->args[k++]));
             }
             switch (c) {
             case INDEX_op_brcond_i32:
@@ -1079,10 +1331,11 @@ void tcg_dump_ops(TCGContext *s)
             case INDEX_op_brcond_i64:
             case INDEX_op_setcond_i64:
             case INDEX_op_movcond_i64:
-                if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) {
-                    qemu_log(",%s", cond_name[args[k++]]);
+                if (op->args[k] < ARRAY_SIZE(cond_name)
+                    && cond_name[op->args[k]]) {
+                    col += qemu_log(",%s", cond_name[op->args[k++]]);
                 } else {
-                    qemu_log(",$0x%" TCG_PRIlx, args[k++]);
+                    col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
                 }
                 i = 1;
                 break;
@@ -1091,23 +1344,17 @@ void tcg_dump_ops(TCGContext *s)
             case INDEX_op_qemu_ld_i64:
             case INDEX_op_qemu_st_i64:
                 {
-                    TCGMemOpIdx oi = args[k++];
+                    TCGMemOpIdx oi = op->args[k++];
                     TCGMemOp op = get_memop(oi);
                     unsigned ix = get_mmuidx(oi);
 
                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
-                        qemu_log(",$0x%x,%u", op, ix);
+                        col += qemu_log(",$0x%x,%u", op, ix);
                     } else {
-                        const char *s_al = "", *s_op;
-                        if (op & MO_AMASK) {
-                            if ((op & MO_AMASK) == MO_ALIGN) {
-                                s_al = "al+";
-                            } else {
-                                s_al = "un+";
-                            }
-                        }
+                        const char *s_al, *s_op;
+                        s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
-                        qemu_log(",%s%s,%u", s_al, s_op, ix);
+                        col += qemu_log(",%s%s,%u", s_al, s_op, ix);
                     }
                     i = 1;
                 }
@@ -1122,14 +1369,40 @@ void tcg_dump_ops(TCGContext *s)
             case INDEX_op_brcond_i32:
             case INDEX_op_brcond_i64:
             case INDEX_op_brcond2_i32:
-                qemu_log("%s$L%d", k ? "," : "", arg_label(args[k])->id);
+                col += qemu_log("%s$L%d", k ? "," : "",
+                                arg_label(op->args[k])->id);
                 i++, k++;
                 break;
             default:
                 break;
             }
             for (; i < nb_cargs; i++, k++) {
-                qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", args[k]);
+                col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
+            }
+        }
+        if (op->life) {
+            unsigned life = op->life;
+
+            for (; col < 48; ++col) {
+                putc(' ', qemu_logfile);
+            }
+
+            if (life & (SYNC_ARG * 3)) {
+                qemu_log("  sync:");
+                for (i = 0; i < 2; ++i) {
+                    if (life & (SYNC_ARG << i)) {
+                        qemu_log(" %d", i);
+                    }
+                }
+            }
+            life /= DEAD_ARG;
+            if (life) {
+                qemu_log("  dead:");
+                for (i = 0; life; ++i, life >>= 1) {
+                    if (life & 1) {
+                        qemu_log(" %d", i);
+                    }
+                }
             }
         }
         qemu_log("\n");
@@ -1180,105 +1453,79 @@ static void sort_constraints(TCGOpDef *def, int start, int n)
     }
 }
 
-void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs)
+static void process_op_defs(TCGContext *s)
 {
     TCGOpcode op;
-    TCGOpDef *def;
-    const char *ct_str;
-    int i, nb_args;
 
-    for(;;) {
-        if (tdefs->op == (TCGOpcode)-1)
-            break;
-        op = tdefs->op;
-        assert((unsigned)op < NB_OPS);
-        def = &tcg_op_defs[op];
-#if defined(CONFIG_DEBUG_TCG)
-        /* Duplicate entry in op definitions? */
-        assert(!def->used);
-        def->used = 1;
-#endif
+    for (op = 0; op < NB_OPS; op++) {
+        TCGOpDef *def = &tcg_op_defs[op];
+        const TCGTargetOpDef *tdefs;
+        TCGType type;
+        int i, nb_args;
+
+        if (def->flags & TCG_OPF_NOT_PRESENT) {
+            continue;
+        }
+
         nb_args = def->nb_iargs + def->nb_oargs;
-        for(i = 0; i < nb_args; i++) {
-            ct_str = tdefs->args_ct_str[i];
-            /* Incomplete TCGTargetOpDef entry? */
-            assert(ct_str != NULL);
-            tcg_regset_clear(def->args_ct[i].u.regs);
+        if (nb_args == 0) {
+            continue;
+        }
+
+        tdefs = tcg_target_op_def(op);
+        /* Missing TCGTargetOpDef entry. */
+        tcg_debug_assert(tdefs != NULL);
+
+        type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
+        for (i = 0; i < nb_args; i++) {
+            const char *ct_str = tdefs->args_ct_str[i];
+            /* Incomplete TCGTargetOpDef entry. */
+            tcg_debug_assert(ct_str != NULL);
+
+            def->args_ct[i].u.regs = 0;
             def->args_ct[i].ct = 0;
-            if (ct_str[0] >= '0' && ct_str[0] <= '9') {
-                int oarg;
-                oarg = ct_str[0] - '0';
-                assert(oarg < def->nb_oargs);
-                assert(def->args_ct[oarg].ct & TCG_CT_REG);
-                /* TCG_CT_ALIAS is for the output arguments. The input
-                   argument is tagged with TCG_CT_IALIAS. */
-                def->args_ct[i] = def->args_ct[oarg];
-                def->args_ct[oarg].ct = TCG_CT_ALIAS;
-                def->args_ct[oarg].alias_index = i;
-                def->args_ct[i].ct |= TCG_CT_IALIAS;
-                def->args_ct[i].alias_index = oarg;
-            } else {
-                for(;;) {
-                    if (*ct_str == '\0')
-                        break;
-                    switch(*ct_str) {
-                    case 'i':
-                        def->args_ct[i].ct |= TCG_CT_CONST;
-                        ct_str++;
-                        break;
-                    default:
-                        if (target_parse_constraint(&def->args_ct[i], &ct_str) < 0) {
-                            fprintf(stderr, "Invalid constraint '%s' for arg %d of operation '%s'\n",
-                                    ct_str, i, def->name);
-                            exit(1);
-                        }
+            while (*ct_str != '\0') {
+                switch(*ct_str) {
+                case '0' ... '9':
+                    {
+                        int oarg = *ct_str - '0';
+                        tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
+                        tcg_debug_assert(oarg < def->nb_oargs);
+                        tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
+                        /* TCG_CT_ALIAS is for the output arguments.
+                           The input is tagged with TCG_CT_IALIAS. */
+                        def->args_ct[i] = def->args_ct[oarg];
+                        def->args_ct[oarg].ct |= TCG_CT_ALIAS;
+                        def->args_ct[oarg].alias_index = i;
+                        def->args_ct[i].ct |= TCG_CT_IALIAS;
+                        def->args_ct[i].alias_index = oarg;
                     }
+                    ct_str++;
+                    break;
+                case '&':
+                    def->args_ct[i].ct |= TCG_CT_NEWREG;
+                    ct_str++;
+                    break;
+                case 'i':
+                    def->args_ct[i].ct |= TCG_CT_CONST;
+                    ct_str++;
+                    break;
+                default:
+                    ct_str = target_parse_constraint(&def->args_ct[i],
+                                                     ct_str, type);
+                    /* Typo in TCGTargetOpDef constraint. */
+                    tcg_debug_assert(ct_str != NULL);
                 }
             }
         }
 
         /* TCGTargetOpDef entry with too much information? */
-        assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
+        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
 
         /* sort the constraints (XXX: this is just an heuristic) */
         sort_constraints(def, 0, def->nb_oargs);
         sort_constraints(def, def->nb_oargs, def->nb_iargs);
-
-#if 0
-        {
-            int i;
-
-            printf("%s: sorted=", def->name);
-            for(i = 0; i < def->nb_oargs + def->nb_iargs; i++)
-                printf(" %d", def->sorted_args[i]);
-            printf("\n");
-        }
-#endif
-        tdefs++;
     }
-
-#if defined(CONFIG_DEBUG_TCG)
-    i = 0;
-    for (op = 0; op < tcg_op_defs_max; op++) {
-        const TCGOpDef *def = &tcg_op_defs[op];
-        if (def->flags & TCG_OPF_NOT_PRESENT) {
-            /* Wrong entry in op definitions? */
-            if (def->used) {
-                fprintf(stderr, "Invalid op definition for %s\n", def->name);
-                i = 1;
-            }
-        } else {
-            /* Missing entry in op definitions? */
-            if (!def->used) {
-                fprintf(stderr, "Missing op definition for %s\n", def->name);
-                i = 1;
-            }
-        }
-    }
-    if (i == 1) {
-        tcg_abort();
-    }
-#endif
 }
 
 void tcg_op_remove(TCGContext *s, TCGOp *op)
@@ -1286,75 +1533,123 @@ void tcg_op_remove(TCGContext *s, TCGOp *op)
     int next = op->next;
     int prev = op->prev;
 
-    if (next >= 0) {
-        s->gen_op_buf[next].prev = prev;
-    } else {
-        s->gen_last_op_idx = prev;
-    }
-    if (prev >= 0) {
-        s->gen_op_buf[prev].next = next;
-    } else {
-        s->gen_first_op_idx = next;
-    }
+    /* We should never attempt to remove the list terminator.  */
+    tcg_debug_assert(op != &s->gen_op_buf[0]);
+
+    s->gen_op_buf[next].prev = prev;
+    s->gen_op_buf[prev].next = next;
 
-    memset(op, -1, sizeof(*op));
+    memset(op, 0, sizeof(*op));
 
 #ifdef CONFIG_PROFILER
     s->del_op_count++;
 #endif
 }
 
-#ifdef USE_LIVENESS_ANALYSIS
+TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
+                            TCGOpcode opc, int nargs)
+{
+    int oi = s->gen_next_op_idx;
+    int prev = old_op->prev;
+    int next = old_op - s->gen_op_buf;
+    TCGOp *new_op;
+
+    tcg_debug_assert(oi < OPC_BUF_SIZE);
+    s->gen_next_op_idx = oi + 1;
+
+    new_op = &s->gen_op_buf[oi];
+    *new_op = (TCGOp){
+        .opc = opc,
+        .prev = prev,
+        .next = next
+    };
+    s->gen_op_buf[prev].next = oi;
+    old_op->prev = oi;
+
+    return new_op;
+}
+
+TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
+                           TCGOpcode opc, int nargs)
+{
+    int oi = s->gen_next_op_idx;
+    int prev = old_op - s->gen_op_buf;
+    int next = old_op->next;
+    TCGOp *new_op;
+
+    tcg_debug_assert(oi < OPC_BUF_SIZE);
+    s->gen_next_op_idx = oi + 1;
+
+    new_op = &s->gen_op_buf[oi];
+    *new_op = (TCGOp){
+        .opc = opc,
+        .prev = prev,
+        .next = next
+    };
+    s->gen_op_buf[next].prev = oi;
+    old_op->next = oi;
+
+    return new_op;
+}
+
+#define TS_DEAD  1
+#define TS_MEM   2
+
+#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
+#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
+
 /* liveness analysis: end of function: all temps are dead, and globals
    should be in memory. */
-static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps,
-                                   uint8_t *mem_temps)
+static void tcg_la_func_end(TCGContext *s)
 {
-    memset(dead_temps, 1, s->nb_temps);
-    memset(mem_temps, 1, s->nb_globals);
-    memset(mem_temps + s->nb_globals, 0, s->nb_temps - s->nb_globals);
+    int ng = s->nb_globals;
+    int nt = s->nb_temps;
+    int i;
+
+    for (i = 0; i < ng; ++i) {
+        s->temps[i].state = TS_DEAD | TS_MEM;
+    }
+    for (i = ng; i < nt; ++i) {
+        s->temps[i].state = TS_DEAD;
+    }
 }
 
 /* liveness analysis: end of basic block: all temps are dead, globals
    and local temps should be in memory. */
-static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
-                                 uint8_t *mem_temps)
+static void tcg_la_bb_end(TCGContext *s)
 {
+    int ng = s->nb_globals;
+    int nt = s->nb_temps;
     int i;
 
-    memset(dead_temps, 1, s->nb_temps);
-    memset(mem_temps, 1, s->nb_globals);
-    for(i = s->nb_globals; i < s->nb_temps; i++) {
-        mem_temps[i] = s->temps[i].temp_local;
+    for (i = 0; i < ng; ++i) {
+        s->temps[i].state = TS_DEAD | TS_MEM;
+    }
+    for (i = ng; i < nt; ++i) {
+        s->temps[i].state = (s->temps[i].temp_local
+                             ? TS_DEAD | TS_MEM
+                             : TS_DEAD);
     }
 }
 
-/* Liveness analysis : update the opc_dead_args array to tell if a
+/* Liveness analysis : update the life data of each op to tell if a
    given input arguments is dead. Instructions updating dead
    temporaries are removed. */
-static void tcg_liveness_analysis(TCGContext *s)
+static void liveness_pass_1(TCGContext *s)
 {
-    uint8_t *dead_temps, *mem_temps;
-    int oi, oi_prev, nb_ops;
+    int nb_globals = s->nb_globals;
+    int oi, oi_prev;
 
-    nb_ops = s->gen_next_op_idx;
-    s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
-    s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
-    
-    dead_temps = tcg_malloc(s->nb_temps);
-    mem_temps = tcg_malloc(s->nb_temps);
-    tcg_la_func_end(s, dead_temps, mem_temps);
+    tcg_la_func_end(s);
 
-    for (oi = s->gen_last_op_idx; oi >= 0; oi = oi_prev) {
+    for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) {
         int i, nb_iargs, nb_oargs;
         TCGOpcode opc_new, opc_new2;
         bool have_opc_new2;
-        uint16_t dead_args;
-        uint8_t sync_args;
-        TCGArg arg;
+        TCGLifeData arg_life = 0;
+        TCGTemp *arg_ts;
 
         TCGOp * const op = &s->gen_op_buf[oi];
-        TCGArg * const args = &s->gen_opparam_buf[op->args];
         TCGOpcode opc = op->opc;
         const TCGOpDef *def = &tcg_op_defs[opc];
 
@@ -1367,13 +1662,13 @@ static void tcg_liveness_analysis(TCGContext *s)
 
                 nb_oargs = op->callo;
                 nb_iargs = op->calli;
-                call_flags = args[nb_oargs + nb_iargs + 1];
+                call_flags = op->args[nb_oargs + nb_iargs + 1];
 
                 /* pure functions can be removed if their result is unused */
                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                     for (i = 0; i < nb_oargs; i++) {
-                        arg = args[i];
-                        if (!dead_temps[arg] || mem_temps[arg]) {
+                        arg_ts = arg_temp(op->args[i]);
+                        if (arg_ts->state != TS_DEAD) {
                             goto do_not_remove_call;
                         }
                     }
@@ -1382,46 +1677,44 @@ static void tcg_liveness_analysis(TCGContext *s)
                 do_not_remove_call:
 
                     /* output args are dead */
-                    dead_args = 0;
-                    sync_args = 0;
                     for (i = 0; i < nb_oargs; i++) {
-                        arg = args[i];
-                        if (dead_temps[arg]) {
-                            dead_args |= (1 << i);
+                        arg_ts = arg_temp(op->args[i]);
+                        if (arg_ts->state & TS_DEAD) {
+                            arg_life |= DEAD_ARG << i;
                         }
-                        if (mem_temps[arg]) {
-                            sync_args |= (1 << i);
+                        if (arg_ts->state & TS_MEM) {
+                            arg_life |= SYNC_ARG << i;
                         }
-                        dead_temps[arg] = 1;
-                        mem_temps[arg] = 0;
+                        arg_ts->state = TS_DEAD;
                     }
 
-                    if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
-                        /* globals should be synced to memory */
-                        memset(mem_temps, 1, s->nb_globals);
-                    }
                     if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                         TCG_CALL_NO_READ_GLOBALS))) {
                         /* globals should go back to memory */
-                        memset(dead_temps, 1, s->nb_globals);
+                        for (i = 0; i < nb_globals; i++) {
+                            s->temps[i].state = TS_DEAD | TS_MEM;
+                        }
+                    } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
+                        /* globals should be synced to memory */
+                        for (i = 0; i < nb_globals; i++) {
+                            s->temps[i].state |= TS_MEM;
+                        }
                     }
 
                     /* record arguments that die in this helper */
                     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
-                        arg = args[i];
-                        if (arg != TCG_CALL_DUMMY_ARG) {
-                            if (dead_temps[arg]) {
-                                dead_args |= (1 << i);
-                            }
+                        arg_ts = arg_temp(op->args[i]);
+                        if (arg_ts && arg_ts->state & TS_DEAD) {
+                            arg_life |= DEAD_ARG << i;
                         }
                     }
                     /* input arguments are live for preceding opcodes */
-                    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
-                        arg = args[i];
-                        dead_temps[arg] = 0;
+                    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
+                        arg_ts = arg_temp(op->args[i]);
+                        if (arg_ts) {
+                            arg_ts->state &= ~TS_DEAD;
+                        }
                     }
-                    s->op_dead_args[oi] = dead_args;
-                    s->op_sync_args[oi] = sync_args;
                 }
             }
             break;
@@ -1429,8 +1722,7 @@ static void tcg_liveness_analysis(TCGContext *s)
             break;
         case INDEX_op_discard:
             /* mark the temporary as dead */
-            dead_temps[args[0]] = 1;
-            mem_temps[args[0]] = 0;
+            arg_temp(op->args[0])->state = TS_DEAD;
             break;
 
         case INDEX_op_add2_i32:
@@ -1451,15 +1743,15 @@ static void tcg_liveness_analysis(TCGContext *s)
                the low part.  The result can be optimized to a simple
                add or sub.  This happens often for x86_64 guest when the
                cpu mode is set to 32 bit.  */
-            if (dead_temps[args[1]] && !mem_temps[args[1]]) {
-                if (dead_temps[args[0]] && !mem_temps[args[0]]) {
+            if (arg_temp(op->args[1])->state == TS_DEAD) {
+                if (arg_temp(op->args[0])->state == TS_DEAD) {
                     goto do_remove;
                 }
                 /* Replace the opcode and adjust the args in place,
                    leaving 3 unused args at the end.  */
                 op->opc = opc = opc_new;
-                args[1] = args[2];
-                args[2] = args[4];
+                op->args[1] = op->args[2];
+                op->args[2] = op->args[4];
                 /* Fall through and mark the single-word operation live.  */
                 nb_iargs = 2;
                 nb_oargs = 1;
@@ -1489,22 +1781,21 @@ static void tcg_liveness_analysis(TCGContext *s)
         do_mul2:
             nb_iargs = 2;
             nb_oargs = 2;
-            if (dead_temps[args[1]] && !mem_temps[args[1]]) {
-                if (dead_temps[args[0]] && !mem_temps[args[0]]) {
+            if (arg_temp(op->args[1])->state == TS_DEAD) {
+                if (arg_temp(op->args[0])->state == TS_DEAD) {
                     /* Both parts of the operation are dead.  */
                     goto do_remove;
                 }
                 /* The high part of the operation is dead; generate the low. */
                 op->opc = opc = opc_new;
-                args[1] = args[2];
-                args[2] = args[3];
-            } else if (have_opc_new2 && dead_temps[args[0]]
-                       && !mem_temps[args[0]]) {
+                op->args[1] = op->args[2];
+                op->args[2] = op->args[3];
+            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
                 /* The low part of the operation is dead; generate the high. */
                 op->opc = opc = opc_new2;
-                args[0] = args[1];
-                args[1] = args[2];
-                args[2] = args[3];
+                op->args[0] = op->args[1];
+                op->args[1] = op->args[2];
+                op->args[2] = op->args[3];
             } else {
                 goto do_not_remove;
             }
@@ -1522,8 +1813,7 @@ static void tcg_liveness_analysis(TCGContext *s)
                implies side effects */
             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                 for (i = 0; i < nb_oargs; i++) {
-                    arg = args[i];
-                    if (!dead_temps[arg] || mem_temps[arg]) {
+                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                         goto do_not_remove;
                     }
                 }
@@ -1532,61 +1822,201 @@ static void tcg_liveness_analysis(TCGContext *s)
             } else {
             do_not_remove:
                 /* output args are dead */
-                dead_args = 0;
-                sync_args = 0;
                 for (i = 0; i < nb_oargs; i++) {
-                    arg = args[i];
-                    if (dead_temps[arg]) {
-                        dead_args |= (1 << i);
+                    arg_ts = arg_temp(op->args[i]);
+                    if (arg_ts->state & TS_DEAD) {
+                        arg_life |= DEAD_ARG << i;
                     }
-                    if (mem_temps[arg]) {
-                        sync_args |= (1 << i);
+                    if (arg_ts->state & TS_MEM) {
+                        arg_life |= SYNC_ARG << i;
                     }
-                    dead_temps[arg] = 1;
-                    mem_temps[arg] = 0;
+                    arg_ts->state = TS_DEAD;
                 }
 
                 /* if end of basic block, update */
                 if (def->flags & TCG_OPF_BB_END) {
-                    tcg_la_bb_end(s, dead_temps, mem_temps);
+                    tcg_la_bb_end(s);
                 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                     /* globals should be synced to memory */
-                    memset(mem_temps, 1, s->nb_globals);
+                    for (i = 0; i < nb_globals; i++) {
+                        s->temps[i].state |= TS_MEM;
+                    }
                 }
 
                 /* record arguments that die in this opcode */
                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
-                    arg = args[i];
-                    if (dead_temps[arg]) {
-                        dead_args |= (1 << i);
+                    arg_ts = arg_temp(op->args[i]);
+                    if (arg_ts->state & TS_DEAD) {
+                        arg_life |= DEAD_ARG << i;
                     }
                 }
                 /* input arguments are live for preceding opcodes */
                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
-                    arg = args[i];
-                    dead_temps[arg] = 0;
+                    arg_temp(op->args[i])->state &= ~TS_DEAD;
                 }
-                s->op_dead_args[oi] = dead_args;
-                s->op_sync_args[oi] = sync_args;
             }
             break;
         }
+        op->life = arg_life;
     }
 }
-#else
-/* dummy liveness analysis */
-static void tcg_liveness_analysis(TCGContext *s)
+
+/* Liveness analysis: Convert indirect regs to direct temporaries.  */
+static bool liveness_pass_2(TCGContext *s)
 {
-    int nb_ops = s->gen_next_op_idx;
+    int nb_globals = s->nb_globals;
+    int nb_temps, i, oi, oi_next;
+    bool changes = false;
+
+    /* Create a temporary for each indirect global.  */
+    for (i = 0; i < nb_globals; ++i) {
+        TCGTemp *its = &s->temps[i];
+        if (its->indirect_reg) {
+            TCGTemp *dts = tcg_temp_alloc(s);
+            dts->type = its->type;
+            dts->base_type = its->base_type;
+            its->state_ptr = dts;
+        } else {
+            its->state_ptr = NULL;
+        }
+        /* All globals begin dead.  */
+        its->state = TS_DEAD;
+    }
+    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
+        TCGTemp *its = &s->temps[i];
+        its->state_ptr = NULL;
+        its->state = TS_DEAD;
+    }
+
+    for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
+        TCGOp *op = &s->gen_op_buf[oi];
+        TCGOpcode opc = op->opc;
+        const TCGOpDef *def = &tcg_op_defs[opc];
+        TCGLifeData arg_life = op->life;
+        int nb_iargs, nb_oargs, call_flags;
+        TCGTemp *arg_ts, *dir_ts;
+
+        oi_next = op->next;
+
+        if (opc == INDEX_op_call) {
+            nb_oargs = op->callo;
+            nb_iargs = op->calli;
+            call_flags = op->args[nb_oargs + nb_iargs + 1];
+        } else {
+            nb_iargs = def->nb_iargs;
+            nb_oargs = def->nb_oargs;
+
+            /* Set flags similar to how calls require.  */
+            if (def->flags & TCG_OPF_BB_END) {
+                /* Like writing globals: save_globals */
+                call_flags = 0;
+            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
+                /* Like reading globals: sync_globals */
+                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
+            } else {
+                /* No effect on globals.  */
+                call_flags = (TCG_CALL_NO_READ_GLOBALS |
+                              TCG_CALL_NO_WRITE_GLOBALS);
+            }
+        }
 
-    s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
-    memset(s->op_dead_args, 0, nb_ops * sizeof(uint16_t));
-    s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
-    memset(s->op_sync_args, 0, nb_ops * sizeof(uint8_t));
+        /* Make sure that input arguments are available.  */
+        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
+            arg_ts = arg_temp(op->args[i]);
+            if (arg_ts) {
+                dir_ts = arg_ts->state_ptr;
+                if (dir_ts && arg_ts->state == TS_DEAD) {
+                    TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
+                                      ? INDEX_op_ld_i32
+                                      : INDEX_op_ld_i64);
+                    TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
+
+                    lop->args[0] = temp_arg(dir_ts);
+                    lop->args[1] = temp_arg(arg_ts->mem_base);
+                    lop->args[2] = arg_ts->mem_offset;
+
+                    /* Loaded, but synced with memory.  */
+                    arg_ts->state = TS_MEM;
+                }
+            }
+        }
+
+        /* Perform input replacement, and mark inputs that became dead.
+           No action is required except keeping each temp's state up to date
+           so that we reload when needed.  */
+        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
+            arg_ts = arg_temp(op->args[i]);
+            if (arg_ts) {
+                dir_ts = arg_ts->state_ptr;
+                if (dir_ts) {
+                    op->args[i] = temp_arg(dir_ts);
+                    changes = true;
+                    if (IS_DEAD_ARG(i)) {
+                        arg_ts->state = TS_DEAD;
+                    }
+                }
+            }
+        }
+
+        /* Liveness analysis should ensure that the following are
+           all correct, for call sites and basic block end points.  */
+        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
+            /* Nothing to do */
+        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
+            for (i = 0; i < nb_globals; ++i) {
+                /* Liveness should see that globals are synced back,
+                   that is, either TS_DEAD or TS_MEM.  */
+                arg_ts = &s->temps[i];
+                tcg_debug_assert(arg_ts->state_ptr == 0
+                                 || arg_ts->state != 0);
+            }
+        } else {
+            for (i = 0; i < nb_globals; ++i) {
+                /* Liveness should see that globals are saved back,
+                   that is, TS_DEAD, waiting to be reloaded.  */
+                arg_ts = &s->temps[i];
+                tcg_debug_assert(arg_ts->state_ptr == 0
+                                 || arg_ts->state == TS_DEAD);
+            }
+        }
+
+        /* Outputs become available.  */
+        for (i = 0; i < nb_oargs; i++) {
+            arg_ts = arg_temp(op->args[i]);
+            dir_ts = arg_ts->state_ptr;
+            if (!dir_ts) {
+                continue;
+            }
+            op->args[i] = temp_arg(dir_ts);
+            changes = true;
+
+            /* The output is now live and modified.  */
+            arg_ts->state = 0;
+
+            /* Sync outputs upon their last write.  */
+            if (NEED_SYNC_ARG(i)) {
+                TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
+                                  ? INDEX_op_st_i32
+                                  : INDEX_op_st_i64);
+                TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
+
+                sop->args[0] = temp_arg(dir_ts);
+                sop->args[1] = temp_arg(arg_ts->mem_base);
+                sop->args[2] = arg_ts->mem_offset;
+
+                arg_ts->state = TS_MEM;
+            }
+            /* Drop outputs that are dead.  */
+            if (IS_DEAD_ARG(i)) {
+                arg_ts->state = TS_DEAD;
+            }
+        }
+    }
+
+    return changes;
 }
-#endif
 
-#ifndef NDEBUG
+#ifdef CONFIG_DEBUG_TCG
 static void dump_regs(TCGContext *s)
 {
     TCGTemp *ts;
@@ -1595,7 +2025,7 @@ static void dump_regs(TCGContext *s)
 
     for(i = 0; i < s->nb_temps; i++) {
         ts = &s->temps[i];
-        printf("  %10s: ", tcg_get_arg_str_idx(s, buf, sizeof(buf), i));
+        printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
         switch(ts->val_type) {
         case TEMP_VAL_REG:
             printf("%s", tcg_target_reg_names[ts->reg]);
@@ -1658,10 +2088,8 @@ static void check_regs(TCGContext *s)
 }
 #endif
 
-static void temp_allocate_frame(TCGContext *s, int temp)
+static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
 {
-    TCGTemp *ts;
-    ts = &s->temps[temp];
 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
     /* Sparc64 stack is accessed with offset of 2047 */
     s->current_frame_offset = (s->current_frame_offset +
@@ -1680,35 +2108,81 @@ static void temp_allocate_frame(TCGContext *s, int temp)
 
 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet);
 
-/* sync register 'reg' by saving it to the corresponding temporary */
-static void tcg_reg_sync(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
+/* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
+   mark it free; otherwise mark it dead.  */
+static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
 {
-    TCGTemp *ts = s->reg_to_temp[reg];
+    if (ts->fixed_reg) {
+        return;
+    }
+    if (ts->val_type == TEMP_VAL_REG) {
+        s->reg_to_temp[ts->reg] = NULL;
+    }
+    ts->val_type = (free_or_dead < 0
+                    || ts->temp_local
+                    || ts->temp_global
+                    ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
+}
 
-    assert(ts->val_type == TEMP_VAL_REG);
-    if (!ts->mem_coherent && !ts->fixed_reg) {
+/* Mark a temporary as dead.  */
+static inline void temp_dead(TCGContext *s, TCGTemp *ts)
+{
+    temp_free_or_dead(s, ts, 1);
+}
+
+/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
+   register needs to be allocated to store a constant.  If 'free_or_dead'
+   is non-zero, subsequently release the temporary; if it is positive, the
+   temp is dead; if it is negative, the temp is free.  */
+static void temp_sync(TCGContext *s, TCGTemp *ts,
+                      TCGRegSet allocated_regs, int free_or_dead)
+{
+    if (ts->fixed_reg) {
+        return;
+    }
+    if (!ts->mem_coherent) {
         if (!ts->mem_allocated) {
-            temp_allocate_frame(s, temp_idx(s, ts));
-        } else if (ts->indirect_reg) {
-            tcg_regset_set_reg(allocated_regs, ts->reg);
-            temp_load(s, ts->mem_base,
-                      tcg_target_available_regs[TCG_TYPE_PTR],
+            temp_allocate_frame(s, ts);
+        }
+        switch (ts->val_type) {
+        case TEMP_VAL_CONST:
+            /* If we're going to free the temp immediately, then we won't
+               require it later in a register, so attempt to store the
+               constant to memory directly.  */
+            if (free_or_dead
+                && tcg_out_sti(s, ts->type, ts->val,
+                               ts->mem_base->reg, ts->mem_offset)) {
+                break;
+            }
+            temp_load(s, ts, tcg_target_available_regs[ts->type],
                       allocated_regs);
+            /* fallthrough */
+
+        case TEMP_VAL_REG:
+            tcg_out_st(s, ts->type, ts->reg,
+                       ts->mem_base->reg, ts->mem_offset);
+            break;
+
+        case TEMP_VAL_MEM:
+            break;
+
+        case TEMP_VAL_DEAD:
+        default:
+            tcg_abort();
         }
-        tcg_out_st(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
+        ts->mem_coherent = 1;
+    }
+    if (free_or_dead) {
+        temp_free_or_dead(s, ts, free_or_dead);
     }
-    ts->mem_coherent = 1;
 }
 
 /* free register 'reg' by spilling the corresponding temporary if necessary */
 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
 {
     TCGTemp *ts = s->reg_to_temp[reg];
-
     if (ts != NULL) {
-        tcg_reg_sync(s, reg, allocated_regs);
-        ts->val_type = TEMP_VAL_MEM;
-        s->reg_to_temp[reg] = NULL;
+        temp_sync(s, ts, allocated_regs, -1);
     }
 }
 
@@ -1721,7 +2195,7 @@ static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet desired_regs,
     TCGReg reg;
     TCGRegSet reg_ct;
 
-    tcg_regset_andnot(reg_ct, desired_regs, allocated_regs);
+    reg_ct = desired_regs & ~allocated_regs;
     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
 
     /* first try free registers */
@@ -1760,12 +2234,6 @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
         break;
     case TEMP_VAL_MEM:
         reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
-        if (ts->indirect_reg) {
-            tcg_regset_set_reg(allocated_regs, reg);
-            temp_load(s, ts->mem_base,
-                      tcg_target_available_regs[TCG_TYPE_PTR],
-                      allocated_regs);
-        }
         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
         ts->mem_coherent = 1;
         break;
@@ -1778,57 +2246,13 @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
     s->reg_to_temp[reg] = ts;
 }
 
-/* mark a temporary as dead. */
-static inline void temp_dead(TCGContext *s, TCGTemp *ts)
+/* Save a temporary to memory. 'allocated_regs' is used in case a
+   temporary register needs to be allocated to store a constant.  */
+static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
 {
-    if (ts->fixed_reg) {
-        return;
-    }
-    if (ts->val_type == TEMP_VAL_REG) {
-        s->reg_to_temp[ts->reg] = NULL;
-    }
-    ts->val_type = (temp_idx(s, ts) < s->nb_globals || ts->temp_local
-                    ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
-}
-
-/* sync a temporary to memory. 'allocated_regs' is used in case a
-   temporary registers needs to be allocated to store a constant. */
-static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
-{
-    if (ts->fixed_reg) {
-        return;
-    }
-    switch (ts->val_type) {
-    case TEMP_VAL_CONST:
-        temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs);
-        /* fallthrough */
-    case TEMP_VAL_REG:
-        tcg_reg_sync(s, ts->reg, allocated_regs);
-        break;
-    case TEMP_VAL_DEAD:
-    case TEMP_VAL_MEM:
-        break;
-    default:
-        tcg_abort();
-    }
-}
-
-/* save a temporary to memory. 'allocated_regs' is used in case a
-   temporary registers needs to be allocated to store a constant. */
-static inline void temp_save(TCGContext *s, TCGTemp *ts,
-                             TCGRegSet allocated_regs)
-{
-#ifdef USE_LIVENESS_ANALYSIS
-    /* ??? Liveness does not yet incorporate indirect bases.  */
-    if (!ts->indirect_base) {
-        /* The liveness analysis already ensures that globals are back
-           in memory. Keep an assert for safety. */
-        tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
-        return;
-    }
-#endif
-    temp_sync(s, ts, allocated_regs);
-    temp_dead(s, ts);
+    /* The liveness analysis already ensures that globals are back
+       in memory. Keep a tcg_debug_assert for safety. */
+    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
 }
 
 /* save globals to their canonical location and assume they can be
@@ -1836,9 +2260,9 @@ static inline void temp_save(TCGContext *s, TCGTemp *ts,
    temporary registers needs to be allocated to store a constant. */
 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
 {
-    int i;
+    int i, n;
 
-    for (i = 0; i < s->nb_globals; i++) {
+    for (i = 0, n = s->nb_globals; i < n; i++) {
         temp_save(s, &s->temps[i], allocated_regs);
     }
 }
@@ -1848,20 +2272,13 @@ static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
    temporary registers needs to be allocated to store a constant. */
 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
 {
-    int i;
+    int i, n;
 
-    for (i = 0; i < s->nb_globals; i++) {
+    for (i = 0, n = s->nb_globals; i < n; i++) {
         TCGTemp *ts = &s->temps[i];
-#ifdef USE_LIVENESS_ANALYSIS
-        /* ??? Liveness does not yet incorporate indirect bases.  */
-        if (!ts->indirect_base) {
-            tcg_debug_assert(ts->val_type != TEMP_VAL_REG
-                             || ts->fixed_reg
-                             || ts->mem_coherent);
-            continue;
-        }
-#endif
-        temp_sync(s, ts, allocated_regs);
+        tcg_debug_assert(ts->val_type != TEMP_VAL_REG
+                         || ts->fixed_reg
+                         || ts->mem_coherent);
     }
 }
 
@@ -1876,113 +2293,93 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
         if (ts->temp_local) {
             temp_save(s, ts, allocated_regs);
         } else {
-#ifdef USE_LIVENESS_ANALYSIS
-            /* ??? Liveness does not yet incorporate indirect bases.  */
-            if (!ts->indirect_base) {
-                /* The liveness analysis already ensures that temps are dead.
-                   Keep an assert for safety. */
-                assert(ts->val_type == TEMP_VAL_DEAD);
-                continue;
-            }
-#endif
-            temp_dead(s, ts);
+            /* The liveness analysis already ensures that temps are dead.
+               Keep a tcg_debug_assert for safety. */
+            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
         }
     }
 
     save_globals(s, allocated_regs);
 }
 
-#define IS_DEAD_ARG(n) ((dead_args >> (n)) & 1)
-#define NEED_SYNC_ARG(n) ((sync_args >> (n)) & 1)
-
-static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
-                               uint16_t dead_args, uint8_t sync_args)
+static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
+                                  tcg_target_ulong val, TCGLifeData arg_life)
 {
-    TCGTemp *ots;
-    tcg_target_ulong val;
-
-    ots = &s->temps[args[0]];
-    val = args[1];
-
     if (ots->fixed_reg) {
-        /* for fixed registers, we do not do any constant
-           propagation */
+        /* For fixed registers, we do not do any constant propagation.  */
         tcg_out_movi(s, ots->type, ots->reg, val);
-    } else {
-        /* The movi is not explicitly generated here */
-        if (ots->val_type == TEMP_VAL_REG) {
-            s->reg_to_temp[ots->reg] = NULL;
-        }
-        ots->val_type = TEMP_VAL_CONST;
-        ots->val = val;
+        return;
     }
-    if (NEED_SYNC_ARG(0)) {
-        temp_sync(s, ots, s->reserved_regs);
+
+    /* The movi is not explicitly generated here.  */
+    if (ots->val_type == TEMP_VAL_REG) {
+        s->reg_to_temp[ots->reg] = NULL;
     }
-    if (IS_DEAD_ARG(0)) {
+    ots->val_type = TEMP_VAL_CONST;
+    ots->val = val;
+    ots->mem_coherent = 0;
+    if (NEED_SYNC_ARG(0)) {
+        temp_sync(s, ots, s->reserved_regs, IS_DEAD_ARG(0));
+    } else if (IS_DEAD_ARG(0)) {
         temp_dead(s, ots);
     }
 }
 
-static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
-                              const TCGArg *args, uint16_t dead_args,
-                              uint8_t sync_args)
+static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
+{
+    TCGTemp *ots = arg_temp(op->args[0]);
+    tcg_target_ulong val = op->args[1];
+
+    tcg_reg_alloc_do_movi(s, ots, val, op->life);
+}
+
+static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
 {
+    const TCGLifeData arg_life = op->life;
     TCGRegSet allocated_regs;
     TCGTemp *ts, *ots;
     TCGType otype, itype;
 
-    tcg_regset_set(allocated_regs, s->reserved_regs);
-    ots = &s->temps[args[0]];
-    ts = &s->temps[args[1]];
+    allocated_regs = s->reserved_regs;
+    ots = arg_temp(op->args[0]);
+    ts = arg_temp(op->args[1]);
 
     /* Note that otype != itype for no-op truncation.  */
     otype = ots->type;
     itype = ts->type;
 
-    /* If the source value is not in a register, and we're going to be
-       forced to have it in a register in order to perform the copy,
-       then copy the SOURCE value into its own register first.  That way
-       we don't have to reload SOURCE the next time it is used. */
-    if (((NEED_SYNC_ARG(0) || ots->fixed_reg) && ts->val_type != TEMP_VAL_REG)
-        || ts->val_type == TEMP_VAL_MEM) {
+    if (ts->val_type == TEMP_VAL_CONST) {
+        /* propagate constant or generate sti */
+        tcg_target_ulong val = ts->val;
+        if (IS_DEAD_ARG(1)) {
+            temp_dead(s, ts);
+        }
+        tcg_reg_alloc_do_movi(s, ots, val, arg_life);
+        return;
+    }
+
+    /* If the source value is in memory we're going to be forced
+       to have it in a register in order to perform the copy.  Copy
+       the SOURCE value into its own register first, that way we
+       don't have to reload SOURCE the next time it is used. */
+    if (ts->val_type == TEMP_VAL_MEM) {
         temp_load(s, ts, tcg_target_available_regs[itype], allocated_regs);
     }
 
+    tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
     if (IS_DEAD_ARG(0) && !ots->fixed_reg) {
         /* mov to a non-saved dead register makes no sense (even with
            liveness analysis disabled). */
-        assert(NEED_SYNC_ARG(0));
-        /* The code above should have moved the temp to a register. */
-        assert(ts->val_type == TEMP_VAL_REG);
+        tcg_debug_assert(NEED_SYNC_ARG(0));
         if (!ots->mem_allocated) {
-            temp_allocate_frame(s, args[0]);
-        }
-        if (ots->indirect_reg) {
-            tcg_regset_set_reg(allocated_regs, ts->reg);
-            temp_load(s, ots->mem_base,
-                      tcg_target_available_regs[TCG_TYPE_PTR],
-                      allocated_regs);
+            temp_allocate_frame(s, ots);
         }
         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
         if (IS_DEAD_ARG(1)) {
             temp_dead(s, ts);
         }
         temp_dead(s, ots);
-    } else if (ts->val_type == TEMP_VAL_CONST) {
-        /* propagate constant */
-        if (ots->val_type == TEMP_VAL_REG) {
-            s->reg_to_temp[ots->reg] = NULL;
-        }
-        ots->val_type = TEMP_VAL_CONST;
-        ots->val = ts->val;
-        if (IS_DEAD_ARG(1)) {
-            temp_dead(s, ts);
-        }
     } else {
-        /* The code in the first if block should have moved the
-           temp to a register. */
-        assert(ts->val_type == TEMP_VAL_REG);
         if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) {
             /* the mov can be suppressed */
             if (ots->val_type == TEMP_VAL_REG) {
@@ -2004,17 +2401,17 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
         ots->mem_coherent = 0;
         s->reg_to_temp[ots->reg] = ots;
         if (NEED_SYNC_ARG(0)) {
-            tcg_reg_sync(s, ots->reg, allocated_regs);
+            temp_sync(s, ots, allocated_regs, 0);
         }
     }
 }
 
-static void tcg_reg_alloc_op(TCGContext *s, 
-                             const TCGOpDef *def, TCGOpcode opc,
-                             const TCGArg *args, uint16_t dead_args,
-                             uint8_t sync_args)
+static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
 {
-    TCGRegSet allocated_regs;
+    const TCGLifeData arg_life = op->life;
+    const TCGOpDef * const def = &tcg_op_defs[op->opc];
+    TCGRegSet i_allocated_regs;
+    TCGRegSet o_allocated_regs;
     int i, k, nb_iargs, nb_oargs;
     TCGReg reg;
     TCGArg arg;
@@ -2028,16 +2425,18 @@ static void tcg_reg_alloc_op(TCGContext *s,
 
     /* copy constants */
     memcpy(new_args + nb_oargs + nb_iargs, 
-           args + nb_oargs + nb_iargs, 
+           op->args + nb_oargs + nb_iargs,
            sizeof(TCGArg) * def->nb_cargs);
 
+    i_allocated_regs = s->reserved_regs;
+    o_allocated_regs = s->reserved_regs;
+
     /* satisfy input constraints */ 
-    tcg_regset_set(allocated_regs, s->reserved_regs);
-    for(k = 0; k < nb_iargs; k++) {
+    for (k = 0; k < nb_iargs; k++) {
         i = def->sorted_args[nb_oargs + k];
-        arg = args[i];
+        arg = op->args[i];
         arg_ct = &def->args_ct[i];
-        ts = &s->temps[arg];
+        ts = arg_temp(arg);
 
         if (ts->val_type == TEMP_VAL_CONST
             && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
@@ -2047,13 +2446,13 @@ static void tcg_reg_alloc_op(TCGContext *s,
             goto iarg_end;
         }
 
-        temp_load(s, ts, arg_ct->u.regs, allocated_regs);
+        temp_load(s, ts, arg_ct->u.regs, i_allocated_regs);
 
         if (arg_ct->ct & TCG_CT_IALIAS) {
             if (ts->fixed_reg) {
                 /* if fixed register, we must allocate a new register
                    if the alias is not the same register */
-                if (arg != args[arg_ct->alias_index])
+                if (arg != op->args[arg_ct->alias_index])
                     goto allocate_in_reg;
             } else {
                 /* if the input is aliased to an output and if it is
@@ -2081,49 +2480,53 @@ static void tcg_reg_alloc_op(TCGContext *s,
         allocate_in_reg:
             /* allocate a new register matching the constraint 
                and move the temporary register into it */
-            reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs,
+            reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
                                 ts->indirect_base);
             tcg_out_mov(s, ts->type, reg, ts->reg);
         }
         new_args[i] = reg;
         const_args[i] = 0;
-        tcg_regset_set_reg(allocated_regs, reg);
+        tcg_regset_set_reg(i_allocated_regs, reg);
     iarg_end: ;
     }
     
     /* mark dead temporaries and free the associated registers */
     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
         if (IS_DEAD_ARG(i)) {
-            temp_dead(s, &s->temps[args[i]]);
+            temp_dead(s, arg_temp(op->args[i]));
         }
     }
 
     if (def->flags & TCG_OPF_BB_END) {
-        tcg_reg_alloc_bb_end(s, allocated_regs);
+        tcg_reg_alloc_bb_end(s, i_allocated_regs);
     } else {
         if (def->flags & TCG_OPF_CALL_CLOBBER) {
             /* XXX: permit generic clobber register list ? */ 
             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
-                    tcg_reg_free(s, i, allocated_regs);
+                    tcg_reg_free(s, i, i_allocated_regs);
                 }
             }
         }
         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
             /* sync globals if the op has side effects and might trigger
                an exception. */
-            sync_globals(s, allocated_regs);
+            sync_globals(s, i_allocated_regs);
         }
         
         /* satisfy the output constraints */
-        tcg_regset_set(allocated_regs, s->reserved_regs);
         for(k = 0; k < nb_oargs; k++) {
             i = def->sorted_args[k];
-            arg = args[i];
+            arg = op->args[i];
             arg_ct = &def->args_ct[i];
-            ts = &s->temps[arg];
-            if (arg_ct->ct & TCG_CT_ALIAS) {
+            ts = arg_temp(arg);
+            if ((arg_ct->ct & TCG_CT_ALIAS)
+                && !const_args[arg_ct->alias_index]) {
                 reg = new_args[arg_ct->alias_index];
+            } else if (arg_ct->ct & TCG_CT_NEWREG) {
+                reg = tcg_reg_alloc(s, arg_ct->u.regs,
+                                    i_allocated_regs | o_allocated_regs,
+                                    ts->indirect_base);
             } else {
                 /* if fixed register, we try to use it */
                 reg = ts->reg;
@@ -2131,10 +2534,10 @@ static void tcg_reg_alloc_op(TCGContext *s,
                     tcg_regset_test_reg(arg_ct->u.regs, reg)) {
                     goto oarg_end;
                 }
-                reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs,
+                reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
                                     ts->indirect_base);
             }
-            tcg_regset_set_reg(allocated_regs, reg);
+            tcg_regset_set_reg(o_allocated_regs, reg);
             /* if a fixed register is used, then a move will be done afterwards */
             if (!ts->fixed_reg) {
                 if (ts->val_type == TEMP_VAL_REG) {
@@ -2153,19 +2556,18 @@ static void tcg_reg_alloc_op(TCGContext *s,
     }
 
     /* emit instruction */
-    tcg_out_op(s, opc, new_args, const_args);
+    tcg_out_op(s, op->opc, new_args, const_args);
     
     /* move the outputs in the correct register if needed */
     for(i = 0; i < nb_oargs; i++) {
-        ts = &s->temps[args[i]];
+        ts = arg_temp(op->args[i]);
         reg = new_args[i];
         if (ts->fixed_reg && ts->reg != reg) {
             tcg_out_mov(s, ts->type, ts->reg, reg);
         }
         if (NEED_SYNC_ARG(i)) {
-            tcg_reg_sync(s, reg, allocated_regs);
-        }
-        if (IS_DEAD_ARG(i)) {
+            temp_sync(s, ts, o_allocated_regs, IS_DEAD_ARG(i));
+        } else if (IS_DEAD_ARG(i)) {
             temp_dead(s, ts);
         }
     }
@@ -2177,10 +2579,11 @@ static void tcg_reg_alloc_op(TCGContext *s,
 #define STACK_DIR(x) (x)
 #endif
 
-static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
-                               const TCGArg * const args, uint16_t dead_args,
-                               uint8_t sync_args)
+static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
 {
+    const int nb_oargs = op->callo;
+    const int nb_iargs = op->calli;
+    const TCGLifeData arg_life = op->life;
     int flags, nb_regs, i;
     TCGReg reg;
     TCGArg arg;
@@ -2191,8 +2594,8 @@ static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
     int allocate_args;
     TCGRegSet allocated_regs;
 
-    func_addr = (tcg_insn_unit *)(intptr_t)args[nb_oargs + nb_iargs];
-    flags = args[nb_oargs + nb_iargs + 1];
+    func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
+    flags = op->args[nb_oargs + nb_iargs + 1];
 
     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
     if (nb_regs > nb_iargs) {
@@ -2211,13 +2614,13 @@ static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
     }
 
     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
-    for(i = nb_regs; i < nb_iargs; i++) {
-        arg = args[nb_oargs + i];
+    for (i = nb_regs; i < nb_iargs; i++) {
+        arg = op->args[nb_oargs + i];
 #ifdef TCG_TARGET_STACK_GROWSUP
         stack_offset -= sizeof(tcg_target_long);
 #endif
         if (arg != TCG_CALL_DUMMY_ARG) {
-            ts = &s->temps[arg];
+            ts = arg_temp(arg);
             temp_load(s, ts, tcg_target_available_regs[ts->type],
                       s->reserved_regs);
             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
@@ -2228,11 +2631,11 @@ static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
     }
     
     /* assign input registers */
-    tcg_regset_set(allocated_regs, s->reserved_regs);
-    for(i = 0; i < nb_regs; i++) {
-        arg = args[nb_oargs + i];
+    allocated_regs = s->reserved_regs;
+    for (i = 0; i < nb_regs; i++) {
+        arg = op->args[nb_oargs + i];
         if (arg != TCG_CALL_DUMMY_ARG) {
-            ts = &s->temps[arg];
+            ts = arg_temp(arg);
             reg = tcg_target_call_iarg_regs[i];
             tcg_reg_free(s, reg, allocated_regs);
 
@@ -2241,9 +2644,8 @@ static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
                     tcg_out_mov(s, ts->type, reg, ts->reg);
                 }
             } else {
-                TCGRegSet arg_set;
+                TCGRegSet arg_set = 0;
 
-                tcg_regset_clear(arg_set);
                 tcg_regset_set_reg(arg_set, reg);
                 temp_load(s, ts, arg_set, allocated_regs);
             }
@@ -2253,9 +2655,9 @@ static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
     }
     
     /* mark dead temporaries and free the associated registers */
-    for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
+    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
         if (IS_DEAD_ARG(i)) {
-            temp_dead(s, &s->temps[args[i]]);
+            temp_dead(s, arg_temp(op->args[i]));
         }
     }
     
@@ -2280,10 +2682,10 @@ static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
 
     /* assign output registers and emit moves if needed */
     for(i = 0; i < nb_oargs; i++) {
-        arg = args[i];
-        ts = &s->temps[arg];
+        arg = op->args[i];
+        ts = arg_temp(arg);
         reg = tcg_target_call_oarg_regs[i];
-        assert(s->reg_to_temp[reg] == NULL);
+        tcg_debug_assert(s->reg_to_temp[reg] == NULL);
 
         if (ts->fixed_reg) {
             if (ts->reg != reg) {
@@ -2298,9 +2700,8 @@ static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
             ts->mem_coherent = 0;
             s->reg_to_temp[reg] = ts;
             if (NEED_SYNC_ARG(i)) {
-                tcg_reg_sync(s, reg, allocated_regs);
-            }
-            if (IS_DEAD_ARG(i)) {
+                temp_sync(s, ts, allocated_regs, IS_DEAD_ARG(i));
+            } else if (IS_DEAD_ARG(i)) {
                 temp_dead(s, ts);
             }
         }
@@ -2336,7 +2737,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
     {
         int n;
 
-        n = s->gen_last_op_idx + 1;
+        n = s->gen_op_buf[0].prev + 1;
         s->op_count += n;
         if (n > s->op_count_max) {
             s->op_count_max = n;
@@ -2353,9 +2754,11 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
 #ifdef DEBUG_DISAS
     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                  && qemu_log_in_addr_range(tb->pc))) {
+        qemu_log_lock();
         qemu_log("OP:\n");
         tcg_dump_ops(s);
         qemu_log("\n");
+        qemu_log_unlock();
     }
 #endif
 
@@ -2372,7 +2775,25 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
     s->la_time -= profile_getclock();
 #endif
 
-    tcg_liveness_analysis(s);
+    liveness_pass_1(s);
+
+    if (s->nb_indirects > 0) {
+#ifdef DEBUG_DISAS
+        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
+                     && qemu_log_in_addr_range(tb->pc))) {
+            qemu_log_lock();
+            qemu_log("OP before indirect lowering:\n");
+            tcg_dump_ops(s);
+            qemu_log("\n");
+            qemu_log_unlock();
+        }
+#endif
+        /* Replace indirect temps with direct temps.  */
+        if (liveness_pass_2(s)) {
+            /* If changes were made, re-run liveness.  */
+            liveness_pass_1(s);
+        }
+    }
 
 #ifdef CONFIG_PROFILER
     s->la_time += profile_getclock();
@@ -2381,27 +2802,30 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
 #ifdef DEBUG_DISAS
     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                  && qemu_log_in_addr_range(tb->pc))) {
+        qemu_log_lock();
         qemu_log("OP after optimization and liveness analysis:\n");
         tcg_dump_ops(s);
         qemu_log("\n");
+        qemu_log_unlock();
     }
 #endif
 
     tcg_reg_alloc_start(s);
 
-    s->code_buf = tb->tc_ptr;
-    s->code_ptr = tb->tc_ptr;
+    s->code_buf = tb->tc.ptr;
+    s->code_ptr = tb->tc.ptr;
 
-    tcg_out_tb_init(s);
+#ifdef TCG_TARGET_NEED_LDST_LABELS
+    s->ldst_labels = NULL;
+#endif
+#ifdef TCG_TARGET_NEED_POOL_LABELS
+    s->pool_labels = NULL;
+#endif
 
     num_insns = -1;
-    for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
+    for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
         TCGOp * const op = &s->gen_op_buf[oi];
-        TCGArg * const args = &s->gen_opparam_buf[op->args];
         TCGOpcode opc = op->opc;
-        const TCGOpDef *def = &tcg_op_defs[opc];
-        uint16_t dead_args = s->op_dead_args[oi];
-        uint8_t sync_args = s->op_sync_args[oi];
 
         oi_next = op->next;
 #ifdef CONFIG_PROFILER
@@ -2411,11 +2835,11 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
         switch (opc) {
         case INDEX_op_mov_i32:
         case INDEX_op_mov_i64:
-            tcg_reg_alloc_mov(s, def, args, dead_args, sync_args);
+            tcg_reg_alloc_mov(s, op);
             break;
         case INDEX_op_movi_i32:
         case INDEX_op_movi_i64:
-            tcg_reg_alloc_movi(s, args, dead_args, sync_args);
+            tcg_reg_alloc_movi(s, op);
             break;
         case INDEX_op_insn_start:
             if (num_insns >= 0) {
@@ -2425,36 +2849,33 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                 target_ulong a;
 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-                a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2];
+                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
 #else
-                a = args[i];
+                a = op->args[i];
 #endif
                 s->gen_insn_data[num_insns][i] = a;
             }
             break;
         case INDEX_op_discard:
-            temp_dead(s, &s->temps[args[0]]);
+            temp_dead(s, arg_temp(op->args[0]));
             break;
         case INDEX_op_set_label:
             tcg_reg_alloc_bb_end(s, s->reserved_regs);
-            tcg_out_label(s, arg_label(args[0]), s->code_ptr);
+            tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
             break;
         case INDEX_op_call:
-            tcg_reg_alloc_call(s, op->callo, op->calli, args,
-                               dead_args, sync_args);
+            tcg_reg_alloc_call(s, op);
             break;
         default:
             /* Sanity check that we've not introduced any unhandled opcodes. */
-            if (def->flags & TCG_OPF_NOT_PRESENT) {
-                tcg_abort();
-            }
+            tcg_debug_assert(tcg_op_supported(opc));
             /* Note: in order to speed up the code, it would be much
                faster to have specialized register allocator functions for
                some common argument patterns */
-            tcg_reg_alloc_op(s, def, opc, args, dead_args, sync_args);
+            tcg_reg_alloc_op(s, op);
             break;
         }
-#ifndef NDEBUG
+#ifdef CONFIG_DEBUG_TCG
         check_regs(s);
 #endif
         /* Test for (pending) buffer overflow.  The assumption is that any
@@ -2469,9 +2890,16 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
 
     /* Generate TB finalization at the end of block */
-    if (!tcg_out_tb_finalize(s)) {
+#ifdef TCG_TARGET_NEED_LDST_LABELS
+    if (!tcg_out_ldst_finalize(s)) {
         return -1;
     }
+#endif
+#ifdef TCG_TARGET_NEED_POOL_LABELS
+    if (!tcg_out_pool_finalize(s)) {
+        return -1;
+    }
+#endif
 
     /* flush instruction cache */
     flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
@@ -2482,7 +2910,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
 #ifdef CONFIG_PROFILER
 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
 {
-    TCGContext *s = &tcg_ctx;
+    TCGContext *s = tcg_ctx;
     int64_t tb_count = s->tb_count;
     int64_t tb_div_count = tb_count ? tb_count : 1;
     int64_t tot = s->interm_time + s->code_time;
This page took 0.114096 seconds and 4 git commands to generate.