X-Git-Url: https://repo.jachan.dev/qemu.git/blobdiff_plain/3a13c3f34ce2058e0c2decc3b0f9f56be24c9400..9c39b94f1448770e7e573e9516d2483816785d1b:/tcg/tcg-op.c diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 5305f1d34a..87f673ef49 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -22,8 +22,14 @@ * THE SOFTWARE. */ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "cpu.h" +#include "exec/exec-all.h" #include "tcg.h" #include "tcg-op.h" +#include "trace-tcg.h" +#include "trace/mem.h" /* Reduce the number of ifdefs below. This assumes that all uses of TCGV_HIGH and TCGV_LOW are properly protected by a conditional that @@ -35,100 +41,118 @@ extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64); #define TCGV_HIGH TCGV_HIGH_link_error #endif -void tcg_gen_op0(TCGContext *ctx, TCGOpcode opc) +/* Note that this is optimized for sequential allocation during translate. + Up to and including filling in the forward link immediately. We'll do + proper termination of the end of the list after we finish translation. */ + +static void tcg_emit_op(TCGContext *ctx, TCGOpcode opc, int args) { - *ctx->gen_opc_ptr++ = opc; + int oi = ctx->gen_next_op_idx; + int ni = oi + 1; + int pi = oi - 1; + + tcg_debug_assert(oi < OPC_BUF_SIZE); + ctx->gen_op_buf[0].prev = oi; + ctx->gen_next_op_idx = ni; + + ctx->gen_op_buf[oi] = (TCGOp){ + .opc = opc, + .args = args, + .prev = pi, + .next = ni + }; } void tcg_gen_op1(TCGContext *ctx, TCGOpcode opc, TCGArg a1) { - uint16_t *op = ctx->gen_opc_ptr; - TCGArg *opp = ctx->gen_opparam_ptr; + int pi = ctx->gen_next_parm_idx; - op[0] = opc; - opp[0] = a1; + tcg_debug_assert(pi + 1 <= OPPARAM_BUF_SIZE); + ctx->gen_next_parm_idx = pi + 1; + ctx->gen_opparam_buf[pi] = a1; - ctx->gen_opc_ptr = op + 1; - ctx->gen_opparam_ptr = opp + 1; + tcg_emit_op(ctx, opc, pi); } void tcg_gen_op2(TCGContext *ctx, TCGOpcode opc, TCGArg a1, TCGArg a2) { - uint16_t *op = ctx->gen_opc_ptr; - TCGArg *opp = ctx->gen_opparam_ptr; + int pi = ctx->gen_next_parm_idx; - op[0] = opc; - opp[0] = a1; - opp[1] = a2; + tcg_debug_assert(pi + 2 <= OPPARAM_BUF_SIZE); + ctx->gen_next_parm_idx = pi + 2; + ctx->gen_opparam_buf[pi + 0] = a1; + ctx->gen_opparam_buf[pi + 1] = a2; - ctx->gen_opc_ptr = op + 1; - ctx->gen_opparam_ptr = opp + 2; + tcg_emit_op(ctx, opc, pi); } void tcg_gen_op3(TCGContext *ctx, TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3) { - uint16_t *op = ctx->gen_opc_ptr; - TCGArg *opp = ctx->gen_opparam_ptr; + int pi = ctx->gen_next_parm_idx; - op[0] = opc; - opp[0] = a1; - opp[1] = a2; - opp[2] = a3; + tcg_debug_assert(pi + 3 <= OPPARAM_BUF_SIZE); + ctx->gen_next_parm_idx = pi + 3; + ctx->gen_opparam_buf[pi + 0] = a1; + ctx->gen_opparam_buf[pi + 1] = a2; + ctx->gen_opparam_buf[pi + 2] = a3; - ctx->gen_opc_ptr = op + 1; - ctx->gen_opparam_ptr = opp + 3; + tcg_emit_op(ctx, opc, pi); } void tcg_gen_op4(TCGContext *ctx, TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4) { - uint16_t *op = ctx->gen_opc_ptr; - TCGArg *opp = ctx->gen_opparam_ptr; + int pi = ctx->gen_next_parm_idx; - op[0] = opc; - opp[0] = a1; - opp[1] = a2; - opp[2] = a3; - opp[3] = a4; + tcg_debug_assert(pi + 4 <= OPPARAM_BUF_SIZE); + ctx->gen_next_parm_idx = pi + 4; + ctx->gen_opparam_buf[pi + 0] = a1; + ctx->gen_opparam_buf[pi + 1] = a2; + ctx->gen_opparam_buf[pi + 2] = a3; + ctx->gen_opparam_buf[pi + 3] = a4; - ctx->gen_opc_ptr = op + 1; - ctx->gen_opparam_ptr = opp + 4; + tcg_emit_op(ctx, opc, pi); } void tcg_gen_op5(TCGContext *ctx, TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4, TCGArg a5) { - uint16_t *op = 
ctx->gen_opc_ptr; - TCGArg *opp = ctx->gen_opparam_ptr; + int pi = ctx->gen_next_parm_idx; - op[0] = opc; - opp[0] = a1; - opp[1] = a2; - opp[2] = a3; - opp[3] = a4; - opp[4] = a5; + tcg_debug_assert(pi + 5 <= OPPARAM_BUF_SIZE); + ctx->gen_next_parm_idx = pi + 5; + ctx->gen_opparam_buf[pi + 0] = a1; + ctx->gen_opparam_buf[pi + 1] = a2; + ctx->gen_opparam_buf[pi + 2] = a3; + ctx->gen_opparam_buf[pi + 3] = a4; + ctx->gen_opparam_buf[pi + 4] = a5; - ctx->gen_opc_ptr = op + 1; - ctx->gen_opparam_ptr = opp + 5; + tcg_emit_op(ctx, opc, pi); } void tcg_gen_op6(TCGContext *ctx, TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, TCGArg a4, TCGArg a5, TCGArg a6) { - uint16_t *op = ctx->gen_opc_ptr; - TCGArg *opp = ctx->gen_opparam_ptr; + int pi = ctx->gen_next_parm_idx; + + tcg_debug_assert(pi + 6 <= OPPARAM_BUF_SIZE); + ctx->gen_next_parm_idx = pi + 6; + ctx->gen_opparam_buf[pi + 0] = a1; + ctx->gen_opparam_buf[pi + 1] = a2; + ctx->gen_opparam_buf[pi + 2] = a3; + ctx->gen_opparam_buf[pi + 3] = a4; + ctx->gen_opparam_buf[pi + 4] = a5; + ctx->gen_opparam_buf[pi + 5] = a6; - op[0] = opc; - opp[0] = a1; - opp[1] = a2; - opp[2] = a3; - opp[3] = a4; - opp[4] = a5; - opp[5] = a6; + tcg_emit_op(ctx, opc, pi); +} - ctx->gen_opc_ptr = op + 1; - ctx->gen_opparam_ptr = opp + 6; +void tcg_gen_mb(TCGBar mb_type) +{ + if (parallel_cpus) { + tcg_gen_op1(&tcg_ctx, INDEX_op_mb, mb_type); + } } /* 32 bit ops */ @@ -264,20 +288,24 @@ void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2) } } -void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, int label) +void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1, TCGv_i32 arg2, TCGLabel *l) { if (cond == TCG_COND_ALWAYS) { - tcg_gen_br(label); + tcg_gen_br(l); } else if (cond != TCG_COND_NEVER) { - tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label); + tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_arg(l)); } } -void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1, int32_t arg2, int label) +void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1, int32_t arg2, TCGLabel *l) { - TCGv_i32 t0 = tcg_const_i32(arg2); - tcg_gen_brcond_i32(cond, arg1, t0, label); - tcg_temp_free_i32(t0); + if (cond == TCG_COND_ALWAYS) { + tcg_gen_br(l); + } else if (cond != TCG_COND_NEVER) { + TCGv_i32 t0 = tcg_const_i32(arg2); + tcg_gen_brcond_i32(cond, arg1, t0, l); + tcg_temp_free_i32(t0); + } } void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret, @@ -429,6 +457,117 @@ void tcg_gen_orc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) } } +void tcg_gen_clz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + if (TCG_TARGET_HAS_clz_i32) { + tcg_gen_op3_i32(INDEX_op_clz_i32, ret, arg1, arg2); + } else if (TCG_TARGET_HAS_clz_i64) { + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + tcg_gen_extu_i32_i64(t1, arg1); + tcg_gen_extu_i32_i64(t2, arg2); + tcg_gen_addi_i64(t2, t2, 32); + tcg_gen_clz_i64(t1, t1, t2); + tcg_gen_extrl_i64_i32(ret, t1); + tcg_temp_free_i64(t1); + tcg_temp_free_i64(t2); + tcg_gen_subi_i32(ret, ret, 32); + } else { + gen_helper_clz_i32(ret, arg1, arg2); + } +} + +void tcg_gen_clzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2) +{ + TCGv_i32 t = tcg_const_i32(arg2); + tcg_gen_clz_i32(ret, arg1, t); + tcg_temp_free_i32(t); +} + +void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) +{ + if (TCG_TARGET_HAS_ctz_i32) { + tcg_gen_op3_i32(INDEX_op_ctz_i32, ret, arg1, arg2); + } else if (TCG_TARGET_HAS_ctz_i64) { + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + 
tcg_gen_extu_i32_i64(t1, arg1); + tcg_gen_extu_i32_i64(t2, arg2); + tcg_gen_ctz_i64(t1, t1, t2); + tcg_gen_extrl_i64_i32(ret, t1); + tcg_temp_free_i64(t1); + tcg_temp_free_i64(t2); + } else if (TCG_TARGET_HAS_ctpop_i32 + || TCG_TARGET_HAS_ctpop_i64 + || TCG_TARGET_HAS_clz_i32 + || TCG_TARGET_HAS_clz_i64) { + TCGv_i32 z, t = tcg_temp_new_i32(); + + if (TCG_TARGET_HAS_ctpop_i32 || TCG_TARGET_HAS_ctpop_i64) { + tcg_gen_subi_i32(t, arg1, 1); + tcg_gen_andc_i32(t, t, arg1); + tcg_gen_ctpop_i32(t, t); + } else { + /* Since all non-x86 hosts have clz(0) == 32, don't fight it. */ + tcg_gen_neg_i32(t, arg1); + tcg_gen_and_i32(t, t, arg1); + tcg_gen_clzi_i32(t, t, 32); + tcg_gen_xori_i32(t, t, 31); + } + z = tcg_const_i32(0); + tcg_gen_movcond_i32(TCG_COND_EQ, ret, arg1, z, arg2, t); + tcg_temp_free_i32(t); + tcg_temp_free_i32(z); + } else { + gen_helper_ctz_i32(ret, arg1, arg2); + } +} + +void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2) +{ + if (!TCG_TARGET_HAS_ctz_i32 && TCG_TARGET_HAS_ctpop_i32 && arg2 == 32) { + /* This equivalence has the advantage of not requiring a fixup. */ + TCGv_i32 t = tcg_temp_new_i32(); + tcg_gen_subi_i32(t, arg1, 1); + tcg_gen_andc_i32(t, t, arg1); + tcg_gen_ctpop_i32(ret, t); + tcg_temp_free_i32(t); + } else { + TCGv_i32 t = tcg_const_i32(arg2); + tcg_gen_ctz_i32(ret, arg1, t); + tcg_temp_free_i32(t); + } +} + +void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg) +{ + if (TCG_TARGET_HAS_clz_i32) { + TCGv_i32 t = tcg_temp_new_i32(); + tcg_gen_sari_i32(t, arg, 31); + tcg_gen_xor_i32(t, t, arg); + tcg_gen_clzi_i32(t, t, 32); + tcg_gen_subi_i32(ret, t, 1); + tcg_temp_free_i32(t); + } else { + gen_helper_clrsb_i32(ret, arg); + } +} + +void tcg_gen_ctpop_i32(TCGv_i32 ret, TCGv_i32 arg1) +{ + if (TCG_TARGET_HAS_ctpop_i32) { + tcg_gen_op2_i32(INDEX_op_ctpop_i32, ret, arg1); + } else if (TCG_TARGET_HAS_ctpop_i64) { + TCGv_i64 t = tcg_temp_new_i64(); + tcg_gen_extu_i32_i64(t, arg1); + tcg_gen_ctpop_i64(t, t); + tcg_gen_extrl_i64_i32(ret, t); + tcg_temp_free_i64(t); + } else { + gen_helper_ctpop_i32(ret, arg1); + } +} + void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2) { if (TCG_TARGET_HAS_rot_i32) { @@ -505,10 +644,11 @@ void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2, TCGv_i32 t1; tcg_debug_assert(ofs < 32); + tcg_debug_assert(len > 0); tcg_debug_assert(len <= 32); tcg_debug_assert(ofs + len <= 32); - if (ofs == 0 && len == 32) { + if (len == 32) { tcg_gen_mov_i32(ret, arg2); return; } @@ -532,10 +672,197 @@ void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2, tcg_temp_free_i32(t1); } +void tcg_gen_deposit_z_i32(TCGv_i32 ret, TCGv_i32 arg, + unsigned int ofs, unsigned int len) +{ + tcg_debug_assert(ofs < 32); + tcg_debug_assert(len > 0); + tcg_debug_assert(len <= 32); + tcg_debug_assert(ofs + len <= 32); + + if (ofs + len == 32) { + tcg_gen_shli_i32(ret, arg, ofs); + } else if (ofs == 0) { + tcg_gen_andi_i32(ret, arg, (1u << len) - 1); + } else if (TCG_TARGET_HAS_deposit_i32 + && TCG_TARGET_deposit_i32_valid(ofs, len)) { + TCGv_i32 zero = tcg_const_i32(0); + tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, zero, arg, ofs, len); + tcg_temp_free_i32(zero); + } else { + /* To help two-operand hosts we prefer to zero-extend first, + which allows ARG to stay live. 
*/ + switch (len) { + case 16: + if (TCG_TARGET_HAS_ext16u_i32) { + tcg_gen_ext16u_i32(ret, arg); + tcg_gen_shli_i32(ret, ret, ofs); + return; + } + break; + case 8: + if (TCG_TARGET_HAS_ext8u_i32) { + tcg_gen_ext8u_i32(ret, arg); + tcg_gen_shli_i32(ret, ret, ofs); + return; + } + break; + } + /* Otherwise prefer zero-extension over AND for code size. */ + switch (ofs + len) { + case 16: + if (TCG_TARGET_HAS_ext16u_i32) { + tcg_gen_shli_i32(ret, arg, ofs); + tcg_gen_ext16u_i32(ret, ret); + return; + } + break; + case 8: + if (TCG_TARGET_HAS_ext8u_i32) { + tcg_gen_shli_i32(ret, arg, ofs); + tcg_gen_ext8u_i32(ret, ret); + return; + } + break; + } + tcg_gen_andi_i32(ret, arg, (1u << len) - 1); + tcg_gen_shli_i32(ret, ret, ofs); + } +} + +void tcg_gen_extract_i32(TCGv_i32 ret, TCGv_i32 arg, + unsigned int ofs, unsigned int len) +{ + tcg_debug_assert(ofs < 32); + tcg_debug_assert(len > 0); + tcg_debug_assert(len <= 32); + tcg_debug_assert(ofs + len <= 32); + + /* Canonicalize certain special cases, even if extract is supported. */ + if (ofs + len == 32) { + tcg_gen_shri_i32(ret, arg, 32 - len); + return; + } + if (ofs == 0) { + tcg_gen_andi_i32(ret, arg, (1u << len) - 1); + return; + } + + if (TCG_TARGET_HAS_extract_i32 + && TCG_TARGET_extract_i32_valid(ofs, len)) { + tcg_gen_op4ii_i32(INDEX_op_extract_i32, ret, arg, ofs, len); + return; + } + + /* Assume that zero-extension, if available, is cheaper than a shift. */ + switch (ofs + len) { + case 16: + if (TCG_TARGET_HAS_ext16u_i32) { + tcg_gen_ext16u_i32(ret, arg); + tcg_gen_shri_i32(ret, ret, ofs); + return; + } + break; + case 8: + if (TCG_TARGET_HAS_ext8u_i32) { + tcg_gen_ext8u_i32(ret, arg); + tcg_gen_shri_i32(ret, ret, ofs); + return; + } + break; + } + + /* ??? Ideally we'd know what values are available for immediate AND. + Assume that 8 bits are available, plus the special case of 16, + so that we get ext8u, ext16u. */ + switch (len) { + case 1 ... 8: case 16: + tcg_gen_shri_i32(ret, arg, ofs); + tcg_gen_andi_i32(ret, ret, (1u << len) - 1); + break; + default: + tcg_gen_shli_i32(ret, arg, 32 - len - ofs); + tcg_gen_shri_i32(ret, ret, 32 - len); + break; + } +} + +void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg, + unsigned int ofs, unsigned int len) +{ + tcg_debug_assert(ofs < 32); + tcg_debug_assert(len > 0); + tcg_debug_assert(len <= 32); + tcg_debug_assert(ofs + len <= 32); + + /* Canonicalize certain special cases, even if extract is supported. */ + if (ofs + len == 32) { + tcg_gen_sari_i32(ret, arg, 32 - len); + return; + } + if (ofs == 0) { + switch (len) { + case 16: + tcg_gen_ext16s_i32(ret, arg); + return; + case 8: + tcg_gen_ext8s_i32(ret, arg); + return; + } + } + + if (TCG_TARGET_HAS_sextract_i32 + && TCG_TARGET_extract_i32_valid(ofs, len)) { + tcg_gen_op4ii_i32(INDEX_op_sextract_i32, ret, arg, ofs, len); + return; + } + + /* Assume that sign-extension, if available, is cheaper than a shift. 
*/ + switch (ofs + len) { + case 16: + if (TCG_TARGET_HAS_ext16s_i32) { + tcg_gen_ext16s_i32(ret, arg); + tcg_gen_sari_i32(ret, ret, ofs); + return; + } + break; + case 8: + if (TCG_TARGET_HAS_ext8s_i32) { + tcg_gen_ext8s_i32(ret, arg); + tcg_gen_sari_i32(ret, ret, ofs); + return; + } + break; + } + switch (len) { + case 16: + if (TCG_TARGET_HAS_ext16s_i32) { + tcg_gen_shri_i32(ret, arg, ofs); + tcg_gen_ext16s_i32(ret, ret); + return; + } + break; + case 8: + if (TCG_TARGET_HAS_ext8s_i32) { + tcg_gen_shri_i32(ret, arg, ofs); + tcg_gen_ext8s_i32(ret, ret); + return; + } + break; + } + + tcg_gen_shli_i32(ret, arg, 32 - len - ofs); + tcg_gen_sari_i32(ret, ret, 32 - len); +} + void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret, TCGv_i32 c1, TCGv_i32 c2, TCGv_i32 v1, TCGv_i32 v2) { - if (TCG_TARGET_HAS_movcond_i32) { + if (cond == TCG_COND_ALWAYS) { + tcg_gen_mov_i32(ret, v1); + } else if (cond == TCG_COND_NEVER) { + tcg_gen_mov_i32(ret, v2); + } else if (TCG_TARGET_HAS_movcond_i32) { tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, c1, c2, v1, v2, cond); } else { TCGv_i32 t0 = tcg_temp_new_i32(); @@ -555,8 +882,6 @@ void tcg_gen_add2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, { if (TCG_TARGET_HAS_add2_i32) { tcg_gen_op6_i32(INDEX_op_add2_i32, rl, rh, al, ah, bl, bh); - /* Allow the optimizer room to replace add2 with two moves. */ - tcg_gen_op0(&tcg_ctx, INDEX_op_nop); } else { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); @@ -574,8 +899,6 @@ void tcg_gen_sub2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 al, { if (TCG_TARGET_HAS_sub2_i32) { tcg_gen_op6_i32(INDEX_op_sub2_i32, rl, rh, al, ah, bl, bh); - /* Allow the optimizer room to replace sub2 with two moves. */ - tcg_gen_op0(&tcg_ctx, INDEX_op_nop); } else { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); @@ -592,8 +915,6 @@ void tcg_gen_mulu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) { if (TCG_TARGET_HAS_mulu2_i32) { tcg_gen_op4_i32(INDEX_op_mulu2_i32, rl, rh, arg1, arg2); - /* Allow the optimizer room to replace mulu2 with two moves. */ - tcg_gen_op0(&tcg_ctx, INDEX_op_nop); } else if (TCG_TARGET_HAS_muluh_i32) { TCGv_i32 t = tcg_temp_new_i32(); tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2); @@ -616,8 +937,6 @@ void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) { if (TCG_TARGET_HAS_muls2_i32) { tcg_gen_op4_i32(INDEX_op_muls2_i32, rl, rh, arg1, arg2); - /* Allow the optimizer room to replace muls2 with two moves. */ - tcg_gen_op0(&tcg_ctx, INDEX_op_nop); } else if (TCG_TARGET_HAS_mulsh_i32) { TCGv_i32 t = tcg_temp_new_i32(); tcg_gen_op3_i32(INDEX_op_mul_i32, t, arg1, arg2); @@ -654,6 +973,33 @@ void tcg_gen_muls2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) } } +void tcg_gen_mulsu2_i32(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, TCGv_i32 arg2) +{ + if (TCG_TARGET_REG_BITS == 32) { + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + tcg_gen_mulu2_i32(t0, t1, arg1, arg2); + /* Adjust for negative input for the signed arg1. 
*/ + tcg_gen_sari_i32(t2, arg1, 31); + tcg_gen_and_i32(t2, t2, arg2); + tcg_gen_sub_i32(rh, t1, t2); + tcg_gen_mov_i32(rl, t0); + tcg_temp_free_i32(t0); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); + } else { + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + tcg_gen_ext_i32_i64(t0, arg1); + tcg_gen_extu_i32_i64(t1, arg2); + tcg_gen_mul_i64(t0, t0, t1); + tcg_gen_extr_i64_i32(rl, rh, t0); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + } +} + void tcg_gen_ext8s_i32(TCGv_i32 ret, TCGv_i32 arg) { if (TCG_TARGET_HAS_ext8s_i32) { @@ -766,7 +1112,7 @@ void tcg_gen_ld8u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) void tcg_gen_ld8s_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { tcg_gen_ld8s_i32(TCGV_LOW(ret), arg2, offset); - tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), 31); + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); } void tcg_gen_ld16u_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) @@ -797,7 +1143,7 @@ void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) { /* Since arg2 and ret have different types, they cannot be the same temporary */ -#ifdef TCG_TARGET_WORDS_BIGENDIAN +#ifdef HOST_WORDS_BIGENDIAN tcg_gen_ld_i32(TCGV_HIGH(ret), arg2, offset); tcg_gen_ld_i32(TCGV_LOW(ret), arg2, offset + 4); #else @@ -808,7 +1154,7 @@ void tcg_gen_ld_i64(TCGv_i64 ret, TCGv_ptr arg2, tcg_target_long offset) void tcg_gen_st_i64(TCGv_i64 arg1, TCGv_ptr arg2, tcg_target_long offset) { -#ifdef TCG_TARGET_WORDS_BIGENDIAN +#ifdef HOST_WORDS_BIGENDIAN tcg_gen_st_i32(TCGV_HIGH(arg1), arg2, offset); tcg_gen_st_i32(TCGV_LOW(arg1), arg2, offset + 4); #else @@ -1081,28 +1427,29 @@ void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2) } } -void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, int label) +void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1, TCGv_i64 arg2, TCGLabel *l) { if (cond == TCG_COND_ALWAYS) { - tcg_gen_br(label); + tcg_gen_br(l); } else if (cond != TCG_COND_NEVER) { if (TCG_TARGET_REG_BITS == 32) { tcg_gen_op6ii_i32(INDEX_op_brcond2_i32, TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2), - TCGV_HIGH(arg2), cond, label); + TCGV_HIGH(arg2), cond, label_arg(l)); } else { - tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond, label); + tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond, + label_arg(l)); } } } -void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, int64_t arg2, int label) +void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1, int64_t arg2, TCGLabel *l) { if (cond == TCG_COND_ALWAYS) { - tcg_gen_br(label); + tcg_gen_br(l); } else if (cond != TCG_COND_NEVER) { TCGv_i64 t0 = tcg_const_i64(arg2); - tcg_gen_brcond_i64(cond, arg1, t0, label); + tcg_gen_brcond_i64(cond, arg1, t0, l); tcg_temp_free_i64(t0); } } @@ -1467,6 +1814,115 @@ void tcg_gen_orc_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) } } +void tcg_gen_clz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + if (TCG_TARGET_HAS_clz_i64) { + tcg_gen_op3_i64(INDEX_op_clz_i64, ret, arg1, arg2); + } else { + gen_helper_clz_i64(ret, arg1, arg2); + } +} + +void tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2) +{ + if (TCG_TARGET_REG_BITS == 32 + && TCG_TARGET_HAS_clz_i32 + && arg2 <= 0xffffffffu) { + TCGv_i32 t = tcg_const_i32((uint32_t)arg2 - 32); + tcg_gen_clz_i32(t, TCGV_LOW(arg1), t); + tcg_gen_addi_i32(t, t, 32); + tcg_gen_clz_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), t); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + tcg_temp_free_i32(t); + } else { + TCGv_i64 t = tcg_const_i64(arg2); + 
tcg_gen_clz_i64(ret, arg1, t); + tcg_temp_free_i64(t); + } +} + +void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) +{ + if (TCG_TARGET_HAS_ctz_i64) { + tcg_gen_op3_i64(INDEX_op_ctz_i64, ret, arg1, arg2); + } else if (TCG_TARGET_HAS_ctpop_i64 || TCG_TARGET_HAS_clz_i64) { + TCGv_i64 z, t = tcg_temp_new_i64(); + + if (TCG_TARGET_HAS_ctpop_i64) { + tcg_gen_subi_i64(t, arg1, 1); + tcg_gen_andc_i64(t, t, arg1); + tcg_gen_ctpop_i64(t, t); + } else { + /* Since all non-x86 hosts have clz(0) == 64, don't fight it. */ + tcg_gen_neg_i64(t, arg1); + tcg_gen_and_i64(t, t, arg1); + tcg_gen_clzi_i64(t, t, 64); + tcg_gen_xori_i64(t, t, 63); + } + z = tcg_const_i64(0); + tcg_gen_movcond_i64(TCG_COND_EQ, ret, arg1, z, arg2, t); + tcg_temp_free_i64(t); + tcg_temp_free_i64(z); + } else { + gen_helper_ctz_i64(ret, arg1, arg2); + } +} + +void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2) +{ + if (TCG_TARGET_REG_BITS == 32 + && TCG_TARGET_HAS_ctz_i32 + && arg2 <= 0xffffffffu) { + TCGv_i32 t32 = tcg_const_i32((uint32_t)arg2 - 32); + tcg_gen_ctz_i32(t32, TCGV_HIGH(arg1), t32); + tcg_gen_addi_i32(t32, t32, 32); + tcg_gen_ctz_i32(TCGV_LOW(ret), TCGV_LOW(arg1), t32); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + tcg_temp_free_i32(t32); + } else if (!TCG_TARGET_HAS_ctz_i64 + && TCG_TARGET_HAS_ctpop_i64 + && arg2 == 64) { + /* This equivalence has the advantage of not requiring a fixup. */ + TCGv_i64 t = tcg_temp_new_i64(); + tcg_gen_subi_i64(t, arg1, 1); + tcg_gen_andc_i64(t, t, arg1); + tcg_gen_ctpop_i64(ret, t); + tcg_temp_free_i64(t); + } else { + TCGv_i64 t64 = tcg_const_i64(arg2); + tcg_gen_ctz_i64(ret, arg1, t64); + tcg_temp_free_i64(t64); + } +} + +void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg) +{ + if (TCG_TARGET_HAS_clz_i64 || TCG_TARGET_HAS_clz_i32) { + TCGv_i64 t = tcg_temp_new_i64(); + tcg_gen_sari_i64(t, arg, 63); + tcg_gen_xor_i64(t, t, arg); + tcg_gen_clzi_i64(t, t, 64); + tcg_gen_subi_i64(ret, t, 1); + tcg_temp_free_i64(t); + } else { + gen_helper_clrsb_i64(ret, arg); + } +} + +void tcg_gen_ctpop_i64(TCGv_i64 ret, TCGv_i64 arg1) +{ + if (TCG_TARGET_HAS_ctpop_i64) { + tcg_gen_op2_i64(INDEX_op_ctpop_i64, ret, arg1); + } else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_ctpop_i32) { + tcg_gen_ctpop_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1)); + tcg_gen_ctpop_i32(TCGV_LOW(ret), TCGV_LOW(arg1)); + tcg_gen_add_i32(TCGV_LOW(ret), TCGV_LOW(ret), TCGV_HIGH(ret)); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + } else { + gen_helper_ctpop_i64(ret, arg1); + } +} + void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_HAS_rot_i64) { @@ -1541,10 +1997,11 @@ void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2, TCGv_i64 t1; tcg_debug_assert(ofs < 64); + tcg_debug_assert(len > 0); tcg_debug_assert(len <= 64); tcg_debug_assert(ofs + len <= 64); - if (ofs == 0 && len == 64) { + if (len == 64) { tcg_gen_mov_i64(ret, arg2); return; } @@ -1583,10 +2040,297 @@ void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2, tcg_temp_free_i64(t1); } +void tcg_gen_deposit_z_i64(TCGv_i64 ret, TCGv_i64 arg, + unsigned int ofs, unsigned int len) +{ + tcg_debug_assert(ofs < 64); + tcg_debug_assert(len > 0); + tcg_debug_assert(len <= 64); + tcg_debug_assert(ofs + len <= 64); + + if (ofs + len == 64) { + tcg_gen_shli_i64(ret, arg, ofs); + } else if (ofs == 0) { + tcg_gen_andi_i64(ret, arg, (1ull << len) - 1); + } else if (TCG_TARGET_HAS_deposit_i64 + && TCG_TARGET_deposit_i64_valid(ofs, len)) { + TCGv_i64 zero = tcg_const_i64(0); + 
tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, zero, arg, ofs, len); + tcg_temp_free_i64(zero); + } else { + if (TCG_TARGET_REG_BITS == 32) { + if (ofs >= 32) { + tcg_gen_deposit_z_i32(TCGV_HIGH(ret), TCGV_LOW(arg), + ofs - 32, len); + tcg_gen_movi_i32(TCGV_LOW(ret), 0); + return; + } + if (ofs + len <= 32) { + tcg_gen_deposit_z_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + return; + } + } + /* To help two-operand hosts we prefer to zero-extend first, + which allows ARG to stay live. */ + switch (len) { + case 32: + if (TCG_TARGET_HAS_ext32u_i64) { + tcg_gen_ext32u_i64(ret, arg); + tcg_gen_shli_i64(ret, ret, ofs); + return; + } + break; + case 16: + if (TCG_TARGET_HAS_ext16u_i64) { + tcg_gen_ext16u_i64(ret, arg); + tcg_gen_shli_i64(ret, ret, ofs); + return; + } + break; + case 8: + if (TCG_TARGET_HAS_ext8u_i64) { + tcg_gen_ext8u_i64(ret, arg); + tcg_gen_shli_i64(ret, ret, ofs); + return; + } + break; + } + /* Otherwise prefer zero-extension over AND for code size. */ + switch (ofs + len) { + case 32: + if (TCG_TARGET_HAS_ext32u_i64) { + tcg_gen_shli_i64(ret, arg, ofs); + tcg_gen_ext32u_i64(ret, ret); + return; + } + break; + case 16: + if (TCG_TARGET_HAS_ext16u_i64) { + tcg_gen_shli_i64(ret, arg, ofs); + tcg_gen_ext16u_i64(ret, ret); + return; + } + break; + case 8: + if (TCG_TARGET_HAS_ext8u_i64) { + tcg_gen_shli_i64(ret, arg, ofs); + tcg_gen_ext8u_i64(ret, ret); + return; + } + break; + } + tcg_gen_andi_i64(ret, arg, (1ull << len) - 1); + tcg_gen_shli_i64(ret, ret, ofs); + } +} + +void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg, + unsigned int ofs, unsigned int len) +{ + tcg_debug_assert(ofs < 64); + tcg_debug_assert(len > 0); + tcg_debug_assert(len <= 64); + tcg_debug_assert(ofs + len <= 64); + + /* Canonicalize certain special cases, even if extract is supported. */ + if (ofs + len == 64) { + tcg_gen_shri_i64(ret, arg, 64 - len); + return; + } + if (ofs == 0) { + tcg_gen_andi_i64(ret, arg, (1ull << len) - 1); + return; + } + + if (TCG_TARGET_REG_BITS == 32) { + /* Look for a 32-bit extract within one of the two words. */ + if (ofs >= 32) { + tcg_gen_extract_i32(TCGV_LOW(ret), TCGV_HIGH(arg), ofs - 32, len); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + return; + } + if (ofs + len <= 32) { + tcg_gen_extract_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len); + tcg_gen_movi_i32(TCGV_HIGH(ret), 0); + return; + } + /* The field is split across two words. One double-word + shift is better than two double-word shifts. */ + goto do_shift_and; + } + + if (TCG_TARGET_HAS_extract_i64 + && TCG_TARGET_extract_i64_valid(ofs, len)) { + tcg_gen_op4ii_i64(INDEX_op_extract_i64, ret, arg, ofs, len); + return; + } + + /* Assume that zero-extension, if available, is cheaper than a shift. */ + switch (ofs + len) { + case 32: + if (TCG_TARGET_HAS_ext32u_i64) { + tcg_gen_ext32u_i64(ret, arg); + tcg_gen_shri_i64(ret, ret, ofs); + return; + } + break; + case 16: + if (TCG_TARGET_HAS_ext16u_i64) { + tcg_gen_ext16u_i64(ret, arg); + tcg_gen_shri_i64(ret, ret, ofs); + return; + } + break; + case 8: + if (TCG_TARGET_HAS_ext8u_i64) { + tcg_gen_ext8u_i64(ret, arg); + tcg_gen_shri_i64(ret, ret, ofs); + return; + } + break; + } + + /* ??? Ideally we'd know what values are available for immediate AND. + Assume that 8 bits are available, plus the special cases of 16 and 32, + so that we get ext8u, ext16u, and ext32u. */ + switch (len) { + case 1 ... 
8: case 16: case 32: + do_shift_and: + tcg_gen_shri_i64(ret, arg, ofs); + tcg_gen_andi_i64(ret, ret, (1ull << len) - 1); + break; + default: + tcg_gen_shli_i64(ret, arg, 64 - len - ofs); + tcg_gen_shri_i64(ret, ret, 64 - len); + break; + } +} + +void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg, + unsigned int ofs, unsigned int len) +{ + tcg_debug_assert(ofs < 64); + tcg_debug_assert(len > 0); + tcg_debug_assert(len <= 64); + tcg_debug_assert(ofs + len <= 64); + + /* Canonicalize certain special cases, even if sextract is supported. */ + if (ofs + len == 64) { + tcg_gen_sari_i64(ret, arg, 64 - len); + return; + } + if (ofs == 0) { + switch (len) { + case 32: + tcg_gen_ext32s_i64(ret, arg); + return; + case 16: + tcg_gen_ext16s_i64(ret, arg); + return; + case 8: + tcg_gen_ext8s_i64(ret, arg); + return; + } + } + + if (TCG_TARGET_REG_BITS == 32) { + /* Look for a 32-bit extract within one of the two words. */ + if (ofs >= 32) { + tcg_gen_sextract_i32(TCGV_LOW(ret), TCGV_HIGH(arg), ofs - 32, len); + } else if (ofs + len <= 32) { + tcg_gen_sextract_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len); + } else if (ofs == 0) { + tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg)); + tcg_gen_sextract_i32(TCGV_HIGH(ret), TCGV_HIGH(arg), 0, len - 32); + return; + } else if (len > 32) { + TCGv_i32 t = tcg_temp_new_i32(); + /* Extract the bits for the high word normally. */ + tcg_gen_sextract_i32(t, TCGV_HIGH(arg), ofs + 32, len - 32); + /* Shift the field down for the low part. */ + tcg_gen_shri_i64(ret, arg, ofs); + /* Overwrite the shift into the high part. */ + tcg_gen_mov_i32(TCGV_HIGH(ret), t); + tcg_temp_free_i32(t); + return; + } else { + /* Shift the field down for the low part, such that the + field sits at the MSB. */ + tcg_gen_shri_i64(ret, arg, ofs + len - 32); + /* Shift the field down from the MSB, sign extending. */ + tcg_gen_sari_i32(TCGV_LOW(ret), TCGV_LOW(ret), 32 - len); + } + /* Sign-extend the field from 32 bits. */ + tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); + return; + } + + if (TCG_TARGET_HAS_sextract_i64 + && TCG_TARGET_extract_i64_valid(ofs, len)) { + tcg_gen_op4ii_i64(INDEX_op_sextract_i64, ret, arg, ofs, len); + return; + } + + /* Assume that sign-extension, if available, is cheaper than a shift. 
*/ + switch (ofs + len) { + case 32: + if (TCG_TARGET_HAS_ext32s_i64) { + tcg_gen_ext32s_i64(ret, arg); + tcg_gen_sari_i64(ret, ret, ofs); + return; + } + break; + case 16: + if (TCG_TARGET_HAS_ext16s_i64) { + tcg_gen_ext16s_i64(ret, arg); + tcg_gen_sari_i64(ret, ret, ofs); + return; + } + break; + case 8: + if (TCG_TARGET_HAS_ext8s_i64) { + tcg_gen_ext8s_i64(ret, arg); + tcg_gen_sari_i64(ret, ret, ofs); + return; + } + break; + } + switch (len) { + case 32: + if (TCG_TARGET_HAS_ext32s_i64) { + tcg_gen_shri_i64(ret, arg, ofs); + tcg_gen_ext32s_i64(ret, ret); + return; + } + break; + case 16: + if (TCG_TARGET_HAS_ext16s_i64) { + tcg_gen_shri_i64(ret, arg, ofs); + tcg_gen_ext16s_i64(ret, ret); + return; + } + break; + case 8: + if (TCG_TARGET_HAS_ext8s_i64) { + tcg_gen_shri_i64(ret, arg, ofs); + tcg_gen_ext8s_i64(ret, ret); + return; + } + break; + } + tcg_gen_shli_i64(ret, arg, 64 - len - ofs); + tcg_gen_sari_i64(ret, ret, 64 - len); +} + void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret, TCGv_i64 c1, TCGv_i64 c2, TCGv_i64 v1, TCGv_i64 v2) { - if (TCG_TARGET_REG_BITS == 32) { + if (cond == TCG_COND_ALWAYS) { + tcg_gen_mov_i64(ret, v1); + } else if (cond == TCG_COND_NEVER) { + tcg_gen_mov_i64(ret, v2); + } else if (TCG_TARGET_REG_BITS == 32) { TCGv_i32 t0 = tcg_temp_new_i32(); TCGv_i32 t1 = tcg_temp_new_i32(); tcg_gen_op6i_i32(INDEX_op_setcond2_i32, t0, @@ -1632,8 +2376,6 @@ void tcg_gen_add2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, { if (TCG_TARGET_HAS_add2_i64) { tcg_gen_op6_i64(INDEX_op_add2_i64, rl, rh, al, ah, bl, bh); - /* Allow the optimizer room to replace add2 with two moves. */ - tcg_gen_op0(&tcg_ctx, INDEX_op_nop); } else { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); @@ -1652,8 +2394,6 @@ void tcg_gen_sub2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 al, { if (TCG_TARGET_HAS_sub2_i64) { tcg_gen_op6_i64(INDEX_op_sub2_i64, rl, rh, al, ah, bl, bh); - /* Allow the optimizer room to replace sub2 with two moves. */ - tcg_gen_op0(&tcg_ctx, INDEX_op_nop); } else { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); @@ -1671,8 +2411,6 @@ void tcg_gen_mulu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_HAS_mulu2_i64) { tcg_gen_op4_i64(INDEX_op_mulu2_i64, rl, rh, arg1, arg2); - /* Allow the optimizer room to replace mulu2 with two moves. */ - tcg_gen_op0(&tcg_ctx, INDEX_op_nop); } else if (TCG_TARGET_HAS_muluh_i64) { TCGv_i64 t = tcg_temp_new_i64(); tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2); @@ -1692,8 +2430,6 @@ void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) { if (TCG_TARGET_HAS_muls2_i64) { tcg_gen_op4_i64(INDEX_op_muls2_i64, rl, rh, arg1, arg2); - /* Allow the optimizer room to replace muls2 with two moves. */ - tcg_gen_op0(&tcg_ctx, INDEX_op_nop); } else if (TCG_TARGET_HAS_mulsh_i64) { TCGv_i64 t = tcg_temp_new_i64(); tcg_gen_op3_i64(INDEX_op_mul_i64, t, arg1, arg2); @@ -1727,30 +2463,46 @@ void tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) } } +void tcg_gen_mulsu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) +{ + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + tcg_gen_mulu2_i64(t0, t1, arg1, arg2); + /* Adjust for negative input for the signed arg1. 
*/ + tcg_gen_sari_i64(t2, arg1, 63); + tcg_gen_and_i64(t2, t2, arg2); + tcg_gen_sub_i64(rh, t1, t2); + tcg_gen_mov_i64(rl, t0); + tcg_temp_free_i64(t0); + tcg_temp_free_i64(t1); + tcg_temp_free_i64(t2); +} + /* Size changing operations. */ -void tcg_gen_trunc_shr_i64_i32(TCGv_i32 ret, TCGv_i64 arg, unsigned count) +void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg) { - tcg_debug_assert(count < 64); if (TCG_TARGET_REG_BITS == 32) { - if (count >= 32) { - tcg_gen_shri_i32(ret, TCGV_HIGH(arg), count - 32); - } else if (count == 0) { - tcg_gen_mov_i32(ret, TCGV_LOW(arg)); - } else { - TCGv_i64 t = tcg_temp_new_i64(); - tcg_gen_shri_i64(t, arg, count); - tcg_gen_mov_i32(ret, TCGV_LOW(t)); - tcg_temp_free_i64(t); - } - } else if (TCG_TARGET_HAS_trunc_shr_i32) { - tcg_gen_op3i_i32(INDEX_op_trunc_shr_i32, ret, - MAKE_TCGV_I32(GET_TCGV_I64(arg)), count); - } else if (count == 0) { + tcg_gen_mov_i32(ret, TCGV_LOW(arg)); + } else if (TCG_TARGET_HAS_extrl_i64_i32) { + tcg_gen_op2(&tcg_ctx, INDEX_op_extrl_i64_i32, + GET_TCGV_I32(ret), GET_TCGV_I64(arg)); + } else { tcg_gen_mov_i32(ret, MAKE_TCGV_I32(GET_TCGV_I64(arg))); + } +} + +void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg) +{ + if (TCG_TARGET_REG_BITS == 32) { + tcg_gen_mov_i32(ret, TCGV_HIGH(arg)); + } else if (TCG_TARGET_HAS_extrh_i64_i32) { + tcg_gen_op2(&tcg_ctx, INDEX_op_extrh_i64_i32, + GET_TCGV_I32(ret), GET_TCGV_I64(arg)); } else { TCGv_i64 t = tcg_temp_new_i64(); - tcg_gen_shri_i64(t, arg, count); + tcg_gen_shri_i64(t, arg, 32); tcg_gen_mov_i32(ret, MAKE_TCGV_I32(GET_TCGV_I64(t))); tcg_temp_free_i64(t); } @@ -1762,9 +2514,8 @@ void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg) tcg_gen_mov_i32(TCGV_LOW(ret), arg); tcg_gen_movi_i32(TCGV_HIGH(ret), 0); } else { - /* Note: we assume the target supports move between - 32 and 64 bit registers. */ - tcg_gen_ext32u_i64(ret, MAKE_TCGV_I64(GET_TCGV_I32(arg))); + tcg_gen_op2(&tcg_ctx, INDEX_op_extu_i32_i64, + GET_TCGV_I64(ret), GET_TCGV_I32(arg)); } } @@ -1774,9 +2525,8 @@ void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg) tcg_gen_mov_i32(TCGV_LOW(ret), arg); tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); } else { - /* Note: we assume the target supports move between - 32 and 64 bit registers. */ - tcg_gen_ext32s_i64(ret, MAKE_TCGV_I64(GET_TCGV_I32(arg))); + tcg_gen_op2(&tcg_ctx, INDEX_op_ext_i32_i64, + GET_TCGV_I64(ret), GET_TCGV_I32(arg)); } } @@ -1812,8 +2562,8 @@ void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg) tcg_gen_mov_i32(lo, TCGV_LOW(arg)); tcg_gen_mov_i32(hi, TCGV_HIGH(arg)); } else { - tcg_gen_trunc_shr_i64_i32(lo, arg, 0); - tcg_gen_trunc_shr_i64_i32(hi, arg, 32); + tcg_gen_extrl_i64_i32(lo, arg); + tcg_gen_extrh_i64_i32(hi, arg); } } @@ -1837,8 +2587,23 @@ void tcg_gen_goto_tb(unsigned idx) tcg_gen_op1i(INDEX_op_goto_tb, idx); } +void tcg_gen_lookup_and_goto_ptr(TCGv addr) +{ + if (TCG_TARGET_HAS_goto_ptr && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { + TCGv_ptr ptr = tcg_temp_new_ptr(); + gen_helper_lookup_tb_ptr(ptr, tcg_ctx.tcg_env, addr); + tcg_gen_op1i(INDEX_op_goto_ptr, GET_TCGV_PTR(ptr)); + tcg_temp_free_ptr(ptr); + } else { + tcg_gen_exit_tb(0); + } +} + static inline TCGMemOp tcg_canonicalize_memop(TCGMemOp op, bool is64, bool st) { + /* Trigger the asserts within as early as possible. 
*/ + (void)get_alignment_bits(op); + switch (op & MO_SIZE) { case MO_8: op &= ~MO_BSWAP; @@ -1862,53 +2627,59 @@ static inline TCGMemOp tcg_canonicalize_memop(TCGMemOp op, bool is64, bool st) return op; } -static inline void tcg_add_param_i32(TCGv_i32 val) -{ - *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(val); -} - -static inline void tcg_add_param_i64(TCGv_i64 val) +static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr, + TCGMemOp memop, TCGArg idx) { + TCGMemOpIdx oi = make_memop_idx(memop, idx); +#if TARGET_LONG_BITS == 32 + tcg_gen_op3i_i32(opc, val, addr, oi); +#else if (TCG_TARGET_REG_BITS == 32) { - *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(TCGV_LOW(val)); - *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(TCGV_HIGH(val)); + tcg_gen_op4i_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr), oi); } else { - *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(val); + tcg_gen_op3(&tcg_ctx, opc, GET_TCGV_I32(val), GET_TCGV_I64(addr), oi); } +#endif } +static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr, + TCGMemOp memop, TCGArg idx) +{ + TCGMemOpIdx oi = make_memop_idx(memop, idx); #if TARGET_LONG_BITS == 32 -# define tcg_add_param_tl tcg_add_param_i32 + if (TCG_TARGET_REG_BITS == 32) { + tcg_gen_op4i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), addr, oi); + } else { + tcg_gen_op3(&tcg_ctx, opc, GET_TCGV_I64(val), GET_TCGV_I32(addr), oi); + } #else -# define tcg_add_param_tl tcg_add_param_i64 + if (TCG_TARGET_REG_BITS == 32) { + tcg_gen_op5i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), + TCGV_LOW(addr), TCGV_HIGH(addr), oi); + } else { + tcg_gen_op3i_i64(opc, val, addr, oi); + } #endif +} void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop) { memop = tcg_canonicalize_memop(memop, 0, 0); - - *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i32; - tcg_add_param_i32(val); - tcg_add_param_tl(addr); - *tcg_ctx.gen_opparam_ptr++ = memop; - *tcg_ctx.gen_opparam_ptr++ = idx; + trace_guest_mem_before_tcg(tcg_ctx.cpu, tcg_ctx.tcg_env, + addr, trace_mem_get_info(memop, 0)); + gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx); } void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop) { memop = tcg_canonicalize_memop(memop, 0, 1); - - *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i32; - tcg_add_param_i32(val); - tcg_add_param_tl(addr); - *tcg_ctx.gen_opparam_ptr++ = memop; - *tcg_ctx.gen_opparam_ptr++ = idx; + trace_guest_mem_before_tcg(tcg_ctx.cpu, tcg_ctx.tcg_env, + addr, trace_mem_get_info(memop, 1)); + gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx); } void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop) { - memop = tcg_canonicalize_memop(memop, 1, 0); - if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) { tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop); if (memop & MO_SIGN) { @@ -1919,25 +2690,369 @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop) return; } - *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i64; - tcg_add_param_i64(val); - tcg_add_param_tl(addr); - *tcg_ctx.gen_opparam_ptr++ = memop; - *tcg_ctx.gen_opparam_ptr++ = idx; + memop = tcg_canonicalize_memop(memop, 1, 0); + trace_guest_mem_before_tcg(tcg_ctx.cpu, tcg_ctx.tcg_env, + addr, trace_mem_get_info(memop, 0)); + gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx); } void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop) { - memop = tcg_canonicalize_memop(memop, 1, 1); - if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) { tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop); return; } - 
*tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i64; - tcg_add_param_i64(val); - tcg_add_param_tl(addr); - *tcg_ctx.gen_opparam_ptr++ = memop; - *tcg_ctx.gen_opparam_ptr++ = idx; + memop = tcg_canonicalize_memop(memop, 1, 1); + trace_guest_mem_before_tcg(tcg_ctx.cpu, tcg_ctx.tcg_env, + addr, trace_mem_get_info(memop, 1)); + gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx); } + +static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, TCGMemOp opc) +{ + switch (opc & MO_SSIZE) { + case MO_SB: + tcg_gen_ext8s_i32(ret, val); + break; + case MO_UB: + tcg_gen_ext8u_i32(ret, val); + break; + case MO_SW: + tcg_gen_ext16s_i32(ret, val); + break; + case MO_UW: + tcg_gen_ext16u_i32(ret, val); + break; + default: + tcg_gen_mov_i32(ret, val); + break; + } +} + +static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, TCGMemOp opc) +{ + switch (opc & MO_SSIZE) { + case MO_SB: + tcg_gen_ext8s_i64(ret, val); + break; + case MO_UB: + tcg_gen_ext8u_i64(ret, val); + break; + case MO_SW: + tcg_gen_ext16s_i64(ret, val); + break; + case MO_UW: + tcg_gen_ext16u_i64(ret, val); + break; + case MO_SL: + tcg_gen_ext32s_i64(ret, val); + break; + case MO_UL: + tcg_gen_ext32u_i64(ret, val); + break; + default: + tcg_gen_mov_i64(ret, val); + break; + } +} + +#ifdef CONFIG_SOFTMMU +typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv, + TCGv_i32, TCGv_i32, TCGv_i32); +typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv, + TCGv_i64, TCGv_i64, TCGv_i32); +typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv, + TCGv_i32, TCGv_i32); +typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv, + TCGv_i64, TCGv_i32); +#else +typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv, TCGv_i32, TCGv_i32); +typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv, TCGv_i64, TCGv_i64); +typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv, TCGv_i32); +typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv, TCGv_i64); +#endif + +#ifdef CONFIG_ATOMIC64 +# define WITH_ATOMIC64(X) X, +#else +# define WITH_ATOMIC64(X) +#endif + +static void * const table_cmpxchg[16] = { + [MO_8] = gen_helper_atomic_cmpxchgb, + [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le, + [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be, + [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le, + [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be, + WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le) + WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be) +}; + +void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv, + TCGv_i32 newv, TCGArg idx, TCGMemOp memop) +{ + memop = tcg_canonicalize_memop(memop, 0, 0); + + if (!parallel_cpus) { + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + + tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE); + + tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN); + tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1); + tcg_gen_qemu_st_i32(t2, addr, idx, memop); + tcg_temp_free_i32(t2); + + if (memop & MO_SIGN) { + tcg_gen_ext_i32(retv, t1, memop); + } else { + tcg_gen_mov_i32(retv, t1); + } + tcg_temp_free_i32(t1); + } else { + gen_atomic_cx_i32 gen; + + gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)]; + tcg_debug_assert(gen != NULL); + +#ifdef CONFIG_SOFTMMU + { + TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop & ~MO_SIGN, idx)); + gen(retv, tcg_ctx.tcg_env, addr, cmpv, newv, oi); + tcg_temp_free_i32(oi); + } +#else + gen(retv, tcg_ctx.tcg_env, addr, cmpv, newv); +#endif + + if (memop & MO_SIGN) { + tcg_gen_ext_i32(retv, retv, memop); + } + } +} + 
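/* Editor's note -- a minimal host-level sketch (hypothetical helper, not a
   QEMU API) of what the !parallel_cpus path of tcg_gen_atomic_cmpxchg_i32
   above emits: load the old value, select newv or the old value with a
   conditional move, store the selection back, and deliver the old value in
   retv.  This is only correct because nothing can race the memory access
   when running a single, serial cpu. */
#include <stdint.h>

static uint32_t serial_cmpxchg32(uint32_t *addr, uint32_t cmpv, uint32_t newv)
{
    uint32_t old = *addr;                       /* tcg_gen_qemu_ld_i32 */
    uint32_t sel = old == cmpv ? newv : old;    /* tcg_gen_movcond_i32 */
    *addr = sel;                                /* tcg_gen_qemu_st_i32 */
    return old;                                 /* result returned in retv */
}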
+void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv, + TCGv_i64 newv, TCGArg idx, TCGMemOp memop) +{ + memop = tcg_canonicalize_memop(memop, 1, 0); + + if (!parallel_cpus) { + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + + tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE); + + tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN); + tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1); + tcg_gen_qemu_st_i64(t2, addr, idx, memop); + tcg_temp_free_i64(t2); + + if (memop & MO_SIGN) { + tcg_gen_ext_i64(retv, t1, memop); + } else { + tcg_gen_mov_i64(retv, t1); + } + tcg_temp_free_i64(t1); + } else if ((memop & MO_SIZE) == MO_64) { +#ifdef CONFIG_ATOMIC64 + gen_atomic_cx_i64 gen; + + gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)]; + tcg_debug_assert(gen != NULL); + +#ifdef CONFIG_SOFTMMU + { + TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop, idx)); + gen(retv, tcg_ctx.tcg_env, addr, cmpv, newv, oi); + tcg_temp_free_i32(oi); + } +#else + gen(retv, tcg_ctx.tcg_env, addr, cmpv, newv); +#endif +#else + gen_helper_exit_atomic(tcg_ctx.tcg_env); + /* Produce a result, so that we have a well-formed opcode stream + with respect to uses of the result in the (dead) code following. */ + tcg_gen_movi_i64(retv, 0); +#endif /* CONFIG_ATOMIC64 */ + } else { + TCGv_i32 c32 = tcg_temp_new_i32(); + TCGv_i32 n32 = tcg_temp_new_i32(); + TCGv_i32 r32 = tcg_temp_new_i32(); + + tcg_gen_extrl_i64_i32(c32, cmpv); + tcg_gen_extrl_i64_i32(n32, newv); + tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN); + tcg_temp_free_i32(c32); + tcg_temp_free_i32(n32); + + tcg_gen_extu_i32_i64(retv, r32); + tcg_temp_free_i32(r32); + + if (memop & MO_SIGN) { + tcg_gen_ext_i64(retv, retv, memop); + } + } +} + +static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val, + TCGArg idx, TCGMemOp memop, bool new_val, + void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32)) +{ + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + + memop = tcg_canonicalize_memop(memop, 0, 0); + + tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN); + gen(t2, t1, val); + tcg_gen_qemu_st_i32(t2, addr, idx, memop); + + tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop); + tcg_temp_free_i32(t1); + tcg_temp_free_i32(t2); +} + +static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val, + TCGArg idx, TCGMemOp memop, void * const table[]) +{ + gen_atomic_op_i32 gen; + + memop = tcg_canonicalize_memop(memop, 0, 0); + + gen = table[memop & (MO_SIZE | MO_BSWAP)]; + tcg_debug_assert(gen != NULL); + +#ifdef CONFIG_SOFTMMU + { + TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop & ~MO_SIGN, idx)); + gen(ret, tcg_ctx.tcg_env, addr, val, oi); + tcg_temp_free_i32(oi); + } +#else + gen(ret, tcg_ctx.tcg_env, addr, val); +#endif + + if (memop & MO_SIGN) { + tcg_gen_ext_i32(ret, ret, memop); + } +} + +static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val, + TCGArg idx, TCGMemOp memop, bool new_val, + void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64)) +{ + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + + memop = tcg_canonicalize_memop(memop, 1, 0); + + tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN); + gen(t2, t1, val); + tcg_gen_qemu_st_i64(t2, addr, idx, memop); + + tcg_gen_ext_i64(ret, (new_val ? 
t2 : t1), memop); + tcg_temp_free_i64(t1); + tcg_temp_free_i64(t2); +} + +static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val, + TCGArg idx, TCGMemOp memop, void * const table[]) +{ + memop = tcg_canonicalize_memop(memop, 1, 0); + + if ((memop & MO_SIZE) == MO_64) { +#ifdef CONFIG_ATOMIC64 + gen_atomic_op_i64 gen; + + gen = table[memop & (MO_SIZE | MO_BSWAP)]; + tcg_debug_assert(gen != NULL); + +#ifdef CONFIG_SOFTMMU + { + TCGv_i32 oi = tcg_const_i32(make_memop_idx(memop & ~MO_SIGN, idx)); + gen(ret, tcg_ctx.tcg_env, addr, val, oi); + tcg_temp_free_i32(oi); + } +#else + gen(ret, tcg_ctx.tcg_env, addr, val); +#endif +#else + gen_helper_exit_atomic(tcg_ctx.tcg_env); + /* Produce a result, so that we have a well-formed opcode stream + with respect to uses of the result in the (dead) code following. */ + tcg_gen_movi_i64(ret, 0); +#endif /* CONFIG_ATOMIC64 */ + } else { + TCGv_i32 v32 = tcg_temp_new_i32(); + TCGv_i32 r32 = tcg_temp_new_i32(); + + tcg_gen_extrl_i64_i32(v32, val); + do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table); + tcg_temp_free_i32(v32); + + tcg_gen_extu_i32_i64(ret, r32); + tcg_temp_free_i32(r32); + + if (memop & MO_SIGN) { + tcg_gen_ext_i64(ret, ret, memop); + } + } +} + +#define GEN_ATOMIC_HELPER(NAME, OP, NEW) \ +static void * const table_##NAME[16] = { \ + [MO_8] = gen_helper_atomic_##NAME##b, \ + [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le, \ + [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be, \ + [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le, \ + [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be, \ + WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le) \ + WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be) \ +}; \ +void tcg_gen_atomic_##NAME##_i32 \ + (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, TCGMemOp memop) \ +{ \ + if (parallel_cpus) { \ + do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME); \ + } else { \ + do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW, \ + tcg_gen_##OP##_i32); \ + } \ +} \ +void tcg_gen_atomic_##NAME##_i64 \ + (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, TCGMemOp memop) \ +{ \ + if (parallel_cpus) { \ + do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME); \ + } else { \ + do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW, \ + tcg_gen_##OP##_i64); \ + } \ +} + +GEN_ATOMIC_HELPER(fetch_add, add, 0) +GEN_ATOMIC_HELPER(fetch_and, and, 0) +GEN_ATOMIC_HELPER(fetch_or, or, 0) +GEN_ATOMIC_HELPER(fetch_xor, xor, 0) + +GEN_ATOMIC_HELPER(add_fetch, add, 1) +GEN_ATOMIC_HELPER(and_fetch, and, 1) +GEN_ATOMIC_HELPER(or_fetch, or, 1) +GEN_ATOMIC_HELPER(xor_fetch, xor, 1) + +static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_mov_i32(r, b); +} + +static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_mov_i64(r, b); +} + +GEN_ATOMIC_HELPER(xchg, mov2, 0) + +#undef GEN_ATOMIC_HELPER
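Editor's notes on the techniques in this diff follow.

The rewritten op emission at the top of the diff replaces the old gen_opc_ptr/gen_opparam_ptr bumping with an array-backed, doubly linked list of TCGOp records whose forward links are filled in eagerly, exactly as the new comment describes. A stripped-down sketch of that allocation pattern, with hypothetical simplified types in place of the real TCGContext:

#include <assert.h>

typedef struct {
    int opc;
    int args;        /* index of the first argument in a parallel buffer */
    int prev, next;  /* indices into the same op array */
} Op;

enum { OP_BUF_SIZE = 1024 };
static Op op_buf[OP_BUF_SIZE];
static int next_op = 1;          /* slot 0 is the list sentinel */

static void emit_op(int opc, int args)
{
    int oi = next_op++;

    assert(oi < OP_BUF_SIZE);
    op_buf[0].prev = oi;         /* the sentinel tracks the last op */
    op_buf[oi] = (Op){ .opc = opc, .args = args,
                       .prev = oi - 1, .next = oi + 1 };
}

Because allocation is strictly sequential during translation, the speculative .next = oi + 1 is valid for every op except the last one, which is why the comment defers proper termination of the list until translation finishes.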
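Several of the new fallbacks lean on classic bit identities: ctz via ctpop uses (x - 1) & ~x to turn the trailing zeros into a mask of ones (conveniently all-ones for x == 0, so ctz(0) comes out as 32 with no fixup); ctz via clz isolates the lowest set bit with x & -x; and clrsb folds the sign bit into the value with x ^ (x >> 31) before counting leading zeros. A small self-test of the three identities, assuming GCC/Clang builtins and arithmetic right shift of signed values on the host (the explicit zero-input default mirrors the clz opcode's arg2 semantics):

#include <assert.h>
#include <stdint.h>

static unsigned ctz_via_ctpop(uint32_t x)
{
    return __builtin_popcount((x - 1) & ~x);        /* 32 for x == 0 */
}

static unsigned ctz_via_clz(uint32_t x)             /* requires x != 0 */
{
    return __builtin_clz(x & -x) ^ 31;              /* 31 - clz(lowest bit) */
}

static unsigned clrsb_via_clz(int32_t x)
{
    uint32_t t = (uint32_t)x ^ (uint32_t)(x >> 31); /* fold in the sign */
    return (t ? __builtin_clz(t) : 32) - 1;         /* clz with default 32 */
}

int main(void)
{
    uint32_t samples[] = { 1, 2, 3, 0x80000000u, 0xfffffffeu, 0x12345678u };
    for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
        uint32_t x = samples[i];
        assert(ctz_via_ctpop(x) == (unsigned)__builtin_ctz(x));
        assert(ctz_via_clz(x) == (unsigned)__builtin_ctz(x));
        assert(clrsb_via_clz((int32_t)x)
               == (unsigned)__builtin_clrsb((int32_t)x));
    }
    assert(ctz_via_ctpop(0) == 32);                 /* the "no fixup" case */
    assert(clrsb_via_clz(0) == 31 && clrsb_via_clz(-1) == 31);
    return 0;
}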
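The generic tail of the new extract/sextract functions is the classic two-shift sequence: shift the field up against the MSB, then shift back down, logically for extract and arithmetically for sextract. Assuming arithmetic right shift of signed values, this is the host-level equivalent; the shift counts stay in range because the functions assert 0 < len and ofs + len <= 32:

#include <assert.h>
#include <stdint.h>

static uint32_t extract32(uint32_t x, unsigned ofs, unsigned len)
{
    return (x << (32 - len - ofs)) >> (32 - len);           /* logical */
}

static int32_t sextract32(uint32_t x, unsigned ofs, unsigned len)
{
    return (int32_t)(x << (32 - len - ofs)) >> (32 - len);  /* arithmetic */
}

int main(void)
{
    assert(extract32(0xdeadbeef, 0, 8) == 0xef);
    assert(extract32(0xdeadbeef, 8, 16) == 0xadbe);
    assert(sextract32(0xdeadbeef, 8, 16) == (int32_t)0xffffadbe);
    assert(sextract32(0x00001234, 8, 8) == 0x12);
    return 0;
}

The earlier branches of both functions exist only because a single zero- or sign-extension op, when the target has one of the right width, is cheaper than this two-shift form.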
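The new tcg_gen_mulsu2_* builds the full signed-by-unsigned product from an unsigned full multiply plus one correction: reading a negative arg1 as unsigned adds 2^64 * arg2 to the product, so subtracting arg2 from the high half whenever arg1 < 0 (the sari/and/sub triple in the diff) recovers the signed result. A standalone check, assuming a GCC/Clang host with __int128 and arithmetic right shift:

#include <assert.h>
#include <stdint.h>

static void mulsu2(uint64_t *rl, uint64_t *rh, int64_t a, uint64_t b)
{
    unsigned __int128 p = (unsigned __int128)(uint64_t)a * b; /* mulu2 */
    uint64_t fix = (uint64_t)(a >> 63) & b;                   /* sari; and */

    *rl = (uint64_t)p;
    *rh = (uint64_t)(p >> 64) - fix;                          /* sub */
}

int main(void)
{
    int64_t as[] = { 0, 1, -1, 12345, -12345, INT64_MIN, INT64_MAX };
    uint64_t bs[] = { 0, 1, 2, 0x123456789abcdef0ull, UINT64_MAX };

    for (unsigned i = 0; i < sizeof(as) / sizeof(as[0]); i++) {
        for (unsigned j = 0; j < sizeof(bs) / sizeof(bs[0]); j++) {
            uint64_t rl, rh;
            __int128 ref = (__int128)as[i] * bs[j];   /* exact: b < 2^64 */

            mulsu2(&rl, &rh, as[i], bs[j]);
            assert(rl == (uint64_t)ref);
            assert(rh == (uint64_t)((unsigned __int128)ref >> 64));
        }
    }
    return 0;
}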
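The 32-bit-host expansion of tcg_gen_clzi_i64 exploits the clz opcode's explicit zero-input default (arg2): count the low word first with default arg2 - 32, bias by 32, then feed that result in as the default for the high word, chaining the two counts into one branch-free 64-bit answer. The same dataflow on the host, using a builtin-backed stand-in for the tcg semantics:

#include <assert.h>
#include <stdint.h>

/* tcg clz semantics: clz(a) for a != 0, otherwise the supplied default */
static uint32_t clz32_def(uint32_t a, uint32_t def)
{
    return a ? (uint32_t)__builtin_clz(a) : def;
}

static uint32_t clz64_def(uint64_t a, uint32_t def)
{
    /* low word, biased by 32; modular arithmetic keeps def - 32 exact */
    uint32_t t = clz32_def((uint32_t)a, def - 32) + 32;
    /* a nonzero high word overrides the low-word count entirely */
    return clz32_def((uint32_t)(a >> 32), t);
}

int main(void)
{
    assert(clz64_def(0, 64) == 64);            /* both words zero */
    assert(clz64_def(1, 64) == 63);            /* low word only */
    assert(clz64_def(1ull << 40, 64) == 23);   /* high word set */
    assert(clz64_def(UINT64_MAX, 64) == 0);
    return 0;
}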
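The reworked qemu_ld/st emitters fold the memory operation and the mmu index into a single TCGMemOpIdx operand via make_memop_idx. A reconstruction of the packing from the contemporaneous tcg.h, with simplified types; treat the exact layout as an assumption of this note rather than something shown in the diff:

#include <assert.h>

typedef unsigned TCGMemOp_;      /* simplified stand-ins for the real types */
typedef unsigned TCGMemOpIdx_;

static TCGMemOpIdx_ make_memop_idx_(TCGMemOp_ op, unsigned idx)
{
    assert(idx <= 15);           /* the mmu index must fit in four bits */
    return (op << 4) | idx;
}

static TCGMemOp_ get_memop_(TCGMemOpIdx_ oi) { return oi >> 4; }
static unsigned get_mmuidx_(TCGMemOpIdx_ oi) { return oi & 15; }

Packing both fields into one TCGArg is what lets gen_ldst_i32/gen_ldst_i64 above cover every combination of 32/64-bit value and address width with the fixed-arity tcg_gen_op3/op4/op5 emitters.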
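Finally, the GEN_ATOMIC_HELPER expansion pairs each operation with a NEW flag selecting whether the fetched value (fetch_add) or the computed value (add_fetch) is returned, and xchg falls out of the same machinery via a mov2 callback that ignores its first operand. A host-level sketch of the serial do_nonatomic_op_* pattern, with hypothetical names:

#include <stdint.h>

typedef uint32_t (*rmw_fn)(uint32_t a, uint32_t b);

static uint32_t nonatomic_op32(uint32_t *addr, uint32_t val,
                               rmw_fn gen, int new_val)
{
    uint32_t t1 = *addr;          /* tcg_gen_qemu_ld_i32 */
    uint32_t t2 = gen(t1, val);   /* e.g. tcg_gen_add_i32 */

    *addr = t2;                   /* tcg_gen_qemu_st_i32 */
    return new_val ? t2 : t1;     /* add_fetch vs fetch_add */
}

static uint32_t mov2(uint32_t a, uint32_t b)
{
    return b;                     /* turns the helper into xchg */
}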