uint16_t prev_copy;
uint16_t next_copy;
tcg_target_ulong val;
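+    /* Mask of bits that may still be nonzero: a clear bit here means the
+       corresponding value bit is known to be zero; -1 means nothing known. */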
+ tcg_target_ulong mask;
};
static struct tcg_temp_info temps[TCG_MAX_TEMPS];
}
}
temps[temp].state = TCG_TEMP_UNDEF;
+ temps[temp].mask = -1;
+}
+
+/* Reset all temporaries, given that there are NB_TEMPS of them. */
+static void reset_all_temps(int nb_temps)
+{
+ int i;
+ for (i = 0; i < nb_temps; i++) {
+ temps[i].state = TCG_TEMP_UNDEF;
+ temps[i].mask = -1;
+ }
}
static int op_bits(TCGOpcode op)
static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
TCGArg dst, TCGArg src)
{
- reset_temp(dst);
- assert(temps[src].state != TCG_TEMP_CONST);
-
- if (s->temps[src].type == s->temps[dst].type) {
- if (temps[src].state != TCG_TEMP_COPY) {
- temps[src].state = TCG_TEMP_COPY;
- temps[src].next_copy = src;
- temps[src].prev_copy = src;
- }
- temps[dst].state = TCG_TEMP_COPY;
- temps[dst].next_copy = temps[src].next_copy;
- temps[dst].prev_copy = src;
- temps[temps[dst].next_copy].prev_copy = dst;
- temps[src].next_copy = dst;
+ reset_temp(dst);
+ temps[dst].mask = temps[src].mask;
+ assert(temps[src].state != TCG_TEMP_CONST);
+
+ if (s->temps[src].type == s->temps[dst].type) {
+ if (temps[src].state != TCG_TEMP_COPY) {
+ temps[src].state = TCG_TEMP_COPY;
+ temps[src].next_copy = src;
+ temps[src].prev_copy = src;
}
+ temps[dst].state = TCG_TEMP_COPY;
+ temps[dst].next_copy = temps[src].next_copy;
+ temps[dst].prev_copy = src;
+ temps[temps[dst].next_copy].prev_copy = dst;
+ temps[src].next_copy = dst;
+ }
- gen_args[0] = dst;
- gen_args[1] = src;
+ gen_args[0] = dst;
+ gen_args[1] = src;
}
static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val)
{
- reset_temp(dst);
- temps[dst].state = TCG_TEMP_CONST;
- temps[dst].val = val;
- gen_args[0] = dst;
- gen_args[1] = val;
+ reset_temp(dst);
+ temps[dst].state = TCG_TEMP_CONST;
+ temps[dst].val = val;
+ temps[dst].mask = val;
+ gen_args[0] = dst;
+ gen_args[1] = val;
}
static TCGOpcode op_to_mov(TCGOpcode op)
static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
+ uint64_t l64, h64;
+
switch (op) {
CASE_OP_32_64(add):
return x + y;
return (int64_t)x >> (int64_t)y;
case INDEX_op_rotr_i32:
- x = ((uint32_t)x << (32 - y)) | ((uint32_t)x >> y);
- return x;
+ return ror32(x, y);
case INDEX_op_rotr_i64:
- x = ((uint64_t)x << (64 - y)) | ((uint64_t)x >> y);
- return x;
+ return ror64(x, y);
case INDEX_op_rotl_i32:
- x = ((uint32_t)x << y) | ((uint32_t)x >> (32 - y));
- return x;
+ return rol32(x, y);
case INDEX_op_rotl_i64:
- x = ((uint64_t)x << y) | ((uint64_t)x >> (64 - y));
- return x;
+ return rol64(x, y);
CASE_OP_32_64(not):
return ~x;
case INDEX_op_ext32u_i64:
return (uint32_t)x;
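+    /* For the 32-bit high-multiply ops, widen both operands to 64 bits,
+       multiply, and return the upper 32 bits of the product. */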
+ case INDEX_op_muluh_i32:
+ return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
+ case INDEX_op_mulsh_i32:
+ return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
+
+ case INDEX_op_muluh_i64:
+ mulu64(&l64, &h64, x, y);
+ return h64;
+ case INDEX_op_mulsh_i64:
+ muls64(&l64, &h64, x, y);
+ return h64;
+
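+    /* "y ? : 1" is the GNU C conditional with omitted middle operand,
+       i.e. "y ? y : 1": substitute 1 for a zero divisor. */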
+ case INDEX_op_div_i32:
+ /* Avoid crashing on divide by zero, otherwise undefined. */
+ return (int32_t)x / ((int32_t)y ? : 1);
+ case INDEX_op_divu_i32:
+ return (uint32_t)x / ((uint32_t)y ? : 1);
+ case INDEX_op_div_i64:
+ return (int64_t)x / ((int64_t)y ? : 1);
+ case INDEX_op_divu_i64:
+ return (uint64_t)x / ((uint64_t)y ? : 1);
+
+ case INDEX_op_rem_i32:
+ return (int32_t)x % ((int32_t)y ? : 1);
+ case INDEX_op_remu_i32:
+ return (uint32_t)x % ((uint32_t)y ? : 1);
+ case INDEX_op_rem_i64:
+ return (int64_t)x % ((int64_t)y ? : 1);
+ case INDEX_op_remu_i64:
+ return (uint64_t)x % ((uint64_t)y ? : 1);
+
default:
fprintf(stderr,
"Unrecognized operation %d in do_constant_folding.\n", op);
TCGArg *args, TCGOpDef *tcg_op_defs)
{
int i, nb_ops, op_index, nb_temps, nb_globals, nb_call_args;
+ tcg_target_ulong mask, affected;
TCGOpcode op;
const TCGOpDef *def;
TCGArg *gen_args;
nb_temps = s->nb_temps;
nb_globals = s->nb_globals;
- memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+ reset_all_temps(nb_temps);
nb_ops = tcg_opc_ptr - s->gen_opc_buf;
gen_args = args;
CASE_OP_32_64(eqv):
CASE_OP_32_64(nand):
CASE_OP_32_64(nor):
+ CASE_OP_32_64(muluh):
+ CASE_OP_32_64(mulsh):
swap_commutative(args[0], &args[1], &args[2]);
break;
CASE_OP_32_64(brcond):
args[5] = tcg_invert_cond(args[5]);
}
break;
- case INDEX_op_add2_i32:
+ CASE_OP_32_64(add2):
swap_commutative(args[0], &args[2], &args[4]);
swap_commutative(args[1], &args[3], &args[5]);
break;
- case INDEX_op_mulu2_i32:
+ CASE_OP_32_64(mulu2):
+ CASE_OP_32_64(muls2):
swap_commutative(args[0], &args[2], &args[3]);
break;
case INDEX_op_brcond2_i32:
break;
}
- /* Simplify expressions for "shift/rot r, 0, a => movi r, 0" */
+    /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
+       "sub r, 0, a => neg r, a" and "op r, a, const => not r, a" cases. */
switch (op) {
CASE_OP_32_64(shl):
CASE_OP_32_64(shr):
continue;
}
break;
+ CASE_OP_32_64(sub):
+ {
+ TCGOpcode neg_op;
+ bool have_neg;
+
+ if (temps[args[2]].state == TCG_TEMP_CONST) {
+ /* Proceed with possible constant folding. */
+ break;
+ }
+ if (op == INDEX_op_sub_i32) {
+ neg_op = INDEX_op_neg_i32;
+ have_neg = TCG_TARGET_HAS_neg_i32;
+ } else {
+ neg_op = INDEX_op_neg_i64;
+ have_neg = TCG_TARGET_HAS_neg_i64;
+ }
+ if (!have_neg) {
+ break;
+ }
+ if (temps[args[1]].state == TCG_TEMP_CONST
+ && temps[args[1]].val == 0) {
+ s->gen_opc_buf[op_index] = neg_op;
+ reset_temp(args[0]);
+ gen_args[0] = args[0];
+ gen_args[1] = args[2];
+ args += 3;
+ gen_args += 2;
+ continue;
+ }
+ }
+ break;
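+    /* Transform ops with a fixed all-ones or all-zeros operand into a NOT
+       of the other operand, when the target has a not insn:
+       "xor/nand r, a, -1", "nor r, a, 0", "andc r, -1, a" and
+       "orc/eqv r, 0, a" all compute ~a. */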
+ CASE_OP_32_64(xor):
+ CASE_OP_32_64(nand):
+ if (temps[args[1]].state != TCG_TEMP_CONST
+ && temps[args[2]].state == TCG_TEMP_CONST
+ && temps[args[2]].val == -1) {
+ i = 1;
+ goto try_not;
+ }
+ break;
+ CASE_OP_32_64(nor):
+ if (temps[args[1]].state != TCG_TEMP_CONST
+ && temps[args[2]].state == TCG_TEMP_CONST
+ && temps[args[2]].val == 0) {
+ i = 1;
+ goto try_not;
+ }
+ break;
+ CASE_OP_32_64(andc):
+ if (temps[args[2]].state != TCG_TEMP_CONST
+ && temps[args[1]].state == TCG_TEMP_CONST
+ && temps[args[1]].val == -1) {
+ i = 2;
+ goto try_not;
+ }
+ break;
+ CASE_OP_32_64(orc):
+ CASE_OP_32_64(eqv):
+ if (temps[args[2]].state != TCG_TEMP_CONST
+ && temps[args[1]].state == TCG_TEMP_CONST
+ && temps[args[1]].val == 0) {
+ i = 2;
+ goto try_not;
+ }
+ break;
+ try_not:
+ {
+ TCGOpcode not_op;
+ bool have_not;
+
+ if (def->flags & TCG_OPF_64BIT) {
+ not_op = INDEX_op_not_i64;
+ have_not = TCG_TARGET_HAS_not_i64;
+ } else {
+ not_op = INDEX_op_not_i32;
+ have_not = TCG_TARGET_HAS_not_i32;
+ }
+ if (!have_not) {
+ break;
+ }
+ s->gen_opc_buf[op_index] = not_op;
+ reset_temp(args[0]);
+ gen_args[0] = args[0];
+ gen_args[1] = args[i];
+ args += 3;
+ gen_args += 2;
+ continue;
+ }
default:
break;
}
- /* Simplify expression for "op r, a, 0 => mov r, a" cases */
+ /* Simplify expression for "op r, a, const => mov r, a" cases */
switch (op) {
CASE_OP_32_64(add):
CASE_OP_32_64(sub):
CASE_OP_32_64(rotr):
CASE_OP_32_64(or):
CASE_OP_32_64(xor):
- if (temps[args[1]].state == TCG_TEMP_CONST) {
- /* Proceed with possible constant folding. */
+ CASE_OP_32_64(andc):
+ if (temps[args[1]].state != TCG_TEMP_CONST
+ && temps[args[2]].state == TCG_TEMP_CONST
+ && temps[args[2]].val == 0) {
+ goto do_mov3;
+ }
+ break;
+ CASE_OP_32_64(and):
+ CASE_OP_32_64(orc):
+ CASE_OP_32_64(eqv):
+ if (temps[args[1]].state != TCG_TEMP_CONST
+ && temps[args[2]].state == TCG_TEMP_CONST
+ && temps[args[2]].val == -1) {
+ goto do_mov3;
+ }
+ break;
+ do_mov3:
+ if (temps_are_copies(args[0], args[1])) {
+ s->gen_opc_buf[op_index] = INDEX_op_nop;
+ } else {
+ s->gen_opc_buf[op_index] = op_to_mov(op);
+ tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
+ gen_args += 2;
+ }
+ args += 3;
+ continue;
+ default:
+ break;
+ }
+
+    /* Simplify using known-zero bits. Currently only ops with a single
+       output argument are supported. */
+ mask = -1;
+ affected = -1;
+ switch (op) {
+ CASE_OP_32_64(ext8s):
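+        /* If the sign bit of the input may be set, sign extension can set
+           all higher bits and nothing useful is known.  Otherwise fall
+           through and handle the op like its zero-extending counterpart. */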
+ if ((temps[args[1]].mask & 0x80) != 0) {
break;
}
- if (temps[args[2]].state == TCG_TEMP_CONST
- && temps[args[2]].val == 0) {
- if (temps_are_copies(args[0], args[1])) {
- s->gen_opc_buf[op_index] = INDEX_op_nop;
- } else {
- s->gen_opc_buf[op_index] = op_to_mov(op);
- tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
- gen_args += 2;
+ CASE_OP_32_64(ext8u):
+ mask = 0xff;
+ goto and_const;
+ CASE_OP_32_64(ext16s):
+ if ((temps[args[1]].mask & 0x8000) != 0) {
+ break;
+ }
+ CASE_OP_32_64(ext16u):
+ mask = 0xffff;
+ goto and_const;
+ case INDEX_op_ext32s_i64:
+ if ((temps[args[1]].mask & 0x80000000) != 0) {
+ break;
+ }
+ case INDEX_op_ext32u_i64:
+ mask = 0xffffffffU;
+ goto and_const;
+
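+    /* For AND, only bits possibly set in both inputs can be set in the
+       result.  With a constant args[2], "affected" collects the possibly-set
+       bits of args[1] that the AND would clear; if it ends up zero, the op
+       is a no-op and is replaced by a mov below. */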
+ CASE_OP_32_64(and):
+ mask = temps[args[2]].mask;
+ if (temps[args[2]].state == TCG_TEMP_CONST) {
+ and_const:
+ affected = temps[args[1]].mask & ~mask;
+ }
+ mask = temps[args[1]].mask & mask;
+ break;
+
+ CASE_OP_32_64(andc):
+        /* Known zeros do not imply known ones.  Therefore, unless
+           args[2] is constant, we can't infer anything from it. */
+ if (temps[args[2]].state == TCG_TEMP_CONST) {
+ mask = ~temps[args[2]].mask;
+ goto and_const;
+ }
+ /* But we certainly know nothing outside args[1] may be set. */
+ mask = temps[args[1]].mask;
+ break;
+
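+    /* For shifts by a constant, shift the mask the same way; for sar the
+       mask's sign bit replicates exactly as the value's sign bit would. */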
+ case INDEX_op_sar_i32:
+ if (temps[args[2]].state == TCG_TEMP_CONST) {
+ mask = (int32_t)temps[args[1]].mask >> temps[args[2]].val;
+ }
+ break;
+ case INDEX_op_sar_i64:
+ if (temps[args[2]].state == TCG_TEMP_CONST) {
+ mask = (int64_t)temps[args[1]].mask >> temps[args[2]].val;
+ }
+ break;
+
+ case INDEX_op_shr_i32:
+ if (temps[args[2]].state == TCG_TEMP_CONST) {
+ mask = (uint32_t)temps[args[1]].mask >> temps[args[2]].val;
+ }
+ break;
+ case INDEX_op_shr_i64:
+ if (temps[args[2]].state == TCG_TEMP_CONST) {
+ mask = (uint64_t)temps[args[1]].mask >> temps[args[2]].val;
+ }
+ break;
+
+ CASE_OP_32_64(shl):
+ if (temps[args[2]].state == TCG_TEMP_CONST) {
+ mask = temps[args[1]].mask << temps[args[2]].val;
+ }
+ break;
+
+ CASE_OP_32_64(neg):
+        /* Set to 1 all bits to the left of, and including, the rightmost
+           possibly-set bit. */
+ mask = -(temps[args[1]].mask & -temps[args[1]].mask);
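+        /* e.g. an input mask of 0b0110100 gives -(0b0000100) = ...1111100. */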
+ break;
+
+ CASE_OP_32_64(deposit):
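+        /* args[3] is the deposit position and args[4] the field length:
+           keep args[1]'s possibly-set bits outside the field and insert
+           args[2]'s low args[4] bits, shifted into place, inside it. */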
+ tmp = ((1ull << args[4]) - 1);
+ mask = ((temps[args[1]].mask & ~(tmp << args[3]))
+ | ((temps[args[2]].mask & tmp) << args[3]));
+ break;
+
+ CASE_OP_32_64(or):
+ CASE_OP_32_64(xor):
+ mask = temps[args[1]].mask | temps[args[2]].mask;
+ break;
+
+ CASE_OP_32_64(setcond):
+ mask = 1;
+ break;
+
+ CASE_OP_32_64(movcond):
+ mask = temps[args[3]].mask | temps[args[4]].mask;
+ break;
+
+ CASE_OP_32_64(ld8u):
+ case INDEX_op_qemu_ld8u:
+ mask = 0xff;
+ break;
+ CASE_OP_32_64(ld16u):
+ case INDEX_op_qemu_ld16u:
+ mask = 0xffff;
+ break;
+ case INDEX_op_ld32u_i64:
+#if TCG_TARGET_REG_BITS == 64
+ case INDEX_op_qemu_ld32u:
+#endif
+ mask = 0xffffffffu;
+ break;
+
+ CASE_OP_32_64(qemu_ld):
+ {
+ TCGMemOp mop = args[def->nb_oargs + def->nb_iargs];
+ if (!(mop & MO_SIGN)) {
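+            /* An unsigned load of 1 << (mop & MO_SIZE) bytes can only set
+               the low 8 << (mop & MO_SIZE) bits; the "2ULL << (n - 1)" form
+               avoids an undefined shift by 64 for 8-byte loads. */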
+ mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
}
- args += 3;
- continue;
}
break;
+
default:
break;
}
+    /* 32-bit ops (i.e. ops that are neither 64-bit nor load/store ops)
+       produce 32-bit results, so the high bits of the mask can be cleared. */
+ if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_64BIT))) {
+ mask &= 0xffffffffu;
+ }
+
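+    /* If no bit of the result can possibly be set, the op is equivalent
+       to "movi r, 0". */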
+ if (mask == 0) {
+ assert(def->nb_oargs == 1);
+ s->gen_opc_buf[op_index] = op_to_movi(op);
+ tcg_opt_gen_movi(gen_args, args[0], 0);
+ args += def->nb_oargs + def->nb_iargs + def->nb_cargs;
+ gen_args += 2;
+ continue;
+ }
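+    /* If the op cannot clear any possibly-set bit of args[1], its result
+       equals args[1]: emit a mov (or a movi if args[1] is constant). */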
+ if (affected == 0) {
+ assert(def->nb_oargs == 1);
+ if (temps_are_copies(args[0], args[1])) {
+ s->gen_opc_buf[op_index] = INDEX_op_nop;
+ } else if (temps[args[1]].state != TCG_TEMP_CONST) {
+ s->gen_opc_buf[op_index] = op_to_mov(op);
+ tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
+ gen_args += 2;
+ } else {
+ s->gen_opc_buf[op_index] = op_to_movi(op);
+ tcg_opt_gen_movi(gen_args, args[0], temps[args[1]].val);
+ gen_args += 2;
+ }
+ args += def->nb_iargs + 1;
+ continue;
+ }
+
/* Simplify expression for "op r, a, 0 => movi r, 0" cases */
switch (op) {
CASE_OP_32_64(and):
CASE_OP_32_64(mul):
+ CASE_OP_32_64(muluh):
+ CASE_OP_32_64(mulsh):
if ((temps[args[2]].state == TCG_TEMP_CONST
&& temps[args[2]].val == 0)) {
s->gen_opc_buf[op_index] = op_to_movi(op);
/* Simplify expression for "op r, a, a => movi r, 0" cases */
switch (op) {
+ CASE_OP_32_64(andc):
CASE_OP_32_64(sub):
CASE_OP_32_64(xor):
if (temps_are_copies(args[1], args[2])) {
CASE_OP_32_64(eqv):
CASE_OP_32_64(nand):
CASE_OP_32_64(nor):
+ CASE_OP_32_64(muluh):
+ CASE_OP_32_64(mulsh):
+ CASE_OP_32_64(div):
+ CASE_OP_32_64(divu):
+ CASE_OP_32_64(rem):
+ CASE_OP_32_64(remu):
if (temps[args[1]].state == TCG_TEMP_CONST
&& temps[args[2]].state == TCG_TEMP_CONST) {
s->gen_opc_buf[op_index] = op_to_movi(op);
tmp = do_constant_folding_cond(op, args[0], args[1], args[2]);
if (tmp != 2) {
if (tmp) {
- memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+ reset_all_temps(nb_temps);
s->gen_opc_buf[op_index] = INDEX_op_br;
gen_args[0] = args[3];
gen_args += 1;
tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
if (tmp != 2) {
if (tmp) {
- memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+ reset_all_temps(nb_temps);
s->gen_opc_buf[op_index] = INDEX_op_br;
gen_args[0] = args[5];
gen_args += 1;
&& temps[args[3]].val == 0) {
/* Simplify LT/GE comparisons vs zero to a single compare
vs the high word of the input. */
- memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+ reset_all_temps(nb_temps);
s->gen_opc_buf[op_index] = INDEX_op_brcond_i32;
gen_args[0] = args[1];
gen_args[1] = args[3];
/* Simplify LT/GE comparisons vs zero to a single compare
vs the high word of the input. */
s->gen_opc_buf[op_index] = INDEX_op_setcond_i32;
+ reset_temp(args[0]);
gen_args[0] = args[0];
gen_args[1] = args[2];
gen_args[2] = args[4];
/* Default case: we know nothing about operation (or were unable
to compute the operation result) so no propagation is done.
We trash everything if the operation is the end of a basic
- block, otherwise we only trash the output args. */
+           block, otherwise we only trash the output args.  "mask" is
+           the set of bits that may be nonzero in the first output arg. */
if (def->flags & TCG_OPF_BB_END) {
- memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+ reset_all_temps(nb_temps);
} else {
for (i = 0; i < def->nb_oargs; i++) {
reset_temp(args[i]);
+                /* Save the corresponding possibly-nonzero-bits mask for the
+                   first output argument (only one supported so far). */
+ if (i == 0) {
+ temps[args[i]].mask = mask;
+ }
}
}
for (i = 0; i < def->nb_args; i++) {