static TCGv_ptr cpu_ptr0, cpu_ptr1;
static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
static TCGv_i64 cpu_tmp1_i64;
-static TCGv cpu_tmp5;
static uint8_t gen_opc_cc_op[OPC_BUF_SIZE];
[CC_OP_ADCX] = USES_CC_DST | USES_CC_SRC,
[CC_OP_ADOX] = USES_CC_SRC | USES_CC_SRC2,
[CC_OP_ADCOX] = USES_CC_DST | USES_CC_SRC | USES_CC_SRC2,
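+    /* CLR computes every flag as a constant, so no CC variables are live. */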
+ [CC_OP_CLR] = 0,
};
static void set_cc_op(DisasContext *s, CCOp op)
tcg_gen_discard_tl(cpu_cc_srcT);
}
+ if (op == CC_OP_DYNAMIC) {
+ /* The DYNAMIC setting is translator only, and should never be
+ stored. Thus we always consider it clean. */
+ s->cc_op_dirty = false;
+ } else {
+ /* Discard any computed CC_OP value (see shifts). */
+ if (s->cc_op == CC_OP_DYNAMIC) {
+ tcg_gen_discard_i32(cpu_cc_op);
+ }
+ s->cc_op_dirty = true;
+ }
s->cc_op = op;
- /* The DYNAMIC setting is translator only, and should never be
- stored. Thus we always consider it clean. */
- s->cc_op_dirty = (op != CC_OP_DYNAMIC);
}
static void gen_update_cc_op(DisasContext *s)
if (s->cc_op == CC_OP_EFLAGS) {
return;
}
+    if (s->cc_op == CC_OP_CLR) {
+        /* A zeroed result sets only Z and P; materialize exactly those. */
+        tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P);
+        set_cc_op(s, CC_OP_EFLAGS);
+        return;
+    }
TCGV_UNUSED(zero);
dst = cpu_cc_dst;
.reg2 = t1, .mask = -1, .use_reg2 = true };
case CC_OP_LOGICB ... CC_OP_LOGICQ:
+ case CC_OP_CLR:
return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
case CC_OP_INCB ... CC_OP_INCQ:
case CC_OP_ADCOX:
return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
.mask = CC_S };
+ case CC_OP_CLR:
+ return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
default:
{
int size = (s->cc_op - CC_OP_ADDB) & 3;
case CC_OP_ADCOX:
return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2,
.mask = -1, .no_setcond = true };
-
+ case CC_OP_CLR:
+ return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 };
default:
gen_compute_eflags(s);
return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
case CC_OP_ADCOX:
return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src,
.mask = CC_Z };
+ case CC_OP_CLR:
+ return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 };
default:
{
int size = (s->cc_op - CC_OP_ADDB) & 3;
tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
}
-static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
- int is_right, int is_arith)
+static void gen_shift_flags(DisasContext *s, int ot, TCGv result, TCGv shm1,
+ TCGv count, bool is_right)
{
- target_ulong mask;
- int shift_label;
- TCGv t0, t1, t2;
+ TCGv_i32 z32, s32, oldop;
+ TCGv z_tl;
+
+    /* Store the results into the CC variables.  If we know that the
+       variable must be dead, store unconditionally.  Otherwise we must
+       take care not to disrupt the current contents. */
+ z_tl = tcg_const_tl(0);
+ if (cc_op_live[s->cc_op] & USES_CC_DST) {
+ tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
+ result, cpu_cc_dst);
+ } else {
+ tcg_gen_mov_tl(cpu_cc_dst, result);
+ }
+ if (cc_op_live[s->cc_op] & USES_CC_SRC) {
+ tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
+ shm1, cpu_cc_src);
+ } else {
+ tcg_gen_mov_tl(cpu_cc_src, shm1);
+ }
+ tcg_temp_free(z_tl);
- if (ot == OT_QUAD) {
- mask = 0x3f;
+ /* Get the two potential CC_OP values into temporaries. */
+ tcg_gen_movi_i32(cpu_tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
+ if (s->cc_op == CC_OP_DYNAMIC) {
+ oldop = cpu_cc_op;
} else {
- mask = 0x1f;
+ tcg_gen_movi_i32(cpu_tmp3_i32, s->cc_op);
+ oldop = cpu_tmp3_i32;
}
+ /* Conditionally store the CC_OP value. */
+ z32 = tcg_const_i32(0);
+ s32 = tcg_temp_new_i32();
+ tcg_gen_trunc_tl_i32(s32, count);
+ tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, cpu_tmp2_i32, oldop);
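+    /* If the count is zero, the old CC_OP value is kept, so the flags
+       remain architecturally unchanged. */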
+ tcg_temp_free_i32(z32);
+ tcg_temp_free_i32(s32);
+
+ /* The CC_OP value is no longer predictable. */
+ set_cc_op(s, CC_OP_DYNAMIC);
+}
+
+static void gen_shift_rm_T1(DisasContext *s, int ot, int op1,
+ int is_right, int is_arith)
+{
+ target_ulong mask = (ot == OT_QUAD ? 0x3f : 0x1f);
+
/* load */
if (op1 == OR_TMP0) {
gen_op_ld_T0_A0(ot + s->mem_index);
gen_op_mov_TN_reg(ot, 0, op1);
}
- t0 = tcg_temp_local_new();
- t1 = tcg_temp_local_new();
- t2 = tcg_temp_local_new();
-
- tcg_gen_andi_tl(t2, cpu_T[1], mask);
+ tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask);
+ tcg_gen_subi_tl(cpu_tmp0, cpu_T[1], 1);
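+    /* Below, cpu_tmp0 becomes the value shifted by count-1; its edge bit
+       is the carry that gen_shift_flags will expose as CF. */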
if (is_right) {
if (is_arith) {
gen_exts(ot, cpu_T[0]);
- tcg_gen_mov_tl(t0, cpu_T[0]);
- tcg_gen_sar_tl(cpu_T[0], cpu_T[0], t2);
+ tcg_gen_sar_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
+ tcg_gen_sar_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
} else {
gen_extu(ot, cpu_T[0]);
- tcg_gen_mov_tl(t0, cpu_T[0]);
- tcg_gen_shr_tl(cpu_T[0], cpu_T[0], t2);
+ tcg_gen_shr_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
+ tcg_gen_shr_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
}
} else {
- tcg_gen_mov_tl(t0, cpu_T[0]);
- tcg_gen_shl_tl(cpu_T[0], cpu_T[0], t2);
+ tcg_gen_shl_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
+ tcg_gen_shl_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
}
/* store */
gen_op_mov_reg_T0(ot, op1);
}
- /* Update eflags data because we cannot predict flags afterward. */
- gen_update_cc_op(s);
- set_cc_op(s, CC_OP_DYNAMIC);
-
- tcg_gen_mov_tl(t1, cpu_T[0]);
-
- shift_label = gen_new_label();
- tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, shift_label);
-
- tcg_gen_addi_tl(t2, t2, -1);
- tcg_gen_mov_tl(cpu_cc_dst, t1);
-
- if (is_right) {
- if (is_arith) {
- tcg_gen_sar_tl(cpu_cc_src, t0, t2);
- } else {
- tcg_gen_shr_tl(cpu_cc_src, t0, t2);
- }
- } else {
- tcg_gen_shl_tl(cpu_cc_src, t0, t2);
- }
-
- if (is_right) {
- tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot);
- } else {
- tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot);
- }
-
- gen_set_label(shift_label);
-
- tcg_temp_free(t0);
- tcg_temp_free(t1);
- tcg_temp_free(t2);
+ gen_shift_flags(s, ot, cpu_T[0], cpu_tmp0, cpu_T[1], is_right);
}
static void gen_shift_rm_im(DisasContext *s, int ot, int op1, int op2,
int is_right, int is_arith)
{
- int mask;
-
- if (ot == OT_QUAD)
- mask = 0x3f;
- else
- mask = 0x1f;
+ int mask = (ot == OT_QUAD ? 0x3f : 0x1f);
/* load */
if (op1 == OR_TMP0)
tcg_gen_shri_tl(ret, arg1, -arg2);
}
-static void gen_rot_rm_T1(DisasContext *s, int ot, int op1,
- int is_right)
+static void gen_rot_rm_T1(DisasContext *s, int ot, int op1, int is_right)
{
- target_ulong mask;
- int label1, label2, data_bits;
- TCGv t0, t1, t2, a0;
-
- /* XXX: inefficient, but we must use local temps */
- t0 = tcg_temp_local_new();
- t1 = tcg_temp_local_new();
- t2 = tcg_temp_local_new();
- a0 = tcg_temp_local_new();
-
- if (ot == OT_QUAD)
- mask = 0x3f;
- else
- mask = 0x1f;
+ target_ulong mask = (ot == OT_QUAD ? 0x3f : 0x1f);
+ TCGv_i32 t0, t1;
/* load */
if (op1 == OR_TMP0) {
- tcg_gen_mov_tl(a0, cpu_A0);
- gen_op_ld_v(ot + s->mem_index, t0, a0);
+ gen_op_ld_T0_A0(ot + s->mem_index);
} else {
- gen_op_mov_v_reg(ot, t0, op1);
+ gen_op_mov_TN_reg(ot, 0, op1);
}
- tcg_gen_mov_tl(t1, cpu_T[1]);
-
- tcg_gen_andi_tl(t1, t1, mask);
+ tcg_gen_andi_tl(cpu_T[1], cpu_T[1], mask);
- /* Must test zero case to avoid using undefined behaviour in TCG
- shifts. */
- label1 = gen_new_label();
- tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label1);
-
- if (ot <= OT_WORD)
- tcg_gen_andi_tl(cpu_tmp0, t1, (1 << (3 + ot)) - 1);
- else
- tcg_gen_mov_tl(cpu_tmp0, t1);
-
- gen_extu(ot, t0);
- tcg_gen_mov_tl(t2, t0);
-
- data_bits = 8 << ot;
- /* XXX: rely on behaviour of shifts when operand 2 overflows (XXX:
- fix TCG definition) */
- if (is_right) {
- tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp0);
- tcg_gen_subfi_tl(cpu_tmp0, data_bits, cpu_tmp0);
- tcg_gen_shl_tl(t0, t0, cpu_tmp0);
- } else {
- tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp0);
- tcg_gen_subfi_tl(cpu_tmp0, data_bits, cpu_tmp0);
- tcg_gen_shr_tl(t0, t0, cpu_tmp0);
+ switch (ot) {
+ case OT_BYTE:
+ /* Replicate the 8-bit input so that a 32-bit rotate works. */
+ tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
+ tcg_gen_muli_tl(cpu_T[0], cpu_T[0], 0x01010101);
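+        /* e.g. 0xAB * 0x01010101 == 0xABABABAB, so any rotate count
+           leaves a correctly rotated copy in the low byte. */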
+ goto do_long;
+ case OT_WORD:
+ /* Replicate the 16-bit input so that a 32-bit rotate works. */
+ tcg_gen_deposit_tl(cpu_T[0], cpu_T[0], cpu_T[0], 16, 16);
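+        /* e.g. 0x1234 yields 0x12341234 in the low 32 bits. */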
+ goto do_long;
+ do_long:
+#ifdef TARGET_X86_64
+ case OT_LONG:
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
+ if (is_right) {
+ tcg_gen_rotr_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
+ } else {
+ tcg_gen_rotl_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
+ }
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ break;
+#endif
+ default:
+ if (is_right) {
+ tcg_gen_rotr_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ } else {
+ tcg_gen_rotl_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ }
+ break;
}
- tcg_gen_or_tl(t0, t0, cpu_tmp4);
- gen_set_label(label1);
/* store */
if (op1 == OR_TMP0) {
- gen_op_st_v(ot + s->mem_index, t0, a0);
+ gen_op_st_T0_A0(ot + s->mem_index);
} else {
- gen_op_mov_reg_v(ot, op1, t0);
+ gen_op_mov_reg_T0(ot, op1);
}
-
- /* update eflags. It is needed anyway most of the time, do it always. */
- gen_compute_eflags(s);
- assert(s->cc_op == CC_OP_EFLAGS);
- label2 = gen_new_label();
- tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, label2);
+ /* We'll need the flags computed into CC_SRC. */
+ gen_compute_eflags(s);
- tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
- tcg_gen_xor_tl(cpu_tmp0, t2, t0);
- tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1));
- tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_O);
- tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_tmp0);
+ /* The value that was "rotated out" is now present at the other end
+ of the word. Compute C into CC_DST and O into CC_SRC2. Note that
+ since we've computed the flags into CC_SRC, these variables are
+ currently dead. */
if (is_right) {
- tcg_gen_shri_tl(t0, t0, data_bits - 1);
+ tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask - 1);
+ tcg_gen_shri_tl(cpu_cc_dst, cpu_T[0], mask);
+ tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
+ } else {
+ tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask);
+ tcg_gen_andi_tl(cpu_cc_dst, cpu_T[0], 1);
}
- tcg_gen_andi_tl(t0, t0, CC_C);
- tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0);
-
- gen_set_label(label2);
-
- tcg_temp_free(t0);
- tcg_temp_free(t1);
- tcg_temp_free(t2);
- tcg_temp_free(a0);
+ tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
+ tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
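+    /* OF is architecturally defined only for 1-bit rotates: it is
+       CF XOR MSB(result) for ROL, and the XOR of the two top result
+       bits for ROR; both match the bits extracted above. */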
+
+    /* Now conditionally store the new CC_OP value.  If the shift count
+       is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live.
+       Otherwise reuse CC_OP_ADCOX, which has the C and O flags split out
+       exactly as we computed above. */
+ t0 = tcg_const_i32(0);
+ t1 = tcg_temp_new_i32();
+ tcg_gen_trunc_tl_i32(t1, cpu_T[1]);
+ tcg_gen_movi_i32(cpu_tmp2_i32, CC_OP_ADCOX);
+ tcg_gen_movi_i32(cpu_tmp3_i32, CC_OP_EFLAGS);
+ tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0,
+ cpu_tmp2_i32, cpu_tmp3_i32);
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+
+ /* The CC_OP value is no longer predictable. */
+ set_cc_op(s, CC_OP_DYNAMIC);
}
static void gen_rot_rm_im(DisasContext *s, int ot, int op1, int op2,
int is_right)
{
- int mask;
- int data_bits;
- TCGv t0, t1, a0;
-
- /* XXX: inefficient, but we must use local temps */
- t0 = tcg_temp_local_new();
- t1 = tcg_temp_local_new();
- a0 = tcg_temp_local_new();
-
- if (ot == OT_QUAD)
- mask = 0x3f;
- else
- mask = 0x1f;
+ int mask = (ot == OT_QUAD ? 0x3f : 0x1f);
+ int shift;
/* load */
if (op1 == OR_TMP0) {
- tcg_gen_mov_tl(a0, cpu_A0);
- gen_op_ld_v(ot + s->mem_index, t0, a0);
+ gen_op_ld_T0_A0(ot + s->mem_index);
} else {
- gen_op_mov_v_reg(ot, t0, op1);
+ gen_op_mov_TN_reg(ot, 0, op1);
}
- gen_extu(ot, t0);
- tcg_gen_mov_tl(t1, t0);
-
op2 &= mask;
- data_bits = 8 << ot;
if (op2 != 0) {
- int shift = op2 & ((1 << (3 + ot)) - 1);
- if (is_right) {
- tcg_gen_shri_tl(cpu_tmp4, t0, shift);
- tcg_gen_shli_tl(t0, t0, data_bits - shift);
- }
- else {
- tcg_gen_shli_tl(cpu_tmp4, t0, shift);
- tcg_gen_shri_tl(t0, t0, data_bits - shift);
+ switch (ot) {
+#ifdef TARGET_X86_64
+ case OT_LONG:
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ if (is_right) {
+ tcg_gen_rotri_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
+ } else {
+ tcg_gen_rotli_i32(cpu_tmp2_i32, cpu_tmp2_i32, op2);
+ }
+ tcg_gen_extu_i32_tl(cpu_T[0], cpu_tmp2_i32);
+ break;
+#endif
+ default:
+ if (is_right) {
+ tcg_gen_rotri_tl(cpu_T[0], cpu_T[0], op2);
+ } else {
+ tcg_gen_rotli_tl(cpu_T[0], cpu_T[0], op2);
+ }
+ break;
+ case OT_BYTE:
+ mask = 7;
+ goto do_shifts;
+ case OT_WORD:
+ mask = 15;
+ do_shifts:
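+            /* Convert a right rotate into the equivalent left rotate. */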
+ shift = op2 & mask;
+ if (is_right) {
+ shift = mask + 1 - shift;
+ }
+ gen_extu(ot, cpu_T[0]);
+ tcg_gen_shli_tl(cpu_tmp0, cpu_T[0], shift);
+ tcg_gen_shri_tl(cpu_T[0], cpu_T[0], mask + 1 - shift);
+ tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_tmp0);
+ break;
}
- tcg_gen_or_tl(t0, t0, cpu_tmp4);
}
/* store */
if (op1 == OR_TMP0) {
- gen_op_st_v(ot + s->mem_index, t0, a0);
+ gen_op_st_T0_A0(ot + s->mem_index);
} else {
- gen_op_mov_reg_v(ot, op1, t0);
+ gen_op_mov_reg_T0(ot, op1);
}
if (op2 != 0) {
- /* update eflags */
+ /* Compute the flags into CC_SRC. */
gen_compute_eflags(s);
- assert(s->cc_op == CC_OP_EFLAGS);
- tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~(CC_O | CC_C));
- tcg_gen_xor_tl(cpu_tmp0, t1, t0);
- tcg_gen_lshift(cpu_tmp0, cpu_tmp0, 11 - (data_bits - 1));
- tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, CC_O);
- tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_tmp0);
+ /* The value that was "rotated out" is now present at the other end
+ of the word. Compute C into CC_DST and O into CC_SRC2. Note that
+ since we've computed the flags into CC_SRC, these variables are
+ currently dead. */
if (is_right) {
- tcg_gen_shri_tl(t0, t0, data_bits - 1);
+ tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask - 1);
+ tcg_gen_shri_tl(cpu_cc_dst, cpu_T[0], mask);
+ tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1);
+ } else {
+ tcg_gen_shri_tl(cpu_cc_src2, cpu_T[0], mask);
+ tcg_gen_andi_tl(cpu_cc_dst, cpu_T[0], 1);
}
- tcg_gen_andi_tl(t0, t0, CC_C);
- tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0);
+ tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1);
+ tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst);
+ set_cc_op(s, CC_OP_ADCOX);
}
-
- tcg_temp_free(t0);
- tcg_temp_free(t1);
- tcg_temp_free(a0);
}
/* XXX: add faster immediate = 1 case */
/* XXX: add faster immediate case */
static void gen_shiftd_rm_T1(DisasContext *s, int ot, int op1,
- int is_right, TCGv count)
+ bool is_right, TCGv count_in)
{
- int label1, label2, data_bits;
- target_ulong mask;
- TCGv t0, t1, t2, a0;
-
- t0 = tcg_temp_local_new();
- t1 = tcg_temp_local_new();
- t2 = tcg_temp_local_new();
- a0 = tcg_temp_local_new();
-
- if (ot == OT_QUAD)
- mask = 0x3f;
- else
- mask = 0x1f;
+    target_ulong mask = (ot == OT_QUAD ? 0x3f : 0x1f);
+ TCGv count;
/* load */
if (op1 == OR_TMP0) {
- tcg_gen_mov_tl(a0, cpu_A0);
- gen_op_ld_v(ot + s->mem_index, t0, a0);
+ gen_op_ld_T0_A0(ot + s->mem_index);
} else {
- gen_op_mov_v_reg(ot, t0, op1);
+ gen_op_mov_TN_reg(ot, 0, op1);
}
- tcg_gen_andi_tl(t2, count, mask);
- tcg_gen_mov_tl(t1, cpu_T[1]);
+ count = tcg_temp_new();
+ tcg_gen_andi_tl(count, count_in, mask);
- /* Must test zero case to avoid using undefined behaviour in TCG
- shifts. */
- label1 = gen_new_label();
- tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);
-
- tcg_gen_addi_tl(cpu_tmp5, t2, -1);
- if (ot == OT_WORD) {
- /* Note: we implement the Intel behaviour for shift count > 16 */
+ switch (ot) {
+ case OT_WORD:
+ /* Note: we implement the Intel behaviour for shift count > 16.
+ This means "shrdw C, B, A" shifts A:B:A >> C. Build the B:A
+ portion by constructing it as a 32-bit value. */
if (is_right) {
- tcg_gen_andi_tl(t0, t0, 0xffff);
- tcg_gen_shli_tl(cpu_tmp0, t1, 16);
- tcg_gen_or_tl(t0, t0, cpu_tmp0);
- tcg_gen_ext32u_tl(t0, t0);
-
- tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5);
-
- /* only needed if count > 16, but a test would complicate */
- tcg_gen_subfi_tl(cpu_tmp5, 32, t2);
- tcg_gen_shl_tl(cpu_tmp0, t0, cpu_tmp5);
-
- tcg_gen_shr_tl(t0, t0, t2);
-
- tcg_gen_or_tl(t0, t0, cpu_tmp0);
+ tcg_gen_deposit_tl(cpu_tmp0, cpu_T[0], cpu_T[1], 16, 16);
+ tcg_gen_mov_tl(cpu_T[1], cpu_T[0]);
+ tcg_gen_mov_tl(cpu_T[0], cpu_tmp0);
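+            /* cpu_T[0] now holds B:A and cpu_T[1] holds A, so the 64-bit
+               concatenation below forms A:B:A. */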
} else {
- /* XXX: not optimal */
- tcg_gen_andi_tl(t0, t0, 0xffff);
- tcg_gen_shli_tl(t1, t1, 16);
- tcg_gen_or_tl(t1, t1, t0);
- tcg_gen_ext32u_tl(t1, t1);
-
- tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5);
- tcg_gen_subfi_tl(cpu_tmp0, 32, cpu_tmp5);
- tcg_gen_shr_tl(cpu_tmp5, t1, cpu_tmp0);
- tcg_gen_or_tl(cpu_tmp4, cpu_tmp4, cpu_tmp5);
-
- tcg_gen_shl_tl(t0, t0, t2);
- tcg_gen_subfi_tl(cpu_tmp5, 32, t2);
- tcg_gen_shr_tl(t1, t1, cpu_tmp5);
- tcg_gen_or_tl(t0, t0, t1);
+ tcg_gen_deposit_tl(cpu_T[1], cpu_T[0], cpu_T[1], 16, 16);
}
- } else {
- data_bits = 8 << ot;
+ /* FALLTHRU */
+#ifdef TARGET_X86_64
+ case OT_LONG:
+ /* Concatenate the two 32-bit values and use a 64-bit shift. */
+ tcg_gen_subi_tl(cpu_tmp0, count, 1);
if (is_right) {
- if (ot == OT_LONG)
- tcg_gen_ext32u_tl(t0, t0);
-
- tcg_gen_shr_tl(cpu_tmp4, t0, cpu_tmp5);
+ tcg_gen_concat_tl_i64(cpu_T[0], cpu_T[0], cpu_T[1]);
+ tcg_gen_shr_i64(cpu_tmp0, cpu_T[0], cpu_tmp0);
+ tcg_gen_shr_i64(cpu_T[0], cpu_T[0], count);
+ } else {
+ tcg_gen_concat_tl_i64(cpu_T[0], cpu_T[1], cpu_T[0]);
+ tcg_gen_shl_i64(cpu_tmp0, cpu_T[0], cpu_tmp0);
+ tcg_gen_shl_i64(cpu_T[0], cpu_T[0], count);
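+            /* For a left shift the interesting result is in the high
+               half; move it (and the CF source bit in cpu_tmp0) down. */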
+ tcg_gen_shri_i64(cpu_tmp0, cpu_tmp0, 32);
+ tcg_gen_shri_i64(cpu_T[0], cpu_T[0], 32);
+ }
+ break;
+#endif
+ default:
+ tcg_gen_subi_tl(cpu_tmp0, count, 1);
+ if (is_right) {
+ tcg_gen_shr_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
- tcg_gen_shr_tl(t0, t0, t2);
- tcg_gen_subfi_tl(cpu_tmp5, data_bits, t2);
- tcg_gen_shl_tl(t1, t1, cpu_tmp5);
- tcg_gen_or_tl(t0, t0, t1);
-
+ tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
+ tcg_gen_shr_tl(cpu_T[0], cpu_T[0], count);
+ tcg_gen_shl_tl(cpu_T[1], cpu_T[1], cpu_tmp4);
} else {
- if (ot == OT_LONG)
- tcg_gen_ext32u_tl(t1, t1);
-
- tcg_gen_shl_tl(cpu_tmp4, t0, cpu_tmp5);
-
- tcg_gen_shl_tl(t0, t0, t2);
- tcg_gen_subfi_tl(cpu_tmp5, data_bits, t2);
- tcg_gen_shr_tl(t1, t1, cpu_tmp5);
- tcg_gen_or_tl(t0, t0, t1);
+ tcg_gen_shl_tl(cpu_tmp0, cpu_T[0], cpu_tmp0);
+ if (ot == OT_WORD) {
+ /* Only needed if count > 16, for Intel behaviour. */
+ tcg_gen_subfi_tl(cpu_tmp4, 33, count);
+ tcg_gen_shr_tl(cpu_tmp4, cpu_T[1], cpu_tmp4);
+ tcg_gen_or_tl(cpu_tmp0, cpu_tmp0, cpu_tmp4);
+ }
+
+ tcg_gen_subfi_tl(cpu_tmp4, mask + 1, count);
+ tcg_gen_shl_tl(cpu_T[0], cpu_T[0], count);
+ tcg_gen_shr_tl(cpu_T[1], cpu_T[1], cpu_tmp4);
}
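+        /* If count == 0, cpu_T[1] must not contribute to the result;
+           it holds garbage from the out-of-range shift above. */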
+ tcg_gen_movi_tl(cpu_tmp4, 0);
+ tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T[1], count, cpu_tmp4,
+ cpu_tmp4, cpu_T[1]);
+ tcg_gen_or_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+ break;
}
- tcg_gen_mov_tl(t1, cpu_tmp4);
- gen_set_label(label1);
/* store */
if (op1 == OR_TMP0) {
- gen_op_st_v(ot + s->mem_index, t0, a0);
- } else {
- gen_op_mov_reg_v(ot, op1, t0);
- }
-
- /* Update eflags data because we cannot predict flags afterward. */
- gen_update_cc_op(s);
- set_cc_op(s, CC_OP_DYNAMIC);
-
- label2 = gen_new_label();
- tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label2);
-
- tcg_gen_mov_tl(cpu_cc_src, t1);
- tcg_gen_mov_tl(cpu_cc_dst, t0);
- if (is_right) {
- tcg_gen_movi_i32(cpu_cc_op, CC_OP_SARB + ot);
+ gen_op_st_T0_A0(ot + s->mem_index);
} else {
- tcg_gen_movi_i32(cpu_cc_op, CC_OP_SHLB + ot);
+ gen_op_mov_reg_T0(ot, op1);
}
- gen_set_label(label2);
- tcg_temp_free(t0);
- tcg_temp_free(t1);
- tcg_temp_free(t2);
- tcg_temp_free(a0);
+ gen_shift_flags(s, ot, cpu_T[0], cpu_tmp0, count, is_right);
+ tcg_temp_free(count);
}
static void gen_shift(DisasContext *s1, int op, int ot, int d, int s)
#define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 }
#define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 }
#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 }
+#define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \
+ CPUID_EXT_PCLMULQDQ }
+#define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES }
static const struct SSEOpHelper_epp sse_op_table6[256] = {
[0x00] = SSSE3_OP(pshufb),
[0x3f] = SSE41_OP(pmaxud),
[0x40] = SSE41_OP(pmulld),
[0x41] = SSE41_OP(phminposuw),
+ [0xdb] = AESNI_OP(aesimc),
+ [0xdc] = AESNI_OP(aesenc),
+ [0xdd] = AESNI_OP(aesenclast),
+ [0xde] = AESNI_OP(aesdec),
+ [0xdf] = AESNI_OP(aesdeclast),
};
static const struct SSEOpHelper_eppi sse_op_table7[256] = {
[0x40] = SSE41_OP(dpps),
[0x41] = SSE41_OP(dppd),
[0x42] = SSE41_OP(mpsadbw),
+ [0x44] = PCLMULQDQ_OP(pclmulqdq),
[0x60] = SSE42_OP(pcmpestrm),
[0x61] = SSE42_OP(pcmpestri),
[0x62] = SSE42_OP(pcmpistrm),
[0x63] = SSE42_OP(pcmpistri),
+ [0xdf] = AESNI_OP(aeskeygenassist),
};
static void gen_sse(CPUX86State *env, DisasContext *s, int b,
ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
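+        /* MULX does not read or write the arithmetic flags, so no CC
+           update is needed here. */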
switch (ot) {
- TCGv_i64 t0, t1;
default:
- t0 = tcg_temp_new_i64();
- t1 = tcg_temp_new_i64();
-#ifdef TARGET_X86_64
- tcg_gen_ext32u_i64(t0, cpu_T[0]);
- tcg_gen_ext32u_i64(t1, cpu_regs[R_EDX]);
-#else
- tcg_gen_extu_i32_i64(t0, cpu_T[0]);
- tcg_gen_extu_i32_i64(t0, cpu_regs[R_EDX]);
-#endif
- tcg_gen_mul_i64(t0, t0, t1);
- tcg_gen_trunc_i64_tl(cpu_T[0], t0);
- tcg_gen_shri_i64(t0, t0, 32);
- tcg_gen_trunc_i64_tl(cpu_T[1], t0);
- tcg_temp_free_i64(t0);
- tcg_temp_free_i64(t1);
- gen_op_mov_reg_T0(OT_LONG, s->vex_v);
- gen_op_mov_reg_T1(OT_LONG, reg);
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EDX]);
+ tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
+ cpu_tmp2_i32, cpu_tmp3_i32);
+ tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], cpu_tmp2_i32);
+ tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp3_i32);
break;
#ifdef TARGET_X86_64
case OT_QUAD:
- tcg_gen_mov_tl(cpu_T[1], cpu_regs[R_EDX]);
- tcg_gen_mul_tl(cpu_regs[s->vex_v], cpu_T[0], cpu_T[1]);
- gen_helper_umulh(cpu_regs[reg], cpu_T[0], cpu_T[1]);
+ tcg_gen_mulu2_i64(cpu_regs[s->vex_v], cpu_regs[reg],
+ cpu_T[0], cpu_regs[R_EDX]);
break;
#endif
}
if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) {
goto illegal_op;
} else {
- TCGv carry_in, carry_out;
+ TCGv carry_in, carry_out, zero;
int end_op;
ot = (s->dflag == 2 ? OT_QUAD : OT_LONG);
carry_in = carry_out;
break;
default:
- end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADCOX);
+ end_op = (b == 0x1f6 ? CC_OP_ADCX : CC_OP_ADOX);
break;
}
/* If we can't reuse carry-out, get it out of EFLAGS. */
#endif
default:
/* Otherwise compute the carry-out in two steps. */
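+                    /* First add carry_in to T0, capturing the carry in
+                       carry_out; then add T0 into reg, folding that carry
+                       into the final carry-out. */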
- tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_regs[reg]);
- tcg_gen_setcond_tl(TCG_COND_LTU, cpu_tmp4,
- cpu_T[0], cpu_regs[reg]);
- tcg_gen_add_tl(cpu_regs[reg], cpu_T[0], carry_in);
- tcg_gen_setcond_tl(TCG_COND_LTU, carry_out,
- cpu_regs[reg], cpu_T[0]);
- tcg_gen_or_tl(carry_out, carry_out, cpu_tmp4);
+ zero = tcg_const_tl(0);
+ tcg_gen_add2_tl(cpu_T[0], carry_out,
+ cpu_T[0], zero,
+ carry_in, zero);
+ tcg_gen_add2_tl(cpu_regs[reg], carry_out,
+ cpu_regs[reg], carry_out,
+ cpu_T[0], zero);
+ tcg_temp_free(zero);
break;
}
- /* We began with all flags computed to CC_SRC, and we
- have now placed the carry-out in CC_DST. All that
- is left is to record the CC_OP. */
set_cc_op(s, end_op);
}
break;
if (mod == 3)
gen_op_mov_TN_reg(OT_LONG, 0, rm);
else
- tcg_gen_qemu_ld8u(cpu_tmp0, cpu_A0,
+ tcg_gen_qemu_ld8u(cpu_T[0], cpu_A0,
(s->mem_index >> 2) - 1);
- tcg_gen_st8_tl(cpu_tmp0, cpu_env, offsetof(CPUX86State,
+ tcg_gen_st8_tl(cpu_T[0], cpu_env, offsetof(CPUX86State,
xmm_regs[reg].XMM_B(val & 15)));
break;
case 0x21: /* insertps */
}
s->pc = pc_start;
prefixes = 0;
- aflag = s->code32;
- dflag = s->code32;
s->override = -1;
rex_w = -1;
rex_r = 0;
}
s->pc++;
- /* 4.1.1-4.1.3: No preceeding lock, 66, f2, f3, or rex prefixes. */
+ /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
| PREFIX_LOCK | PREFIX_DATA)) {
goto illegal_op;
}
/* Post-process prefixes. */
- if (prefixes & PREFIX_DATA) {
- dflag ^= 1;
- }
- if (prefixes & PREFIX_ADR) {
- aflag ^= 1;
- }
-#ifdef TARGET_X86_64
if (CODE64(s)) {
- if (rex_w == 1) {
- /* 0x66 is ignored if rex.w is set */
- dflag = 2;
+ /* In 64-bit mode, the default data size is 32-bit. Select 64-bit
+ data with rex_w, and 16-bit data with 0x66; rex_w takes precedence
+ over 0x66 if both are present. */
+ dflag = (rex_w > 0 ? 2 : prefixes & PREFIX_DATA ? 0 : 1);
+ /* In 64-bit mode, 0x67 selects 32-bit addressing. */
+ aflag = (prefixes & PREFIX_ADR ? 1 : 2);
+ } else {
+ /* In 16/32-bit mode, 0x66 selects the opposite data size. */
+ dflag = s->code32;
+ if (prefixes & PREFIX_DATA) {
+ dflag ^= 1;
}
- if (!(prefixes & PREFIX_ADR)) {
- aflag = 2;
+ /* In 16/32-bit mode, 0x67 selects the opposite addressing. */
+ aflag = s->code32;
+ if (prefixes & PREFIX_ADR) {
+ aflag ^= 1;
}
}
-#endif
s->prefix = prefixes;
s->aflag = aflag;
} else if (op == OP_XORL && rm == reg) {
xor_zero:
/* xor reg, reg optimisation */
+ set_cc_op(s, CC_OP_CLR);
gen_op_movl_T0_0();
- set_cc_op(s, CC_OP_LOGICB + ot);
gen_op_mov_reg_T0(ot, reg);
- gen_op_update1_cc();
break;
} else {
opreg = rm;
break;
default:
case OT_LONG:
-#ifdef TARGET_X86_64
- gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
- tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
- tcg_gen_ext32u_tl(cpu_T[1], cpu_T[1]);
- tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
- gen_op_mov_reg_T0(OT_LONG, R_EAX);
- tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
- tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32);
- gen_op_mov_reg_T0(OT_LONG, R_EDX);
- tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
-#else
- {
- TCGv_i64 t0, t1;
- t0 = tcg_temp_new_i64();
- t1 = tcg_temp_new_i64();
- gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
- tcg_gen_extu_i32_i64(t0, cpu_T[0]);
- tcg_gen_extu_i32_i64(t1, cpu_T[1]);
- tcg_gen_mul_i64(t0, t0, t1);
- tcg_gen_trunc_i64_i32(cpu_T[0], t0);
- gen_op_mov_reg_T0(OT_LONG, R_EAX);
- tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
- tcg_gen_shri_i64(t0, t0, 32);
- tcg_gen_trunc_i64_i32(cpu_T[0], t0);
- gen_op_mov_reg_T0(OT_LONG, R_EDX);
- tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
- }
-#endif
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
+ tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
+ cpu_tmp2_i32, cpu_tmp3_i32);
+ tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
+ tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
+ tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
set_cc_op(s, CC_OP_MULL);
break;
#ifdef TARGET_X86_64
case OT_QUAD:
- gen_helper_mulq_EAX_T0(cpu_env, cpu_T[0]);
+ tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
+ cpu_T[0], cpu_regs[R_EAX]);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
+ tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
set_cc_op(s, CC_OP_MULQ);
break;
#endif
break;
default:
case OT_LONG:
-#ifdef TARGET_X86_64
- gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
- tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
- tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
- tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
- gen_op_mov_reg_T0(OT_LONG, R_EAX);
- tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
- tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]);
- tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
- tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32);
- gen_op_mov_reg_T0(OT_LONG, R_EDX);
-#else
- {
- TCGv_i64 t0, t1;
- t0 = tcg_temp_new_i64();
- t1 = tcg_temp_new_i64();
- gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
- tcg_gen_ext_i32_i64(t0, cpu_T[0]);
- tcg_gen_ext_i32_i64(t1, cpu_T[1]);
- tcg_gen_mul_i64(t0, t0, t1);
- tcg_gen_trunc_i64_i32(cpu_T[0], t0);
- gen_op_mov_reg_T0(OT_LONG, R_EAX);
- tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
- tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31);
- tcg_gen_shri_i64(t0, t0, 32);
- tcg_gen_trunc_i64_i32(cpu_T[0], t0);
- gen_op_mov_reg_T0(OT_LONG, R_EDX);
- tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
- }
-#endif
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
+ tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
+ cpu_tmp2_i32, cpu_tmp3_i32);
+ tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
+ tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
+ tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
+ tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
+ tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
set_cc_op(s, CC_OP_MULL);
break;
#ifdef TARGET_X86_64
case OT_QUAD:
- gen_helper_imulq_EAX_T0(cpu_env, cpu_T[0]);
+ tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
+ cpu_T[0], cpu_regs[R_EAX]);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
+ tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
+ tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
set_cc_op(s, CC_OP_MULQ);
break;
#endif
} else {
gen_op_mov_TN_reg(ot, 1, reg);
}
-
-#ifdef TARGET_X86_64
- if (ot == OT_QUAD) {
- gen_helper_imulq_T0_T1(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
- } else
-#endif
- if (ot == OT_LONG) {
+ switch (ot) {
#ifdef TARGET_X86_64
- tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
- tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
- tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
- tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
- tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]);
- tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
-#else
- {
- TCGv_i64 t0, t1;
- t0 = tcg_temp_new_i64();
- t1 = tcg_temp_new_i64();
- tcg_gen_ext_i32_i64(t0, cpu_T[0]);
- tcg_gen_ext_i32_i64(t1, cpu_T[1]);
- tcg_gen_mul_i64(t0, t0, t1);
- tcg_gen_trunc_i64_i32(cpu_T[0], t0);
- tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
- tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31);
- tcg_gen_shri_i64(t0, t0, 32);
- tcg_gen_trunc_i64_i32(cpu_T[1], t0);
- tcg_gen_sub_tl(cpu_cc_src, cpu_T[1], cpu_tmp0);
- }
+ case OT_QUAD:
+ tcg_gen_muls2_i64(cpu_regs[reg], cpu_T[1], cpu_T[0], cpu_T[1]);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
+ tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
+ tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_T[1]);
+ break;
#endif
- } else {
+ case OT_LONG:
+ tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
+ tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
+ tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
+ cpu_tmp2_i32, cpu_tmp3_i32);
+ tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
+ tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
+ tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
+ tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
+ break;
+ default:
tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
            /* XXX: use 32 bit mul which could be faster */
            tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
            tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+ gen_op_mov_reg_T0(ot, reg);
+ break;
}
- gen_op_mov_reg_T0(ot, reg);
set_cc_op(s, CC_OP_MULB + ot);
break;
case 0x1c0:
tcg_gen_movi_tl(cpu_cc_dst, 0);
}
break;
- case 0x1bc: /* bsf */
- case 0x1bd: /* bsr */
- {
- int label1;
- TCGv t0;
-
- ot = dflag + OT_WORD;
- modrm = cpu_ldub_code(env, s->pc++);
- reg = ((modrm >> 3) & 7) | rex_r;
- gen_ldst_modrm(env, s,modrm, ot, OR_TMP0, 0);
- gen_extu(ot, cpu_T[0]);
- t0 = tcg_temp_local_new();
- tcg_gen_mov_tl(t0, cpu_T[0]);
- if ((b & 1) && (prefixes & PREFIX_REPZ) &&
- (s->cpuid_ext3_features & CPUID_EXT3_ABM)) {
- switch(ot) {
- case OT_WORD: gen_helper_lzcnt(cpu_T[0], t0,
- tcg_const_i32(16)); break;
- case OT_LONG: gen_helper_lzcnt(cpu_T[0], t0,
- tcg_const_i32(32)); break;
- case OT_QUAD: gen_helper_lzcnt(cpu_T[0], t0,
- tcg_const_i32(64)); break;
- }
- gen_op_mov_reg_T0(ot, reg);
+ case 0x1bc: /* bsf / tzcnt */
+ case 0x1bd: /* bsr / lzcnt */
+ ot = dflag + OT_WORD;
+ modrm = cpu_ldub_code(env, s->pc++);
+ reg = ((modrm >> 3) & 7) | rex_r;
+ gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
+ gen_extu(ot, cpu_T[0]);
+
+ /* Note that lzcnt and tzcnt are in different extensions. */
+ if ((prefixes & PREFIX_REPZ)
+ && (b & 1
+ ? s->cpuid_ext3_features & CPUID_EXT3_ABM
+ : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
+ int size = 8 << ot;
+ tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
+ if (b & 1) {
+ /* For lzcnt, reduce the target_ulong result by the
+ number of zeros that we expect to find at the top. */
+ gen_helper_clz(cpu_T[0], cpu_T[0]);
+ tcg_gen_subi_tl(cpu_T[0], cpu_T[0], TARGET_LONG_BITS - size);
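+                /* e.g. on a 64-bit target with a 16-bit operand, clz
+                   returns 48..64; subtracting 48 gives the architectural
+                   lzcnt result 0..16. */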
} else {
- label1 = gen_new_label();
- tcg_gen_movi_tl(cpu_cc_dst, 0);
- tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, label1);
- if (b & 1) {
- gen_helper_bsr(cpu_T[0], t0);
- } else {
- gen_helper_bsf(cpu_T[0], t0);
- }
- gen_op_mov_reg_T0(ot, reg);
- tcg_gen_movi_tl(cpu_cc_dst, 1);
- gen_set_label(label1);
- set_cc_op(s, CC_OP_LOGICB + ot);
+ /* For tzcnt, a zero input must return the operand size:
+ force all bits outside the operand size to 1. */
+ target_ulong mask = (target_ulong)-2 << (size - 1);
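+                /* e.g. size 16 on a 32-bit target: mask == 0xffff0000,
+                   so ctz(input | mask) is at most 16. */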
+ tcg_gen_ori_tl(cpu_T[0], cpu_T[0], mask);
+ gen_helper_ctz(cpu_T[0], cpu_T[0]);
+ }
+ /* For lzcnt/tzcnt, C and Z bits are defined and are
+ related to the result. */
+ gen_op_update1_cc();
+ set_cc_op(s, CC_OP_BMILGB + ot);
+ } else {
+ /* For bsr/bsf, only the Z bit is defined and it is related
+ to the input and not the result. */
+ tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+ set_cc_op(s, CC_OP_LOGICB + ot);
+ if (b & 1) {
+ /* For bsr, return the bit index of the first 1 bit,
+ not the count of leading zeros. */
+ gen_helper_clz(cpu_T[0], cpu_T[0]);
+ tcg_gen_xori_tl(cpu_T[0], cpu_T[0], TARGET_LONG_BITS - 1);
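+                /* clz is in [0, TARGET_LONG_BITS-1] for a nonzero input,
+                   so the XOR computes TARGET_LONG_BITS-1 - clz, i.e. the
+                   index of the most significant set bit. */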
+ } else {
+ gen_helper_ctz(cpu_T[0], cpu_T[0]);
}
- tcg_temp_free(t0);
+ /* ??? The manual says that the output is undefined when the
+ input is zero, but real hardware leaves it unchanged, and
+ real programs appear to depend on that. */
+ tcg_gen_movi_tl(cpu_tmp0, 0);
+ tcg_gen_movcond_tl(TCG_COND_EQ, cpu_T[0], cpu_cc_dst, cpu_tmp0,
+ cpu_regs[reg], cpu_T[0]);
}
+ gen_op_mov_reg_T0(ot, reg);
break;
/************************/
/* bcd */
if (flags & HF_SOFTMMU_MASK) {
dc->mem_index = (cpu_mmu_index(env) + 1) << 2;
}
- dc->cpuid_features = env->cpuid_features;
- dc->cpuid_ext_features = env->cpuid_ext_features;
- dc->cpuid_ext2_features = env->cpuid_ext2_features;
- dc->cpuid_ext3_features = env->cpuid_ext3_features;
- dc->cpuid_7_0_ebx_features = env->cpuid_7_0_ebx_features;
+ dc->cpuid_features = env->features[FEAT_1_EDX];
+ dc->cpuid_ext_features = env->features[FEAT_1_ECX];
+ dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX];
+ dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX];
+ dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX];
#ifdef TARGET_X86_64
dc->lma = (flags >> HF_LMA_SHIFT) & 1;
dc->code64 = (flags >> HF_CS64_SHIFT) & 1;
cpu_tmp2_i32 = tcg_temp_new_i32();
cpu_tmp3_i32 = tcg_temp_new_i32();
cpu_tmp4 = tcg_temp_new();
- cpu_tmp5 = tcg_temp_new();
cpu_ptr0 = tcg_temp_new_ptr();
cpu_ptr1 = tcg_temp_new_ptr();
cpu_cc_srcT = tcg_temp_local_new();
if (max_insns == 0)
max_insns = CF_COUNT_MASK;
- gen_icount_start();
+ gen_tb_start();
for(;;) {
if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) {
QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
}
if (tb->cflags & CF_LAST_IO)
gen_io_end();
- gen_icount_end(tb, num_insns);
+ gen_tb_end(tb, num_insns);
*tcg_ctx.gen_opc_ptr = INDEX_op_end;
/* we don't forget to fill the last values */
if (search_pc) {