/* Function prototype for gen_ functions for calling Neon helpers */
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
+typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
+typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
+typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
+typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
+typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
/* initialize TCG globals. */
void a64_translate_init(void)
crn, crm, op0, op1, op2));
if (!ri) {
- /* Unknown register */
+ /* Unknown register; this might be a guest error or a QEMU
+ * unimplemented feature.
+ */
+ qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
+ "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
+ isread ? "read" : "write", op0, op1, crn, crm, op2);
unallocated_encoding(s);
return;
}
*/
static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
{
- unsupported_encoding(s, insn);
+ int u = extract32(insn, 29, 1);
+ int size = extract32(insn, 22, 2);
+ int opcode = extract32(insn, 12, 5);
+ int rn = extract32(insn, 5, 5);
+ int rd = extract32(insn, 0, 5);
+ TCGv_ptr fpst;
+
+ /* For some ops (the FP ones), size[1] is part of the encoding.
+     * Strictly speaking it is not for ADDP, but size[1] is always 1
+     * for all of ADDP's valid encodings anyway.
+ */
+ opcode |= (extract32(size, 1, 1) << 5);
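+    /* e.g. ADDP has opcode 0x1b and size 0b11, giving 0x3b below */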
+
+ switch (opcode) {
+ case 0x3b: /* ADDP */
+ if (u || size != 3) {
+ unallocated_encoding(s);
+ return;
+ }
+ TCGV_UNUSED_PTR(fpst);
+ break;
+ case 0xc: /* FMAXNMP */
+ case 0xd: /* FADDP */
+ case 0xf: /* FMAXP */
+ case 0x2c: /* FMINNMP */
+ case 0x2f: /* FMINP */
+        /* FP op; size[0] selects single (32 bit) or double (64 bit) */
+ if (!u) {
+ unallocated_encoding(s);
+ return;
+ }
+ size = extract32(size, 0, 1) ? 3 : 2;
+ fpst = get_fpstatus_ptr();
+ break;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (size == 3) {
+ TCGv_i64 tcg_op1 = tcg_temp_new_i64();
+ TCGv_i64 tcg_op2 = tcg_temp_new_i64();
+ TCGv_i64 tcg_res = tcg_temp_new_i64();
+
+ read_vec_element(s, tcg_op1, rn, 0, MO_64);
+ read_vec_element(s, tcg_op2, rn, 1, MO_64);
+
+ switch (opcode) {
+ case 0x3b: /* ADDP */
+ tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
+ break;
+ case 0xc: /* FMAXNMP */
+ gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0xd: /* FADDP */
+ gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0xf: /* FMAXP */
+ gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x2c: /* FMINNMP */
+ gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x2f: /* FMINP */
+ gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ write_fp_dreg(s, rd, tcg_res);
+
+ tcg_temp_free_i64(tcg_op1);
+ tcg_temp_free_i64(tcg_op2);
+ tcg_temp_free_i64(tcg_res);
+ } else {
+ TCGv_i32 tcg_op1 = tcg_temp_new_i32();
+ TCGv_i32 tcg_op2 = tcg_temp_new_i32();
+ TCGv_i32 tcg_res = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, tcg_op1, rn, 0, MO_32);
+ read_vec_element_i32(s, tcg_op2, rn, 1, MO_32);
+
+ switch (opcode) {
+ case 0xc: /* FMAXNMP */
+ gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0xd: /* FADDP */
+ gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0xf: /* FMAXP */
+ gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x2c: /* FMINNMP */
+ gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x2f: /* FMINP */
+ gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ write_fp_sreg(s, rd, tcg_res);
+
+ tcg_temp_free_i32(tcg_op1);
+ tcg_temp_free_i32(tcg_op2);
+ tcg_temp_free_i32(tcg_res);
+ }
+
+ if (!TCGV_IS_UNUSED_PTR(fpst)) {
+ tcg_temp_free_ptr(fpst);
+ }
+}
+
+/*
+ * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
+ *
+ * This handles the common shift logic and is used by both the
+ * vector and scalar code.
+ */
+static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
+ TCGv_i64 tcg_rnd, bool accumulate,
+ bool is_u, int size, int shift)
+{
+ bool extended_result = false;
+ bool round = !TCGV_IS_UNUSED_I64(tcg_rnd);
+ int ext_lshift = 0;
+ TCGv_i64 tcg_src_hi;
+
+ if (round && size == 3) {
+ extended_result = true;
+ ext_lshift = 64 - shift;
+ tcg_src_hi = tcg_temp_new_i64();
+ } else if (shift == 64) {
+ if (!accumulate && is_u) {
+ /* result is zero */
+ tcg_gen_movi_i64(tcg_res, 0);
+ return;
+ }
+ }
+
+ /* Deal with the rounding step */
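+    /* tcg_rnd, when in use, holds 1 << (shift - 1); adding it before
+     * shifting implements round-to-nearest, e.g. a rounding shift
+     * right by 2 of the value 6 computes (6 + 2) >> 2 = 2.
+     */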
+ if (round) {
+ if (extended_result) {
+ TCGv_i64 tcg_zero = tcg_const_i64(0);
+ if (!is_u) {
+ /* take care of sign extending tcg_res */
+ tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
+ tcg_gen_add2_i64(tcg_src, tcg_src_hi,
+ tcg_src, tcg_src_hi,
+ tcg_rnd, tcg_zero);
+ } else {
+ tcg_gen_add2_i64(tcg_src, tcg_src_hi,
+ tcg_src, tcg_zero,
+ tcg_rnd, tcg_zero);
+ }
+ tcg_temp_free_i64(tcg_zero);
+ } else {
+ tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
+ }
+ }
+
+ /* Now do the shift right */
+ if (round && extended_result) {
+ /* extended case, >64 bit precision required */
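+        /* {tcg_src_hi:tcg_src} holds the 128-bit rounded value; the
+         * result is bits [shift + 63 : shift] of it, reassembled from
+         * the two halves below.
+         */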
+ if (ext_lshift == 0) {
+ /* special case, only high bits matter */
+ tcg_gen_mov_i64(tcg_src, tcg_src_hi);
+ } else {
+ tcg_gen_shri_i64(tcg_src, tcg_src, shift);
+ tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
+ tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
+ }
+ } else {
+ if (is_u) {
+ if (shift == 64) {
+ /* essentially shifting in 64 zeros */
+ tcg_gen_movi_i64(tcg_src, 0);
+ } else {
+ tcg_gen_shri_i64(tcg_src, tcg_src, shift);
+ }
+ } else {
+ if (shift == 64) {
+ /* effectively extending the sign-bit */
+ tcg_gen_sari_i64(tcg_src, tcg_src, 63);
+ } else {
+ tcg_gen_sari_i64(tcg_src, tcg_src, shift);
+ }
+ }
+ }
+
+ if (accumulate) {
+ tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
+ } else {
+ tcg_gen_mov_i64(tcg_res, tcg_src);
+ }
+
+ if (extended_result) {
+ tcg_temp_free_i64(tcg_src_hi);
+ }
+}
+
+/* Common SHL/SLI - Shift left with an optional insert */
+static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
+ bool insert, int shift)
+{
+ if (insert) { /* SLI */
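+        /* deposit writes the low (64 - shift) bits of tcg_src into
+         * bits [63:shift] of tcg_res, preserving the destination's
+         * low shift bits as SLI requires.
+         */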
+ tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift);
+ } else { /* SHL */
+ tcg_gen_shli_i64(tcg_res, tcg_src, shift);
+ }
+}
+
+/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
+static void handle_scalar_simd_shri(DisasContext *s,
+ bool is_u, int immh, int immb,
+ int opcode, int rn, int rd)
+{
+ const int size = 3;
+ int immhb = immh << 3 | immb;
+ int shift = 2 * (8 << size) - immhb;
+ bool accumulate = false;
+ bool round = false;
+ TCGv_i64 tcg_rn;
+ TCGv_i64 tcg_rd;
+ TCGv_i64 tcg_round;
+
+ if (!extract32(immh, 3, 1)) {
+ unallocated_encoding(s);
+ return;
+ }
+
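+    /* With immh[3] set, immhb is in [64, 127], so the shift amount
+     * is in [1, 64].
+     */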
+ switch (opcode) {
+ case 0x02: /* SSRA / USRA (accumulate) */
+ accumulate = true;
+ break;
+ case 0x04: /* SRSHR / URSHR (rounding) */
+ round = true;
+ break;
+ case 0x06: /* SRSRA / URSRA (accum + rounding) */
+ accumulate = round = true;
+ break;
+ }
+
+ if (round) {
+ uint64_t round_const = 1ULL << (shift - 1);
+ tcg_round = tcg_const_i64(round_const);
+ } else {
+ TCGV_UNUSED_I64(tcg_round);
+ }
+
+ tcg_rn = read_fp_dreg(s, rn);
+ tcg_rd = accumulate ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
+
+ handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
+ accumulate, is_u, size, shift);
+
+ write_fp_dreg(s, rd, tcg_rd);
+
+ tcg_temp_free_i64(tcg_rn);
+ tcg_temp_free_i64(tcg_rd);
+ if (round) {
+ tcg_temp_free_i64(tcg_round);
+ }
+}
+
+/* SHL/SLI - Scalar shift left */
+static void handle_scalar_simd_shli(DisasContext *s, bool insert,
+ int immh, int immb, int opcode,
+ int rn, int rd)
+{
+ int size = 32 - clz32(immh) - 1;
+ int immhb = immh << 3 | immb;
+ int shift = immhb - (8 << size);
+    TCGv_i64 tcg_rn;
+    TCGv_i64 tcg_rd;
+
+ if (!extract32(immh, 3, 1)) {
+ unallocated_encoding(s);
+ return;
+ }
+
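+    /* With immh[3] set, immhb is in [64, 127] and shift = immhb - 64
+     * is in [0, 63].
+     */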
+ tcg_rn = read_fp_dreg(s, rn);
+ tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
+
+ handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
+
+ write_fp_dreg(s, rd, tcg_rd);
+
+ tcg_temp_free_i64(tcg_rn);
+ tcg_temp_free_i64(tcg_rd);
}
/* C3.6.9 AdvSIMD scalar shift by immediate
* +-----+---+-------------+------+------+--------+---+------+------+
* | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd |
* +-----+---+-------------+------+------+--------+---+------+------+
+ *
+ * This is the scalar version, so it works on fixed-size registers.
*/
static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
{
- unsupported_encoding(s, insn);
+ int rd = extract32(insn, 0, 5);
+ int rn = extract32(insn, 5, 5);
+ int opcode = extract32(insn, 11, 5);
+ int immb = extract32(insn, 16, 3);
+ int immh = extract32(insn, 19, 4);
+ bool is_u = extract32(insn, 29, 1);
+
+ switch (opcode) {
+ case 0x00: /* SSHR / USHR */
+ case 0x02: /* SSRA / USRA */
+ case 0x04: /* SRSHR / URSHR */
+ case 0x06: /* SRSRA / URSRA */
+ handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
+ break;
+ case 0x0a: /* SHL / SLI */
+ handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
+ break;
+ default:
+ unsupported_encoding(s, insn);
+ break;
+ }
}
/* C3.6.10 AdvSIMD scalar three different
*/
static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
{
- unsupported_encoding(s, insn);
+ bool is_u = extract32(insn, 29, 1);
+ int size = extract32(insn, 22, 2);
+ int opcode = extract32(insn, 12, 4);
+ int rm = extract32(insn, 16, 5);
+ int rn = extract32(insn, 5, 5);
+ int rd = extract32(insn, 0, 5);
+
+ if (is_u) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ switch (opcode) {
+ case 0x9: /* SQDMLAL, SQDMLAL2 */
+ case 0xb: /* SQDMLSL, SQDMLSL2 */
+ case 0xd: /* SQDMULL, SQDMULL2 */
+ if (size == 0 || size == 3) {
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (size == 2) {
+ TCGv_i64 tcg_op1 = tcg_temp_new_i64();
+ TCGv_i64 tcg_op2 = tcg_temp_new_i64();
+ TCGv_i64 tcg_res = tcg_temp_new_i64();
+
+ read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
+ read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
+
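+        /* SQDMULL: do the widening multiply, then double by
+         * saturating-adding the product to itself; e.g.
+         * INT32_MIN * INT32_MIN doubled overflows and saturates.
+         */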
+ tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
+ gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
+
+ switch (opcode) {
+ case 0xd: /* SQDMULL, SQDMULL2 */
+ break;
+ case 0xb: /* SQDMLSL, SQDMLSL2 */
+ tcg_gen_neg_i64(tcg_res, tcg_res);
+ /* fall through */
+ case 0x9: /* SQDMLAL, SQDMLAL2 */
+ read_vec_element(s, tcg_op1, rd, 0, MO_64);
+ gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
+ tcg_res, tcg_op1);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ write_fp_dreg(s, rd, tcg_res);
+
+ tcg_temp_free_i64(tcg_op1);
+ tcg_temp_free_i64(tcg_op2);
+ tcg_temp_free_i64(tcg_res);
+ } else {
+ TCGv_i32 tcg_op1 = tcg_temp_new_i32();
+ TCGv_i32 tcg_op2 = tcg_temp_new_i32();
+ TCGv_i64 tcg_res = tcg_temp_new_i64();
+
+ read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
+ read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
+
+ gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
+ gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
+
+ switch (opcode) {
+ case 0xd: /* SQDMULL, SQDMULL2 */
+ break;
+ case 0xb: /* SQDMLSL, SQDMLSL2 */
+ gen_helper_neon_negl_u32(tcg_res, tcg_res);
+ /* fall through */
+ case 0x9: /* SQDMLAL, SQDMLAL2 */
+ {
+ TCGv_i64 tcg_op3 = tcg_temp_new_i64();
+ read_vec_element(s, tcg_op3, rd, 0, MO_32);
+ gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
+ tcg_res, tcg_op3);
+ tcg_temp_free_i64(tcg_op3);
+ break;
+ }
+ default:
+ g_assert_not_reached();
+ }
+
+ tcg_gen_ext32u_i64(tcg_res, tcg_res);
+ write_fp_dreg(s, rd, tcg_res);
+
+ tcg_temp_free_i32(tcg_op1);
+ tcg_temp_free_i32(tcg_op2);
+ tcg_temp_free_i64(tcg_res);
+ }
}
static void handle_3same_64(DisasContext *s, int opcode, bool u,
TCGCond cond;
switch (opcode) {
+    case 0x1: /* SQADD, UQADD */
+ if (u) {
+ gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
+ } else {
+ gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
+ }
+ break;
+    case 0x5: /* SQSUB, UQSUB */
+ if (u) {
+ gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
+ } else {
+ gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
+ }
+ break;
case 0x6: /* CMGT, CMHI */
/* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
* We implement this using setcond (test) and then negating.
tcg_gen_setcondi_i64(TCG_COND_NE, tcg_rd, tcg_rd, 0);
tcg_gen_neg_i64(tcg_rd, tcg_rd);
break;
- case 0x10: /* ADD, SUB */
+ case 0x8: /* SSHL, USHL */
if (u) {
- tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
+ gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
} else {
- tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
+ gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
}
break;
- case 0x1: /* SQADD */
- case 0x5: /* SQSUB */
- case 0x8: /* SSHL, USHL */
case 0x9: /* SQSHL, UQSHL */
+ if (u) {
+ gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
+ } else {
+ gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
+ }
+ break;
case 0xa: /* SRSHL, URSHL */
+ if (u) {
+ gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
+ } else {
+ gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
+ }
+ break;
case 0xb: /* SQRSHL, UQRSHL */
+ if (u) {
+ gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
+ } else {
+ gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
+ }
+ break;
+ case 0x10: /* ADD, SUB */
+ if (u) {
+ tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
+ } else {
+ tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
+ }
+ break;
default:
g_assert_not_reached();
}
}
-/* C3.6.11 AdvSIMD scalar three same
- * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0
- * +-----+---+-----------+------+---+------+--------+---+------+------+
- * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd |
- * +-----+---+-----------+------+---+------+--------+---+------+------+
+/* Handle the 3-same-operands float operations; shared by the scalar
+ * and vector encodings. The caller must filter out any encodings
+ * that are not valid for the variant it is dealing with.
*/
-static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
+static void handle_3same_float(DisasContext *s, int size, int elements,
+ int fpopcode, int rd, int rn, int rm)
{
- int rd = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int opcode = extract32(insn, 11, 5);
- int rm = extract32(insn, 16, 5);
- int size = extract32(insn, 22, 2);
- bool u = extract32(insn, 29, 1);
- TCGv_i64 tcg_rn;
- TCGv_i64 tcg_rm;
- TCGv_i64 tcg_rd;
+ int pass;
+ TCGv_ptr fpst = get_fpstatus_ptr();
- if (opcode >= 0x18) {
- /* Floating point: U, size[1] and opcode indicate operation */
- int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
+ for (pass = 0; pass < elements; pass++) {
+ if (size) {
+ /* Double */
+ TCGv_i64 tcg_op1 = tcg_temp_new_i64();
+ TCGv_i64 tcg_op2 = tcg_temp_new_i64();
+ TCGv_i64 tcg_res = tcg_temp_new_i64();
+
+ read_vec_element(s, tcg_op1, rn, pass, MO_64);
+ read_vec_element(s, tcg_op2, rm, pass, MO_64);
+
+ switch (fpopcode) {
+ case 0x39: /* FMLS */
+ /* As usual for ARM, separate negation for fused multiply-add */
+ gen_helper_vfp_negd(tcg_op1, tcg_op1);
+ /* fall through */
+ case 0x19: /* FMLA */
+ read_vec_element(s, tcg_res, rd, pass, MO_64);
+ gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
+ tcg_res, fpst);
+ break;
+ case 0x18: /* FMAXNM */
+ gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1a: /* FADD */
+ gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1b: /* FMULX */
+ gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1c: /* FCMEQ */
+ gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1e: /* FMAX */
+ gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1f: /* FRECPS */
+ gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x38: /* FMINNM */
+ gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x3a: /* FSUB */
+ gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x3e: /* FMIN */
+ gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x3f: /* FRSQRTS */
+ gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x5b: /* FMUL */
+ gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x5c: /* FCMGE */
+ gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x5d: /* FACGE */
+ gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x5f: /* FDIV */
+ gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x7a: /* FABD */
+ gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
+ gen_helper_vfp_absd(tcg_res, tcg_res);
+ break;
+ case 0x7c: /* FCMGT */
+ gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x7d: /* FACGT */
+ gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ write_vec_element(s, tcg_res, rd, pass, MO_64);
+
+ tcg_temp_free_i64(tcg_res);
+ tcg_temp_free_i64(tcg_op1);
+ tcg_temp_free_i64(tcg_op2);
+ } else {
+ /* Single */
+ TCGv_i32 tcg_op1 = tcg_temp_new_i32();
+ TCGv_i32 tcg_op2 = tcg_temp_new_i32();
+ TCGv_i32 tcg_res = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
+ read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
+
+ switch (fpopcode) {
+ case 0x39: /* FMLS */
+ /* As usual for ARM, separate negation for fused multiply-add */
+ gen_helper_vfp_negs(tcg_op1, tcg_op1);
+ /* fall through */
+ case 0x19: /* FMLA */
+ read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+ gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
+ tcg_res, fpst);
+ break;
+ case 0x1a: /* FADD */
+ gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1b: /* FMULX */
+ gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1c: /* FCMEQ */
+ gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1e: /* FMAX */
+ gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1f: /* FRECPS */
+ gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x18: /* FMAXNM */
+ gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x38: /* FMINNM */
+ gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x3a: /* FSUB */
+ gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x3e: /* FMIN */
+ gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x3f: /* FRSQRTS */
+ gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x5b: /* FMUL */
+ gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x5c: /* FCMGE */
+ gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x5d: /* FACGE */
+ gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x5f: /* FDIV */
+ gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x7a: /* FABD */
+ gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
+ gen_helper_vfp_abss(tcg_res, tcg_res);
+ break;
+ case 0x7c: /* FCMGT */
+ gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x7d: /* FACGT */
+ gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (elements == 1) {
+ /* scalar single so clear high part */
+ TCGv_i64 tcg_tmp = tcg_temp_new_i64();
+
+ tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
+ write_vec_element(s, tcg_tmp, rd, pass, MO_64);
+ tcg_temp_free_i64(tcg_tmp);
+ } else {
+ write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+ }
+
+ tcg_temp_free_i32(tcg_res);
+ tcg_temp_free_i32(tcg_op1);
+ tcg_temp_free_i32(tcg_op2);
+ }
+ }
+
+ tcg_temp_free_ptr(fpst);
+
+ if ((elements << size) < 4) {
+ /* scalar, or non-quad vector op */
+ clear_vec_high(s, rd);
+ }
+}
+
+/* C3.6.11 AdvSIMD scalar three same
+ * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0
+ * +-----+---+-----------+------+---+------+--------+---+------+------+
+ * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd |
+ * +-----+---+-----------+------+---+------+--------+---+------+------+
+ */
+static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
+{
+ int rd = extract32(insn, 0, 5);
+ int rn = extract32(insn, 5, 5);
+ int opcode = extract32(insn, 11, 5);
+ int rm = extract32(insn, 16, 5);
+ int size = extract32(insn, 22, 2);
+ bool u = extract32(insn, 29, 1);
+ TCGv_i64 tcg_rd;
+
+ if (opcode >= 0x18) {
+ /* Floating point: U, size[1] and opcode indicate operation */
+ int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
switch (fpopcode) {
case 0x1b: /* FMULX */
- case 0x1c: /* FCMEQ */
case 0x1f: /* FRECPS */
case 0x3f: /* FRSQRTS */
- case 0x5c: /* FCMGE */
case 0x5d: /* FACGE */
- case 0x7a: /* FABD */
- case 0x7c: /* FCMGT */
case 0x7d: /* FACGT */
- unsupported_encoding(s, insn);
- return;
+ case 0x1c: /* FCMEQ */
+ case 0x5c: /* FCMGE */
+ case 0x7c: /* FCMGT */
+ case 0x7a: /* FABD */
+ break;
default:
unallocated_encoding(s);
return;
}
+
+ handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
+ return;
}
switch (opcode) {
case 0x1: /* SQADD, UQADD */
case 0x5: /* SQSUB, UQSUB */
+ case 0x9: /* SQSHL, UQSHL */
+ case 0xb: /* SQRSHL, UQRSHL */
+ break;
case 0x8: /* SSHL, USHL */
case 0xa: /* SRSHL, URSHL */
- unsupported_encoding(s, insn);
- return;
case 0x6: /* CMGT, CMHI */
case 0x7: /* CMGE, CMHS */
case 0x11: /* CMTST, CMEQ */
return;
}
break;
- case 0x9: /* SQSHL, UQSHL */
- case 0xb: /* SQRSHL, UQRSHL */
- unsupported_encoding(s, insn);
- return;
case 0x16: /* SQDMULH, SQRDMULH (vector) */
if (size != 1 && size != 2) {
unallocated_encoding(s);
return;
}
- unsupported_encoding(s, insn);
- return;
+ break;
default:
unallocated_encoding(s);
return;
}
- tcg_rn = read_fp_dreg(s, rn); /* op1 */
- tcg_rm = read_fp_dreg(s, rm); /* op2 */
tcg_rd = tcg_temp_new_i64();
- /* For the moment we only support the opcodes which are
- * 64-bit-width only. The size != 3 cases will
- * be handled later when the relevant ops are implemented.
- */
- handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
+ if (size == 3) {
+ TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
+ TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
- write_fp_dreg(s, rd, tcg_rd);
+ handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
+ tcg_temp_free_i64(tcg_rn);
+ tcg_temp_free_i64(tcg_rm);
+ } else {
+ /* Do a single operation on the lowest element in the vector.
+ * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
+ * no side effects for all these operations.
+ * OPTME: special-purpose helpers would avoid doing some
+ * unnecessary work in the helper for the 8 and 16 bit cases.
+ */
+ NeonGenTwoOpEnvFn *genenvfn;
+ TCGv_i32 tcg_rn = tcg_temp_new_i32();
+ TCGv_i32 tcg_rm = tcg_temp_new_i32();
+ TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
- tcg_temp_free_i64(tcg_rn);
- tcg_temp_free_i64(tcg_rm);
- tcg_temp_free_i64(tcg_rd);
-}
+ read_vec_element_i32(s, tcg_rn, rn, 0, size);
+ read_vec_element_i32(s, tcg_rm, rm, 0, size);
-/* C3.6.12 AdvSIMD scalar two reg misc
- * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
- * +-----+---+-----------+------+-----------+--------+-----+------+------+
- * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd |
- * +-----+---+-----------+------+-----------+--------+-----+------+------+
- */
-static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
-{
- unsupported_encoding(s, insn);
-}
+ switch (opcode) {
+ case 0x1: /* SQADD, UQADD */
+ {
+ static NeonGenTwoOpEnvFn * const fns[3][2] = {
+ { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
+ { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
+ { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
+ };
+ genenvfn = fns[size][u];
+ break;
+ }
+ case 0x5: /* SQSUB, UQSUB */
+ {
+ static NeonGenTwoOpEnvFn * const fns[3][2] = {
+ { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
+ { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
+ { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
+ };
+ genenvfn = fns[size][u];
+ break;
+ }
+ case 0x9: /* SQSHL, UQSHL */
+ {
+ static NeonGenTwoOpEnvFn * const fns[3][2] = {
+ { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
+ { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
+ { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
+ };
+ genenvfn = fns[size][u];
+ break;
+ }
+ case 0xb: /* SQRSHL, UQRSHL */
+ {
+ static NeonGenTwoOpEnvFn * const fns[3][2] = {
+ { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
+ { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
+ { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
+ };
+ genenvfn = fns[size][u];
+ break;
+ }
+ case 0x16: /* SQDMULH, SQRDMULH */
+ {
+ static NeonGenTwoOpEnvFn * const fns[2][2] = {
+ { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
+ { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
+ };
+ assert(size == 1 || size == 2);
+ genenvfn = fns[size - 1][u];
+ break;
+ }
+ default:
+ g_assert_not_reached();
+ }
-/* C3.6.13 AdvSIMD scalar x indexed element
- * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0
- * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
- * | 0 1 | U | 1 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd |
- * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
- */
-static void disas_simd_scalar_indexed(DisasContext *s, uint32_t insn)
-{
- unsupported_encoding(s, insn);
-}
+ genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
+ tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
+ tcg_temp_free_i32(tcg_rd32);
+ tcg_temp_free_i32(tcg_rn);
+ tcg_temp_free_i32(tcg_rm);
+ }
-/* C3.6.14 AdvSIMD shift by immediate
- * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0
- * +---+---+---+-------------+------+------+--------+---+------+------+
- * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd |
- * +---+---+---+-------------+------+------+--------+---+------+------+
- */
-static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
-{
- unsupported_encoding(s, insn);
+ write_fp_dreg(s, rd, tcg_rd);
+
+ tcg_temp_free_i64(tcg_rd);
}
-static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
- int opcode, int rd, int rn, int rm)
+static void handle_2misc_64(DisasContext *s, int opcode, bool u,
+ TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
{
- /* 3-reg-different widening insns: 64 x 64 -> 128 */
- TCGv_i64 tcg_res[2];
- int pass, accop;
-
- tcg_res[0] = tcg_temp_new_i64();
- tcg_res[1] = tcg_temp_new_i64();
-
- /* Does this op do an adding accumulate, a subtracting accumulate,
- * or no accumulate at all?
+ /* Handle 64->64 opcodes which are shared between the scalar and
+ * vector 2-reg-misc groups. We cover every integer opcode where size == 3
+ * is valid in either group and also the double-precision fp ops.
*/
+ TCGCond cond;
+
switch (opcode) {
- case 5:
- case 8:
- case 9:
- accop = 1;
+ case 0x5: /* NOT */
+ /* This opcode is shared with CNT and RBIT but we have earlier
+ * enforced that size == 3 if and only if this is the NOT insn.
+ */
+ tcg_gen_not_i64(tcg_rd, tcg_rn);
break;
- case 10:
- case 11:
- accop = -1;
+ case 0xa: /* CMLT */
+ /* 64 bit integer comparison against zero, result is
+         * test ? (2^64 - 1) : 0. We implement via setcond(test) and
+         * then negating.
+ */
+ cond = TCG_COND_LT;
+ do_cmop:
+ tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
+ tcg_gen_neg_i64(tcg_rd, tcg_rd);
break;
- default:
- accop = 0;
+ case 0x8: /* CMGT, CMGE */
+ cond = u ? TCG_COND_GE : TCG_COND_GT;
+ goto do_cmop;
+ case 0x9: /* CMEQ, CMLE */
+ cond = u ? TCG_COND_LE : TCG_COND_EQ;
+ goto do_cmop;
+ case 0xb: /* ABS, NEG */
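+        /* There is no TCG abs op: negate and use movcond to pick the
+         * original value when it is positive. Note that ABS(INT64_MIN)
+         * is INT64_MIN, which this sequence produces.
+         */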
+ if (u) {
+ tcg_gen_neg_i64(tcg_rd, tcg_rn);
+ } else {
+ TCGv_i64 tcg_zero = tcg_const_i64(0);
+ tcg_gen_neg_i64(tcg_rd, tcg_rn);
+ tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
+ tcg_rn, tcg_rd);
+ tcg_temp_free_i64(tcg_zero);
+ }
break;
+ case 0x2f: /* FABS */
+ gen_helper_vfp_absd(tcg_rd, tcg_rn);
+ break;
+ case 0x6f: /* FNEG */
+ gen_helper_vfp_negd(tcg_rd, tcg_rn);
+ break;
+ default:
+ g_assert_not_reached();
}
+}
- if (accop != 0) {
- read_vec_element(s, tcg_res[0], rd, 0, MO_64);
- read_vec_element(s, tcg_res[1], rd, 1, MO_64);
- }
-
- /* size == 2 means two 32x32->64 operations; this is worth special
- * casing because we can generally handle it inline.
- */
- if (size == 2) {
- for (pass = 0; pass < 2; pass++) {
- TCGv_i64 tcg_op1 = tcg_temp_new_i64();
- TCGv_i64 tcg_op2 = tcg_temp_new_i64();
- TCGv_i64 tcg_passres;
- TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
+static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
+ bool is_scalar, bool is_u, bool is_q,
+ int size, int rn, int rd)
+{
+ bool is_double = (size == 3);
+ TCGv_ptr fpst = get_fpstatus_ptr();
- int elt = pass + is_q * 2;
+ if (is_double) {
+ TCGv_i64 tcg_op = tcg_temp_new_i64();
+ TCGv_i64 tcg_zero = tcg_const_i64(0);
+ TCGv_i64 tcg_res = tcg_temp_new_i64();
+ NeonGenTwoDoubleOPFn *genfn;
+ bool swap = false;
+ int pass;
- read_vec_element(s, tcg_op1, rn, elt, memop);
- read_vec_element(s, tcg_op2, rm, elt, memop);
+ switch (opcode) {
+ case 0x2e: /* FCMLT (zero) */
+ swap = true;
+ /* fallthrough */
+ case 0x2c: /* FCMGT (zero) */
+ genfn = gen_helper_neon_cgt_f64;
+ break;
+ case 0x2d: /* FCMEQ (zero) */
+ genfn = gen_helper_neon_ceq_f64;
+ break;
+ case 0x6d: /* FCMLE (zero) */
+ swap = true;
+ /* fall through */
+ case 0x6c: /* FCMGE (zero) */
+ genfn = gen_helper_neon_cge_f64;
+ break;
+ default:
+ g_assert_not_reached();
+ }
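+
+        /* There is no "compare less than" helper: FCMLT and FCMLE are
+         * computed by swapping the operands and using the GT/GE
+         * helper, since FCMLT(x, 0) == FCMGT(0, x).
+         */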
- if (accop == 0) {
- tcg_passres = tcg_res[pass];
+ for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
+ read_vec_element(s, tcg_op, rn, pass, MO_64);
+ if (swap) {
+ genfn(tcg_res, tcg_zero, tcg_op, fpst);
} else {
- tcg_passres = tcg_temp_new_i64();
+ genfn(tcg_res, tcg_op, tcg_zero, fpst);
}
+ write_vec_element(s, tcg_res, rd, pass, MO_64);
+ }
+ if (is_scalar) {
+ clear_vec_high(s, rd);
+ }
- switch (opcode) {
- case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
+ tcg_temp_free_i64(tcg_res);
+ tcg_temp_free_i64(tcg_zero);
+ tcg_temp_free_i64(tcg_op);
+ } else {
+ TCGv_i32 tcg_op = tcg_temp_new_i32();
+ TCGv_i32 tcg_zero = tcg_const_i32(0);
+ TCGv_i32 tcg_res = tcg_temp_new_i32();
+ NeonGenTwoSingleOPFn *genfn;
+ bool swap = false;
+ int pass, maxpasses;
+
+ switch (opcode) {
+ case 0x2e: /* FCMLT (zero) */
+ swap = true;
+ /* fall through */
+ case 0x2c: /* FCMGT (zero) */
+ genfn = gen_helper_neon_cgt_f32;
+ break;
+ case 0x2d: /* FCMEQ (zero) */
+ genfn = gen_helper_neon_ceq_f32;
+ break;
+ case 0x6d: /* FCMLE (zero) */
+ swap = true;
+ /* fall through */
+ case 0x6c: /* FCMGE (zero) */
+ genfn = gen_helper_neon_cge_f32;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (is_scalar) {
+ maxpasses = 1;
+ } else {
+ maxpasses = is_q ? 4 : 2;
+ }
+
+ for (pass = 0; pass < maxpasses; pass++) {
+ read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
+ if (swap) {
+ genfn(tcg_res, tcg_zero, tcg_op, fpst);
+ } else {
+ genfn(tcg_res, tcg_op, tcg_zero, fpst);
+ }
+ if (is_scalar) {
+ write_fp_sreg(s, rd, tcg_res);
+ } else {
+ write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+ }
+ }
+ tcg_temp_free_i32(tcg_res);
+ tcg_temp_free_i32(tcg_zero);
+ tcg_temp_free_i32(tcg_op);
+ if (!is_q && !is_scalar) {
+ clear_vec_high(s, rd);
+ }
+ }
+
+ tcg_temp_free_ptr(fpst);
+}
+
+/* C3.6.12 AdvSIMD scalar two reg misc
+ * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
+ * +-----+---+-----------+------+-----------+--------+-----+------+------+
+ * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd |
+ * +-----+---+-----------+------+-----------+--------+-----+------+------+
+ */
+static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
+{
+ int rd = extract32(insn, 0, 5);
+ int rn = extract32(insn, 5, 5);
+ int opcode = extract32(insn, 12, 5);
+ int size = extract32(insn, 22, 2);
+ bool u = extract32(insn, 29, 1);
+
+ switch (opcode) {
+ case 0xa: /* CMLT */
+ if (u) {
+ unallocated_encoding(s);
+ return;
+ }
+ /* fall through */
+ case 0x8: /* CMGT, CMGE */
+ case 0x9: /* CMEQ, CMLE */
+ case 0xb: /* ABS, NEG */
+ if (size != 3) {
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ case 0xc ... 0xf:
+ case 0x16 ... 0x1d:
+ case 0x1f:
+ /* Floating point: U, size[1] and opcode indicate operation;
+ * size[0] indicates single or double precision.
+ */
+ opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
+ size = extract32(size, 0, 1) ? 3 : 2;
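+        /* e.g. scalar FCMGT (zero) has U=0, size=1x, opcode 0x0c,
+         * which maps to 0x2c here.
+         */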
+ switch (opcode) {
+ case 0x2c: /* FCMGT (zero) */
+ case 0x2d: /* FCMEQ (zero) */
+ case 0x2e: /* FCMLT (zero) */
+ case 0x6c: /* FCMGE (zero) */
+ case 0x6d: /* FCMLE (zero) */
+ handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
+ return;
+ case 0x1a: /* FCVTNS */
+ case 0x1b: /* FCVTMS */
+ case 0x1c: /* FCVTAS */
+ case 0x1d: /* SCVTF */
+ case 0x3a: /* FCVTPS */
+ case 0x3b: /* FCVTZS */
+ case 0x3d: /* FRECPE */
+ case 0x3f: /* FRECPX */
+ case 0x56: /* FCVTXN, FCVTXN2 */
+ case 0x5a: /* FCVTNU */
+ case 0x5b: /* FCVTMU */
+ case 0x5c: /* FCVTAU */
+ case 0x5d: /* UCVTF */
+ case 0x7a: /* FCVTPU */
+ case 0x7b: /* FCVTZU */
+ case 0x7d: /* FRSQRTE */
+ unsupported_encoding(s, insn);
+ return;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ default:
+ /* Other categories of encoding in this class:
+ * + SUQADD/USQADD/SQABS/SQNEG : size 8, 16, 32 or 64
+ * + SQXTN/SQXTN2/SQXTUN/SQXTUN2/UQXTN/UQXTN2:
+ * narrowing saturate ops: size 64/32/16 -> 32/16/8
+ */
+ unsupported_encoding(s, insn);
+ return;
+ }
+
+ if (size == 3) {
+ TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
+ TCGv_i64 tcg_rd = tcg_temp_new_i64();
+
+ handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn);
+ write_fp_dreg(s, rd, tcg_rd);
+ tcg_temp_free_i64(tcg_rd);
+ tcg_temp_free_i64(tcg_rn);
+ } else {
+ /* the 'size might not be 64' ops aren't implemented yet */
+ g_assert_not_reached();
+ }
+}
+
+/* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
+static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
+ int immh, int immb, int opcode, int rn, int rd)
+{
+ int size = 32 - clz32(immh) - 1;
+ int immhb = immh << 3 | immb;
+ int shift = 2 * (8 << size) - immhb;
+ bool accumulate = false;
+ bool round = false;
+ int dsize = is_q ? 128 : 64;
+ int esize = 8 << size;
+    int elements = dsize / esize;
+ TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
+ TCGv_i64 tcg_rn = new_tmp_a64(s);
+ TCGv_i64 tcg_rd = new_tmp_a64(s);
+ TCGv_i64 tcg_round;
+ int i;
+
+ if (extract32(immh, 3, 1) && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (size > 3 && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ switch (opcode) {
+ case 0x02: /* SSRA / USRA (accumulate) */
+ accumulate = true;
+ break;
+ case 0x04: /* SRSHR / URSHR (rounding) */
+ round = true;
+ break;
+ case 0x06: /* SRSRA / URSRA (accum + rounding) */
+ accumulate = round = true;
+ break;
+ }
+
+ if (round) {
+ uint64_t round_const = 1ULL << (shift - 1);
+ tcg_round = tcg_const_i64(round_const);
+ } else {
+ TCGV_UNUSED_I64(tcg_round);
+ }
+
+ for (i = 0; i < elements; i++) {
+ read_vec_element(s, tcg_rn, rn, i, memop);
+ if (accumulate) {
+ read_vec_element(s, tcg_rd, rd, i, memop);
+ }
+
+ handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
+ accumulate, is_u, size, shift);
+
+ write_vec_element(s, tcg_rd, rd, i, size);
+ }
+
+ if (!is_q) {
+ clear_vec_high(s, rd);
+ }
+
+ if (round) {
+ tcg_temp_free_i64(tcg_round);
+ }
+}
+
+/* SHL/SLI - Vector shift left */
+static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
+ int immh, int immb, int opcode, int rn, int rd)
+{
+ int size = 32 - clz32(immh) - 1;
+ int immhb = immh << 3 | immb;
+ int shift = immhb - (8 << size);
+ int dsize = is_q ? 128 : 64;
+ int esize = 8 << size;
+    int elements = dsize / esize;
+ TCGv_i64 tcg_rn = new_tmp_a64(s);
+ TCGv_i64 tcg_rd = new_tmp_a64(s);
+ int i;
+
+ if (extract32(immh, 3, 1) && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (size > 3 && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ for (i = 0; i < elements; i++) {
+ read_vec_element(s, tcg_rn, rn, i, size);
+ if (insert) {
+ read_vec_element(s, tcg_rd, rd, i, size);
+ }
+
+ handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
+
+ write_vec_element(s, tcg_rd, rd, i, size);
+ }
+
+ if (!is_q) {
+ clear_vec_high(s, rd);
+ }
+}
+
+/* USHLL/SHLL - Vector shift left with widening */
+static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
+ int immh, int immb, int opcode, int rn, int rd)
+{
+ int size = 32 - clz32(immh) - 1;
+ int immhb = immh << 3 | immb;
+ int shift = immhb - (8 << size);
+ int dsize = 64;
+ int esize = 8 << size;
+    int elements = dsize / esize;
+ TCGv_i64 tcg_rn = new_tmp_a64(s);
+ TCGv_i64 tcg_rd = new_tmp_a64(s);
+ int i;
+
+ if (size >= 3) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ /* For the LL variants the store is larger than the load,
+ * so if rd == rn we would overwrite parts of our input.
+     * Load everything up front and use shifts in the main loop.
+ */
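+    /* The option argument size | (!is_u << 2) to ext_and_shift_reg
+     * selects a UXT/SXT extension of the element width,
+     * sign-extending for SSHLL.
+     */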
+ read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
+
+ for (i = 0; i < elements; i++) {
+ tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
+ ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
+ tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
+ write_vec_element(s, tcg_rd, rd, i, size + 1);
+ }
+}
+
+
+/* C3.6.14 AdvSIMD shift by immediate
+ * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0
+ * +---+---+---+-------------+------+------+--------+---+------+------+
+ * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd |
+ * +---+---+---+-------------+------+------+--------+---+------+------+
+ */
+static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
+{
+ int rd = extract32(insn, 0, 5);
+ int rn = extract32(insn, 5, 5);
+ int opcode = extract32(insn, 11, 5);
+ int immb = extract32(insn, 16, 3);
+ int immh = extract32(insn, 19, 4);
+ bool is_u = extract32(insn, 29, 1);
+ bool is_q = extract32(insn, 30, 1);
+
+ switch (opcode) {
+ case 0x00: /* SSHR / USHR */
+ case 0x02: /* SSRA / USRA (accumulate) */
+ case 0x04: /* SRSHR / URSHR (rounding) */
+ case 0x06: /* SRSRA / URSRA (accum + rounding) */
+ handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
+ break;
+ case 0x0a: /* SHL / SLI */
+ handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
+ break;
+ case 0x14: /* SSHLL / USHLL */
+ handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
+ break;
+ default:
+ /* We don't currently implement any of the Narrow or saturating shifts;
+ * nor do we implement the fixed-point conversions in this
+ * encoding group (SCVTF, FCVTZS, UCVTF, FCVTZU).
+ */
+ unsupported_encoding(s, insn);
+ return;
+ }
+}
+
+static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
+ int opcode, int rd, int rn, int rm)
+{
+ /* 3-reg-different widening insns: 64 x 64 -> 128 */
+ TCGv_i64 tcg_res[2];
+ int pass, accop;
+
+ tcg_res[0] = tcg_temp_new_i64();
+ tcg_res[1] = tcg_temp_new_i64();
+
+ /* Does this op do an adding accumulate, a subtracting accumulate,
+ * or no accumulate at all?
+ */
+ switch (opcode) {
+ case 5:
+ case 8:
+ case 9:
+ accop = 1;
+ break;
+ case 10:
+ case 11:
+ accop = -1;
+ break;
+ default:
+ accop = 0;
+ break;
+ }
+
+ if (accop != 0) {
+ read_vec_element(s, tcg_res[0], rd, 0, MO_64);
+ read_vec_element(s, tcg_res[1], rd, 1, MO_64);
+ }
+
+ /* size == 2 means two 32x32->64 operations; this is worth special
+ * casing because we can generally handle it inline.
+ */
+ if (size == 2) {
+ for (pass = 0; pass < 2; pass++) {
+ TCGv_i64 tcg_op1 = tcg_temp_new_i64();
+ TCGv_i64 tcg_op2 = tcg_temp_new_i64();
+ TCGv_i64 tcg_passres;
+ TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
+
+ int elt = pass + is_q * 2;
+
+ read_vec_element(s, tcg_op1, rn, elt, memop);
+ read_vec_element(s, tcg_op2, rm, elt, memop);
+
+ if (accop == 0) {
+ tcg_passres = tcg_res[pass];
+ } else {
+ tcg_passres = tcg_temp_new_i64();
+ }
+
+ switch (opcode) {
+ case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
{
TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
tcg_temp_free_i64(tcg_res[1]);
}
-/* Pairwise op subgroup of C3.6.16. */
-static void disas_simd_3same_pair(DisasContext *s, uint32_t insn)
+/* Helper functions for 32 bit comparisons */
+static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
{
- unsupported_encoding(s, insn);
+ tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
}
-/* Floating point op subgroup of C3.6.16. */
-static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
+static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
{
- unsupported_encoding(s, insn);
+ tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
}
-/* Integer op subgroup of C3.6.16. */
-static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
+static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
{
- int is_q = extract32(insn, 30, 1);
- int u = extract32(insn, 29, 1);
- int size = extract32(insn, 22, 2);
- int opcode = extract32(insn, 11, 5);
- int rm = extract32(insn, 16, 5);
- int rn = extract32(insn, 5, 5);
- int rd = extract32(insn, 0, 5);
+ tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
+}
+
+static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
+{
+ tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
+}
+
+/* Pairwise op subgroup of C3.6.16.
+ *
+ * This is called directly from the 3-same integer dispatch and from
+ * disas_simd_3same_float for the float pairwise operations, where the
+ * opcode and size are calculated differently.
+ */
+static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
+ int size, int rn, int rm, int rd)
+{
+ TCGv_ptr fpst;
int pass;
- switch (opcode) {
- case 0x13: /* MUL, PMUL */
- if (u && size != 0) {
- unallocated_encoding(s);
- return;
+ /* Floating point operations need fpst */
+ if (opcode >= 0x58) {
+ fpst = get_fpstatus_ptr();
+ } else {
+ TCGV_UNUSED_PTR(fpst);
+ }
+
+ /* These operations work on the concatenated rm:rn, with each pair of
+ * adjacent elements being operated on to produce an element in the result.
+ */
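+    /* e.g. for four elements, the result is
+     * { rn[0] op rn[1], rn[2] op rn[3], rm[0] op rm[1], rm[2] op rm[3] }.
+     */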
+ if (size == 3) {
+ TCGv_i64 tcg_res[2];
+
+ for (pass = 0; pass < 2; pass++) {
+ TCGv_i64 tcg_op1 = tcg_temp_new_i64();
+ TCGv_i64 tcg_op2 = tcg_temp_new_i64();
+ int passreg = (pass == 0) ? rn : rm;
+
+ read_vec_element(s, tcg_op1, passreg, 0, MO_64);
+ read_vec_element(s, tcg_op2, passreg, 1, MO_64);
+ tcg_res[pass] = tcg_temp_new_i64();
+
+ switch (opcode) {
+ case 0x17: /* ADDP */
+ tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
+ break;
+ case 0x58: /* FMAXNMP */
+ gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x5a: /* FADDP */
+ gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x5e: /* FMAXP */
+ gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x78: /* FMINNMP */
+ gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x7e: /* FMINP */
+ gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ tcg_temp_free_i64(tcg_op1);
+ tcg_temp_free_i64(tcg_op2);
+ }
+
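+        /* Write back only once both passes have been computed, in
+         * case rd aliases rn or rm.
+         */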
+ for (pass = 0; pass < 2; pass++) {
+ write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+ tcg_temp_free_i64(tcg_res[pass]);
+ }
+ } else {
+ int maxpass = is_q ? 4 : 2;
+ TCGv_i32 tcg_res[4];
+
+ for (pass = 0; pass < maxpass; pass++) {
+ TCGv_i32 tcg_op1 = tcg_temp_new_i32();
+ TCGv_i32 tcg_op2 = tcg_temp_new_i32();
+ NeonGenTwoOpFn *genfn = NULL;
+ int passreg = pass < (maxpass / 2) ? rn : rm;
+ int passelt = (is_q && (pass & 1)) ? 2 : 0;
+
+ read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
+ read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
+ tcg_res[pass] = tcg_temp_new_i32();
+
+ switch (opcode) {
+ case 0x17: /* ADDP */
+ {
+ static NeonGenTwoOpFn * const fns[3] = {
+ gen_helper_neon_padd_u8,
+ gen_helper_neon_padd_u16,
+ tcg_gen_add_i32,
+ };
+ genfn = fns[size];
+ break;
+ }
+ case 0x14: /* SMAXP, UMAXP */
+ {
+ static NeonGenTwoOpFn * const fns[3][2] = {
+ { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
+ { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
+ { gen_max_s32, gen_max_u32 },
+ };
+ genfn = fns[size][u];
+ break;
+ }
+ case 0x15: /* SMINP, UMINP */
+ {
+ static NeonGenTwoOpFn * const fns[3][2] = {
+ { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
+ { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
+ { gen_min_s32, gen_min_u32 },
+ };
+ genfn = fns[size][u];
+ break;
+ }
+ /* The FP operations are all on single floats (32 bit) */
+ case 0x58: /* FMAXNMP */
+ gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x5a: /* FADDP */
+ gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x5e: /* FMAXP */
+ gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x78: /* FMINNMP */
+ gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x7e: /* FMINP */
+ gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+            /* The FP ops above were called directly; for the integer
+             * ops, call the selected genfn now.
+             */
+ if (genfn) {
+ genfn(tcg_res[pass], tcg_op1, tcg_op2);
+ }
+
+ tcg_temp_free_i32(tcg_op1);
+ tcg_temp_free_i32(tcg_op2);
+ }
+
+ for (pass = 0; pass < maxpass; pass++) {
+ write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
+ tcg_temp_free_i32(tcg_res[pass]);
+ }
+ if (!is_q) {
+ clear_vec_high(s, rd);
+ }
+ }
+
+ if (!TCGV_IS_UNUSED_PTR(fpst)) {
+ tcg_temp_free_ptr(fpst);
+ }
+}
+
+/* Floating point op subgroup of C3.6.16. */
+static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
+{
+ /* For floating point ops, the U, size[1] and opcode bits
+ * together indicate the operation. size[0] indicates single
+ * or double.
+ */
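+    /* e.g. FADDP is U=1, size=0x, opcode 0x1a, giving fpopcode 0x5a */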
+ int fpopcode = extract32(insn, 11, 5)
+ | (extract32(insn, 23, 1) << 5)
+ | (extract32(insn, 29, 1) << 6);
+ int is_q = extract32(insn, 30, 1);
+ int size = extract32(insn, 22, 1);
+ int rm = extract32(insn, 16, 5);
+ int rn = extract32(insn, 5, 5);
+ int rd = extract32(insn, 0, 5);
+
+ int datasize = is_q ? 128 : 64;
+ int esize = 32 << size;
+ int elements = datasize / esize;
+
+ if (size == 1 && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ switch (fpopcode) {
+ case 0x58: /* FMAXNMP */
+ case 0x5a: /* FADDP */
+ case 0x5e: /* FMAXP */
+ case 0x78: /* FMINNMP */
+ case 0x7e: /* FMINP */
+ if (size && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+ handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
+ rn, rm, rd);
+ return;
+ case 0x1b: /* FMULX */
+ case 0x1f: /* FRECPS */
+ case 0x3f: /* FRSQRTS */
+ case 0x5d: /* FACGE */
+ case 0x7d: /* FACGT */
+ case 0x19: /* FMLA */
+ case 0x39: /* FMLS */
+ case 0x18: /* FMAXNM */
+ case 0x1a: /* FADD */
+ case 0x1c: /* FCMEQ */
+ case 0x1e: /* FMAX */
+ case 0x38: /* FMINNM */
+ case 0x3a: /* FSUB */
+ case 0x3e: /* FMIN */
+ case 0x5b: /* FMUL */
+ case 0x5c: /* FCMGE */
+ case 0x5f: /* FDIV */
+ case 0x7a: /* FABD */
+ case 0x7c: /* FCMGT */
+ handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
+ return;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+}
+
+/* Integer op subgroup of C3.6.16. */
+static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
+{
+ int is_q = extract32(insn, 30, 1);
+ int u = extract32(insn, 29, 1);
+ int size = extract32(insn, 22, 2);
+ int opcode = extract32(insn, 11, 5);
+ int rm = extract32(insn, 16, 5);
+ int rn = extract32(insn, 5, 5);
+ int rd = extract32(insn, 0, 5);
+ int pass;
+
+ switch (opcode) {
+ case 0x13: /* MUL, PMUL */
+ if (u && size != 0) {
+ unallocated_encoding(s);
+ return;
}
/* fall through */
case 0x0: /* SHADD, UHADD */
unallocated_encoding(s);
return;
}
- unsupported_encoding(s, insn);
- return;
- case 0x1: /* SQADD */
- case 0x5: /* SQSUB */
- case 0x8: /* SSHL, USHL */
- case 0x9: /* SQSHL, UQSHL */
- case 0xa: /* SRSHL, URSHL */
- case 0xb: /* SQRSHL, UQRSHL */
- if (size == 3 && !is_q) {
- unallocated_encoding(s);
- return;
- }
- unsupported_encoding(s, insn);
- return;
+ break;
case 0x16: /* SQDMULH, SQRDMULH */
if (size == 0 || size == 3) {
unallocated_encoding(s);
return;
}
- unsupported_encoding(s, insn);
- return;
+ break;
default:
if (size == 3 && !is_q) {
unallocated_encoding(s);
TCGv_i32 tcg_op1 = tcg_temp_new_i32();
TCGv_i32 tcg_op2 = tcg_temp_new_i32();
TCGv_i32 tcg_res = tcg_temp_new_i32();
- NeonGenTwoOpFn *genfn;
+ NeonGenTwoOpFn *genfn = NULL;
+ NeonGenTwoOpEnvFn *genenvfn = NULL;
read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
switch (opcode) {
+ case 0x0: /* SHADD, UHADD */
+ {
+ static NeonGenTwoOpFn * const fns[3][2] = {
+ { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
+ { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
+ { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
+ };
+ genfn = fns[size][u];
+ break;
+ }
+ case 0x1: /* SQADD, UQADD */
+ {
+ static NeonGenTwoOpEnvFn * const fns[3][2] = {
+ { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
+ { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
+ { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
+ };
+ genenvfn = fns[size][u];
+ break;
+ }
+ case 0x2: /* SRHADD, URHADD */
+ {
+ static NeonGenTwoOpFn * const fns[3][2] = {
+ { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
+ { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
+ { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
+ };
+ genfn = fns[size][u];
+ break;
+ }
+ case 0x4: /* SHSUB, UHSUB */
+ {
+ static NeonGenTwoOpFn * const fns[3][2] = {
+ { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
+ { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
+ { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
+ };
+ genfn = fns[size][u];
+ break;
+ }
+ case 0x5: /* SQSUB, UQSUB */
+ {
+ static NeonGenTwoOpEnvFn * const fns[3][2] = {
+ { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
+ { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
+ { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
+ };
+ genenvfn = fns[size][u];
+ break;
+ }
case 0x6: /* CMGT, CMHI */
{
static NeonGenTwoOpFn * const fns[3][2] = {
genfn = fns[size][u];
break;
}
+ case 0x8: /* SSHL, USHL */
+ {
+ static NeonGenTwoOpFn * const fns[3][2] = {
+ { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
+ { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
+ { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
+ };
+ genfn = fns[size][u];
+ break;
+ }
+ case 0x9: /* SQSHL, UQSHL */
+ {
+ static NeonGenTwoOpEnvFn * const fns[3][2] = {
+ { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
+ { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
+ { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
+ };
+ genenvfn = fns[size][u];
+ break;
+ }
+ case 0xa: /* SRSHL, URSHL */
+ {
+ static NeonGenTwoOpFn * const fns[3][2] = {
+ { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
+ { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
+ { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
+ };
+ genfn = fns[size][u];
+ break;
+ }
+ case 0xb: /* SQRSHL, UQRSHL */
+ {
+ static NeonGenTwoOpEnvFn * const fns[3][2] = {
+ { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
+ { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
+ { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
+ };
+ genenvfn = fns[size][u];
+ break;
+ }
+ case 0xc: /* SMAX, UMAX */
+ {
+ static NeonGenTwoOpFn * const fns[3][2] = {
+ { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
+ { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
+ { gen_max_s32, gen_max_u32 },
+ };
+ genfn = fns[size][u];
+ break;
+ }
+
+ case 0xd: /* SMIN, UMIN */
+ {
+ static NeonGenTwoOpFn * const fns[3][2] = {
+ { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
+ { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
+ { gen_min_s32, gen_min_u32 },
+ };
+ genfn = fns[size][u];
+ break;
+ }
+ case 0xe: /* SABD, UABD */
+ case 0xf: /* SABA, UABA */
+ {
+ static NeonGenTwoOpFn * const fns[3][2] = {
+ { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
+ { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
+ { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
+ };
+ genfn = fns[size][u];
+ break;
+ }
case 0x10: /* ADD, SUB */
{
static NeonGenTwoOpFn * const fns[3][2] = {
genfn = fns[size][u];
break;
}
+ case 0x13: /* MUL, PMUL */
+ if (u) {
+ /* PMUL */
+ assert(size == 0);
+ genfn = gen_helper_neon_mul_p8;
+ break;
+ }
+ /* fall through : MUL */
+ case 0x12: /* MLA, MLS */
+ {
+ static NeonGenTwoOpFn * const fns[3] = {
+ gen_helper_neon_mul_u8,
+ gen_helper_neon_mul_u16,
+ tcg_gen_mul_i32,
+ };
+ genfn = fns[size];
+ break;
+ }
+ case 0x16: /* SQDMULH, SQRDMULH */
+ {
+ static NeonGenTwoOpEnvFn * const fns[2][2] = {
+ { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
+ { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
+ };
+ assert(size == 1 || size == 2);
+ genenvfn = fns[size - 1][u];
+ break;
+ }
default:
g_assert_not_reached();
}
- genfn(tcg_res, tcg_op1, tcg_op2);
+ if (genenvfn) {
+ genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
+ } else {
+ genfn(tcg_res, tcg_op1, tcg_op2);
+ }
+
+ if (opcode == 0xf || opcode == 0x12) {
+ /* SABA, UABA, MLA, MLS: accumulating ops */
+ static NeonGenTwoOpFn * const fns[3][2] = {
+ { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
+ { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
+ { tcg_gen_add_i32, tcg_gen_sub_i32 },
+ };
+ bool is_sub = (opcode == 0x12 && u); /* MLS */
+
+ genfn = fns[size][is_sub];
+ read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
+ genfn(tcg_res, tcg_res, tcg_op1);
+ }
write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
case 0x17: /* ADDP */
case 0x14: /* SMAXP, UMAXP */
case 0x15: /* SMINP, UMINP */
+ {
/* Pairwise operations */
- disas_simd_3same_pair(s, insn);
+ int is_q = extract32(insn, 30, 1);
+ int u = extract32(insn, 29, 1);
+ int size = extract32(insn, 22, 2);
+ int rm = extract32(insn, 16, 5);
+ int rn = extract32(insn, 5, 5);
+ int rd = extract32(insn, 0, 5);
+ if (opcode == 0x17) {
+ if (u || (size == 3 && !is_q)) {
+ unallocated_encoding(s);
+ return;
+ }
+ } else {
+ if (size == 3) {
+ unallocated_encoding(s);
+ return;
+ }
+ }
+ handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
break;
+ }
case 0x18 ... 0x31:
/* floating point ops, sz[1] and U are part of opcode */
disas_simd_3same_float(s, insn);
}
}
-/* C3.6.17 AdvSIMD two reg misc
- * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
- * +---+---+---+-----------+------+-----------+--------+-----+------+------+
- * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd |
- * +---+---+---+-----------+------+-----------+--------+-----+------+------+
- */
-static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
+static void handle_2misc_narrow(DisasContext *s, int opcode, bool u, bool is_q,
+ int size, int rn, int rd)
{
- unsupported_encoding(s, insn);
-}
+ /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
+ * in the source becomes a size element in the destination).
+ */
+ int pass;
+ TCGv_i32 tcg_res[2];
+ int destelt = is_q ? 2 : 0;
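+    /* The "2" variants (is_q set) write the narrowed results to the
+     * upper half of Rd, leaving the lower half untouched; otherwise
+     * we write the lower half and clear the upper half.
+     */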
-/* C3.6.18 AdvSIMD vector x indexed element
- * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0
- * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
- * | 0 | Q | U | 0 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd |
- * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
- */
-static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
-{
- unsupported_encoding(s, insn);
+ for (pass = 0; pass < 2; pass++) {
+ TCGv_i64 tcg_op = tcg_temp_new_i64();
+ NeonGenNarrowFn *genfn = NULL;
+ NeonGenNarrowEnvFn *genenvfn = NULL;
+
+ read_vec_element(s, tcg_op, rn, pass, MO_64);
+ tcg_res[pass] = tcg_temp_new_i32();
+
+ switch (opcode) {
+ case 0x12: /* XTN, SQXTUN */
+ {
+ static NeonGenNarrowFn * const xtnfns[3] = {
+ gen_helper_neon_narrow_u8,
+ gen_helper_neon_narrow_u16,
+ tcg_gen_trunc_i64_i32,
+ };
+ static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
+ gen_helper_neon_unarrow_sat8,
+ gen_helper_neon_unarrow_sat16,
+ gen_helper_neon_unarrow_sat32,
+ };
+ if (u) {
+ genenvfn = sqxtunfns[size];
+ } else {
+ genfn = xtnfns[size];
+ }
+ break;
+ }
+ case 0x14: /* SQXTN, UQXTN */
+ {
+ static NeonGenNarrowEnvFn * const fns[3][2] = {
+ { gen_helper_neon_narrow_sat_s8,
+ gen_helper_neon_narrow_sat_u8 },
+ { gen_helper_neon_narrow_sat_s16,
+ gen_helper_neon_narrow_sat_u16 },
+ { gen_helper_neon_narrow_sat_s32,
+ gen_helper_neon_narrow_sat_u32 },
+ };
+ genenvfn = fns[size][u];
+ break;
+ }
+ default:
+ g_assert_not_reached();
+ }
+
+ if (genfn) {
+ genfn(tcg_res[pass], tcg_op);
+ } else {
+ genenvfn(tcg_res[pass], cpu_env, tcg_op);
+ }
+
+ tcg_temp_free_i64(tcg_op);
+ }
+
+ for (pass = 0; pass < 2; pass++) {
+ write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
+ tcg_temp_free_i32(tcg_res[pass]);
+ }
+ if (!is_q) {
+ clear_vec_high(s, rd);
+ }
+}
+
+static void handle_rev(DisasContext *s, int opcode, bool u,
+ bool is_q, int size, int rn, int rd)
+{
+ int op = (opcode << 1) | u;
+ int opsz = op + size;
+ int grp_size = 3 - opsz;
+ int dsize = is_q ? 128 : 64;
+ int i;
+
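+ /* op is 0 for REV64, 1 for REV32, 2 for REV16; each reversal
+ * group is 64 >> op bits wide, so grp_size is log2 of the number
+ * of elements per group. If the group is no wider than a single
+ * element (opsz >= 3) the encoding is unallocated.
+ */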
+ if (opsz >= 3) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (size == 0) {
+ /* Special case bytes, use bswap op on each group of elements */
+ int groups = dsize / (8 << grp_size);
+
+ for (i = 0; i < groups; i++) {
+ TCGv_i64 tcg_tmp = tcg_temp_new_i64();
+
+ read_vec_element(s, tcg_tmp, rn, i, grp_size);
+ switch (grp_size) {
+ case MO_16:
+ tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
+ break;
+ case MO_32:
+ tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
+ break;
+ case MO_64:
+ tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ write_vec_element(s, tcg_tmp, rd, i, grp_size);
+ tcg_temp_free_i64(tcg_tmp);
+ }
+ if (!is_q) {
+ clear_vec_high(s, rd);
+ }
+ } else {
+ int revmask = (1 << grp_size) - 1;
+ int esize = 8 << size;
+ int elements = dsize / esize;
+ TCGv_i64 tcg_rn = tcg_temp_new_i64();
+ TCGv_i64 tcg_rd = tcg_const_i64(0);
+ TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
+
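+ /* Reversal works by XORing the low grp_size bits of each element
+ * index, which flips the element's position within its group but
+ * leaves the group itself in place. e.g. REV32.8H has grp_size 1:
+ * indices swap in adjacent pairs, exchanging the two halfwords
+ * of every word.
+ */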
+ for (i = 0; i < elements; i++) {
+ int e_rev = (i & 0xf) ^ revmask;
+ int off = e_rev * esize;
+ read_vec_element(s, tcg_rn, rn, i, size);
+ if (off >= 64) {
+ tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
+ tcg_rn, off - 64, esize);
+ } else {
+ tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
+ }
+ }
+ write_vec_element(s, tcg_rd, rd, 0, MO_64);
+ write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
+
+ tcg_temp_free_i64(tcg_rd_hi);
+ tcg_temp_free_i64(tcg_rd);
+ tcg_temp_free_i64(tcg_rn);
+ }
+}
+
+/* C3.6.17 AdvSIMD two reg misc
+ * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
+ * +---+---+---+-----------+------+-----------+--------+-----+------+------+
+ * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd |
+ * +---+---+---+-----------+------+-----------+--------+-----+------+------+
+ */
+static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
+{
+ int size = extract32(insn, 22, 2);
+ int opcode = extract32(insn, 12, 5);
+ bool u = extract32(insn, 29, 1);
+ bool is_q = extract32(insn, 30, 1);
+ int rn = extract32(insn, 5, 5);
+ int rd = extract32(insn, 0, 5);
+
+ switch (opcode) {
+ case 0x0: /* REV64, REV32 */
+ case 0x1: /* REV16 */
+ handle_rev(s, opcode, u, is_q, size, rn, rd);
+ return;
+ case 0x5: /* CNT, NOT, RBIT */
+ if (u && size == 0) {
+ /* NOT: adjust size so we can use the 64-bits-at-a-time loop. */
+ size = 3;
+ break;
+ } else if (u && size == 1) {
+ /* RBIT */
+ break;
+ } else if (!u && size == 0) {
+ /* CNT */
+ break;
+ }
+ unallocated_encoding(s);
+ return;
+ case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
+ case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
+ if (size == 3) {
+ unallocated_encoding(s);
+ return;
+ }
+ handle_2misc_narrow(s, opcode, u, is_q, size, rn, rd);
+ return;
+ case 0x2: /* SADDLP, UADDLP */
+ case 0x4: /* CLS, CLZ */
+ case 0x6: /* SADALP, UADALP */
+ if (size == 3) {
+ unallocated_encoding(s);
+ return;
+ }
+ unsupported_encoding(s, insn);
+ return;
+ case 0x13: /* SHLL, SHLL2 */
+ if (u == 0 || size == 3) {
+ unallocated_encoding(s);
+ return;
+ }
+ unsupported_encoding(s, insn);
+ return;
+ case 0xa: /* CMLT */
+ if (u == 1) {
+ unallocated_encoding(s);
+ return;
+ }
+ /* fall through */
+ case 0x8: /* CMGT, CMGE */
+ case 0x9: /* CMEQ, CMLE */
+ case 0xb: /* ABS, NEG */
+ if (size == 3 && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ case 0x3: /* SUQADD, USQADD */
+ case 0x7: /* SQABS, SQNEG */
+ if (size == 3 && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+ unsupported_encoding(s, insn);
+ return;
+ case 0xc ... 0xf:
+ case 0x16 ... 0x1d:
+ case 0x1f:
+ {
+ /* Floating point: U, size[1] and opcode indicate operation;
+ * size[0] indicates single or double precision.
+ */
+ opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
+ size = extract32(size, 0, 1) ? 3 : 2;
+ switch (opcode) {
+ case 0x2f: /* FABS */
+ case 0x6f: /* FNEG */
+ if (size == 3 && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ case 0x2c: /* FCMGT (zero) */
+ case 0x2d: /* FCMEQ (zero) */
+ case 0x2e: /* FCMLT (zero) */
+ case 0x6c: /* FCMGE (zero) */
+ case 0x6d: /* FCMLE (zero) */
+ if (size == 3 && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+ handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
+ return;
+ case 0x16: /* FCVTN, FCVTN2 */
+ case 0x17: /* FCVTL, FCVTL2 */
+ case 0x18: /* FRINTN */
+ case 0x19: /* FRINTM */
+ case 0x1a: /* FCVTNS */
+ case 0x1b: /* FCVTMS */
+ case 0x1c: /* FCVTAS */
+ case 0x1d: /* SCVTF */
+ case 0x38: /* FRINTP */
+ case 0x39: /* FRINTZ */
+ case 0x3a: /* FCVTPS */
+ case 0x3b: /* FCVTZS */
+ case 0x3c: /* URECPE */
+ case 0x3d: /* FRECPE */
+ case 0x56: /* FCVTXN, FCVTXN2 */
+ case 0x58: /* FRINTA */
+ case 0x59: /* FRINTX */
+ case 0x5a: /* FCVTNU */
+ case 0x5b: /* FCVTMU */
+ case 0x5c: /* FCVTAU */
+ case 0x5d: /* UCVTF */
+ case 0x79: /* FRINTI */
+ case 0x7a: /* FCVTPU */
+ case 0x7b: /* FCVTZU */
+ case 0x7c: /* URSQRTE */
+ case 0x7d: /* FRSQRTE */
+ case 0x7f: /* FSQRT */
+ unsupported_encoding(s, insn);
+ return;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ }
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (size == 3) {
+ /* All 64-bit element operations can be shared with scalar 2misc */
+ int pass;
+
+ for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
+ TCGv_i64 tcg_op = tcg_temp_new_i64();
+ TCGv_i64 tcg_res = tcg_temp_new_i64();
+
+ read_vec_element(s, tcg_op, rn, pass, MO_64);
+
+ handle_2misc_64(s, opcode, u, tcg_res, tcg_op);
+
+ write_vec_element(s, tcg_res, rd, pass, MO_64);
+
+ tcg_temp_free_i64(tcg_res);
+ tcg_temp_free_i64(tcg_op);
+ }
+ } else {
+ int pass;
+
+ for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
+ TCGv_i32 tcg_op = tcg_temp_new_i32();
+ TCGv_i32 tcg_res = tcg_temp_new_i32();
+ TCGCond cond;
+
+ read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
+
+ if (size == 2) {
+ /* Special cases for 32 bit elements */
+ switch (opcode) {
+ case 0xa: /* CMLT */
+ /* 32 bit integer comparison against zero, result is
+ * test ? (2^32 - 1) : 0. We implement via setcond(test)
+ * and then negating, which turns setcond's 0/1 result
+ * into 0 or all-ones.
+ */
+ cond = TCG_COND_LT;
+ do_cmop:
+ tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
+ tcg_gen_neg_i32(tcg_res, tcg_res);
+ break;
+ case 0x8: /* CMGT, CMGE */
+ cond = u ? TCG_COND_GE : TCG_COND_GT;
+ goto do_cmop;
+ case 0x9: /* CMEQ, CMLE */
+ cond = u ? TCG_COND_LE : TCG_COND_EQ;
+ goto do_cmop;
+ case 0xb: /* ABS, NEG */
+ if (u) {
+ tcg_gen_neg_i32(tcg_res, tcg_op);
+ } else {
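+ /* ABS: negate, then movcond keeps the original value
+ * when it is positive
+ */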
+ TCGv_i32 tcg_zero = tcg_const_i32(0);
+ tcg_gen_neg_i32(tcg_res, tcg_op);
+ tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
+ tcg_zero, tcg_op, tcg_res);
+ tcg_temp_free_i32(tcg_zero);
+ }
+ break;
+ case 0x2f: /* FABS */
+ gen_helper_vfp_abss(tcg_res, tcg_op);
+ break;
+ case 0x6f: /* FNEG */
+ gen_helper_vfp_negs(tcg_res, tcg_op);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ } else {
+ /* Use helpers for 8 and 16 bit elements */
+ switch (opcode) {
+ case 0x5: /* CNT, RBIT */
+ /* For these two insns size is part of the opcode specifier
+ * (handled earlier); they always operate on byte elements.
+ */
+ if (u) {
+ gen_helper_neon_rbit_u8(tcg_res, tcg_op);
+ } else {
+ gen_helper_neon_cnt_u8(tcg_res, tcg_op);
+ }
+ break;
+ case 0x8: /* CMGT, CMGE */
+ case 0x9: /* CMEQ, CMLE */
+ case 0xa: /* CMLT */
+ {
+ static NeonGenTwoOpFn * const fns[3][2] = {
+ { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
+ { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
+ { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
+ };
+ NeonGenTwoOpFn *genfn;
+ int comp;
+ bool reverse;
+ TCGv_i32 tcg_zero = tcg_const_i32(0);
+
+ /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
+ comp = (opcode - 0x8) * 2 + u;
+ /* ...but LE, LT are implemented as reverse GE, GT */
+ reverse = (comp > 2);
+ if (reverse) {
+ comp = 4 - comp;
+ }
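+ /* e.g. CMLE (comp 3) is handled as CMGE (comp 1) with the
+ * zero operand first, since op <= 0 iff 0 >= op
+ */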
+ genfn = fns[comp][size];
+ if (reverse) {
+ genfn(tcg_res, tcg_zero, tcg_op);
+ } else {
+ genfn(tcg_res, tcg_op, tcg_zero);
+ }
+ tcg_temp_free_i32(tcg_zero);
+ break;
+ }
+ case 0xb: /* ABS, NEG */
+ if (u) {
+ TCGv_i32 tcg_zero = tcg_const_i32(0);
+ if (size) {
+ gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
+ } else {
+ gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
+ }
+ tcg_temp_free_i32(tcg_zero);
+ } else {
+ if (size) {
+ gen_helper_neon_abs_s16(tcg_res, tcg_op);
+ } else {
+ gen_helper_neon_abs_s8(tcg_res, tcg_op);
+ }
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ }
+
+ write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+
+ tcg_temp_free_i32(tcg_res);
+ tcg_temp_free_i32(tcg_op);
+ }
+ }
+ if (!is_q) {
+ clear_vec_high(s, rd);
+ }
+}
+
+/* C3.6.13 AdvSIMD scalar x indexed element
+ * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0
+ * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
+ * | 0 1 | U | 1 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd |
+ * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
+ * C3.6.18 AdvSIMD vector x indexed element
+ * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0
+ * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
+ * | 0 | Q | U | 0 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd |
+ * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
+ */
+static void disas_simd_indexed(DisasContext *s, uint32_t insn)
+{
+ /* This encoding has two kinds of instruction:
+ * normal, where we perform elt x idxelt => elt for each
+ * element in the vector
+ * long, where we perform elt x idxelt and generate a result of
+ * double the width of the input element
+ * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
+ */
+ bool is_scalar = extract32(insn, 28, 1);
+ bool is_q = extract32(insn, 30, 1);
+ bool u = extract32(insn, 29, 1);
+ int size = extract32(insn, 22, 2);
+ int l = extract32(insn, 21, 1);
+ int m = extract32(insn, 20, 1);
+ /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
+ int rm = extract32(insn, 16, 4);
+ int opcode = extract32(insn, 12, 4);
+ int h = extract32(insn, 11, 1);
+ int rn = extract32(insn, 5, 5);
+ int rd = extract32(insn, 0, 5);
+ bool is_long = false;
+ bool is_fp = false;
+ int index;
+ TCGv_ptr fpst;
+
+ switch (opcode) {
+ case 0x0: /* MLA */
+ case 0x4: /* MLS */
+ if (!u || is_scalar) {
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
+ case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
+ case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
+ if (is_scalar) {
+ unallocated_encoding(s);
+ return;
+ }
+ is_long = true;
+ break;
+ case 0x3: /* SQDMLAL, SQDMLAL2 */
+ case 0x7: /* SQDMLSL, SQDMLSL2 */
+ case 0xb: /* SQDMULL, SQDMULL2 */
+ is_long = true;
+ /* fall through */
+ case 0xc: /* SQDMULH */
+ case 0xd: /* SQRDMULH */
+ if (u) {
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ case 0x8: /* MUL */
+ if (u || is_scalar) {
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ case 0x1: /* FMLA */
+ case 0x5: /* FMLS */
+ if (u) {
+ unallocated_encoding(s);
+ return;
+ }
+ /* fall through */
+ case 0x9: /* FMUL, FMULX */
+ if (!extract32(size, 1, 1)) {
+ unallocated_encoding(s);
+ return;
+ }
+ is_fp = true;
+ break;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (is_fp) {
+ /* low bit of size indicates single/double */
+ size = extract32(size, 0, 1) ? 3 : 2;
+ if (size == 2) {
+ index = h << 1 | l;
+ } else {
+ if (l || !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+ index = h;
+ }
+ rm |= (m << 4);
+ } else {
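+ /* Integer ops: a 16-bit element index is H:L:M, with Rm
+ * restricted to V0-V15; a 32-bit index is H:L, with M extending
+ * Rm to 5 bits. Byte and 64-bit element sizes are unallocated.
+ */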
+ switch (size) {
+ case 1:
+ index = h << 2 | l << 1 | m;
+ break;
+ case 2:
+ index = h << 1 | l;
+ rm |= (m << 4);
+ break;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+ }
+
+ if (is_fp) {
+ fpst = get_fpstatus_ptr();
+ } else {
+ TCGV_UNUSED_PTR(fpst);
+ }
+
+ if (size == 3) {
+ TCGv_i64 tcg_idx = tcg_temp_new_i64();
+ int pass;
+
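+ /* The integer decode above never yields size 3, so this must
+ * be a double-precision FP op: FMLA, FMLS, FMUL or FMULX.
+ */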
+ assert(is_fp && is_q && !is_long);
+
+ read_vec_element(s, tcg_idx, rm, index, MO_64);
+
+ for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
+ TCGv_i64 tcg_op = tcg_temp_new_i64();
+ TCGv_i64 tcg_res = tcg_temp_new_i64();
+
+ read_vec_element(s, tcg_op, rn, pass, MO_64);
+
+ switch (opcode) {
+ case 0x5: /* FMLS */
+ /* As usual for ARM, separate negation for fused multiply-add */
+ gen_helper_vfp_negd(tcg_op, tcg_op);
+ /* fall through */
+ case 0x1: /* FMLA */
+ read_vec_element(s, tcg_res, rd, pass, MO_64);
+ gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
+ break;
+ case 0x9: /* FMUL, FMULX */
+ if (u) {
+ gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
+ } else {
+ gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ write_vec_element(s, tcg_res, rd, pass, MO_64);
+ tcg_temp_free_i64(tcg_op);
+ tcg_temp_free_i64(tcg_res);
+ }
+
+ if (is_scalar) {
+ clear_vec_high(s, rd);
+ }
+
+ tcg_temp_free_i64(tcg_idx);
+ } else if (!is_long) {
+ /* 32 bit floating point, or 16 or 32 bit integer.
+ * For the 16 bit scalar case we use the usual Neon helpers and
+ * rely on the fact that 0 op 0 == 0 with no side effects.
+ */
+ TCGv_i32 tcg_idx = tcg_temp_new_i32();
+ int pass, maxpasses;
+
+ if (is_scalar) {
+ maxpasses = 1;
+ } else {
+ maxpasses = is_q ? 4 : 2;
+ }
+
+ read_vec_element_i32(s, tcg_idx, rm, index, size);
+
+ if (size == 1 && !is_scalar) {
+ /* The simplest way to handle the 16x16 indexed ops is to duplicate
+ * the index into both halves of the 32 bit tcg_idx and then use
+ * the usual Neon helpers.
+ */
+ tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
+ }
+
+ for (pass = 0; pass < maxpasses; pass++) {
+ TCGv_i32 tcg_op = tcg_temp_new_i32();
+ TCGv_i32 tcg_res = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
+
+ switch (opcode) {
+ case 0x0: /* MLA */
+ case 0x4: /* MLS */
+ case 0x8: /* MUL */
+ {
+ static NeonGenTwoOpFn * const fns[2][2] = {
+ { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
+ { tcg_gen_add_i32, tcg_gen_sub_i32 },
+ };
+ NeonGenTwoOpFn *genfn;
+ bool is_sub = opcode == 0x4;
+
+ if (size == 1) {
+ gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
+ } else {
+ tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
+ }
+ if (opcode == 0x8) {
+ break;
+ }
+ read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
+ genfn = fns[size - 1][is_sub];
+ genfn(tcg_res, tcg_op, tcg_res);
+ break;
+ }
+ case 0x5: /* FMLS */
+ /* As usual for ARM, separate negation for fused multiply-add */
+ gen_helper_vfp_negs(tcg_op, tcg_op);
+ /* fall through */
+ case 0x1: /* FMLA */
+ read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+ gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
+ break;
+ case 0x9: /* FMUL, FMULX */
+ if (u) {
+ gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
+ } else {
+ gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
+ }
+ break;
+ case 0xc: /* SQDMULH */
+ if (size == 1) {
+ gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
+ tcg_op, tcg_idx);
+ } else {
+ gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
+ tcg_op, tcg_idx);
+ }
+ break;
+ case 0xd: /* SQRDMULH */
+ if (size == 1) {
+ gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
+ tcg_op, tcg_idx);
+ } else {
+ gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
+ tcg_op, tcg_idx);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ if (is_scalar) {
+ write_fp_sreg(s, rd, tcg_res);
+ } else {
+ write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+ }
+
+ tcg_temp_free_i32(tcg_op);
+ tcg_temp_free_i32(tcg_res);
+ }
+
+ tcg_temp_free_i32(tcg_idx);
+
+ if (!is_q) {
+ clear_vec_high(s, rd);
+ }
+ } else {
+ /* long ops: 16x16->32 or 32x32->64 */
+ TCGv_i64 tcg_res[2];
+ int pass;
+ bool satop = extract32(opcode, 0, 1);
+ TCGMemOp memop = MO_32;
+
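+ /* The saturating doubling ops (SQDMULL, SQDMLAL, SQDMLSL) are
+ * always signed; for the others U selects unsigned sources.
+ */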
+ if (satop || !u) {
+ memop |= MO_SIGN;
+ }
+
+ if (size == 2) {
+ TCGv_i64 tcg_idx = tcg_temp_new_i64();
+
+ read_vec_element(s, tcg_idx, rm, index, memop);
+
+ for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
+ TCGv_i64 tcg_op = tcg_temp_new_i64();
+ TCGv_i64 tcg_passres;
+ int passelt;
+
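+ /* The INSN2 forms (is_q) read their source elements from the
+ * upper half of Rn.
+ */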
+ if (is_scalar) {
+ passelt = 0;
+ } else {
+ passelt = pass + (is_q * 2);
+ }
+
+ read_vec_element(s, tcg_op, rn, passelt, memop);
+
+ tcg_res[pass] = tcg_temp_new_i64();
+
+ if (opcode == 0xa || opcode == 0xb) {
+ /* Non-accumulating ops */
+ tcg_passres = tcg_res[pass];
+ } else {
+ tcg_passres = tcg_temp_new_i64();
+ }
+
+ tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
+ tcg_temp_free_i64(tcg_op);
+
+ if (satop) {
+ /* saturating doubling: the doubled product of SQDMULL is
+ * formed by saturating-adding the product to itself
+ */
+ gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
+ tcg_passres, tcg_passres);
+ }
+
+ if (opcode == 0xa || opcode == 0xb) {
+ continue;
+ }
+
+ /* Accumulating op: handle accumulate step */
+ read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+
+ switch (opcode) {
+ case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
+ tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
+ break;
+ case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
+ tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
+ break;
+ case 0x7: /* SQDMLSL, SQDMLSL2 */
+ tcg_gen_neg_i64(tcg_passres, tcg_passres);
+ /* fall through */
+ case 0x3: /* SQDMLAL, SQDMLAL2 */
+ gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
+ tcg_res[pass],
+ tcg_passres);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ tcg_temp_free_i64(tcg_passres);
+ }
+ tcg_temp_free_i64(tcg_idx);
+
+ if (is_scalar) {
+ clear_vec_high(s, rd);
+ }
+ } else {
+ TCGv_i32 tcg_idx = tcg_temp_new_i32();
+
+ assert(size == 1);
+ read_vec_element_i32(s, tcg_idx, rm, index, size);
+
+ if (!is_scalar) {
+ /* The simplest way to handle the 16x16 indexed ops is to
+ * duplicate the index into both halves of the 32 bit tcg_idx
+ * and then use the usual Neon helpers.
+ */
+ tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
+ }
+
+ for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
+ TCGv_i32 tcg_op = tcg_temp_new_i32();
+ TCGv_i64 tcg_passres;
+
+ if (is_scalar) {
+ read_vec_element_i32(s, tcg_op, rn, pass, size);
+ } else {
+ read_vec_element_i32(s, tcg_op, rn,
+ pass + (is_q * 2), MO_32);
+ }
+
+ tcg_res[pass] = tcg_temp_new_i64();
+
+ if (opcode == 0xa || opcode == 0xb) {
+ /* Non-accumulating ops */
+ tcg_passres = tcg_res[pass];
+ } else {
+ tcg_passres = tcg_temp_new_i64();
+ }
+
+ if (memop & MO_SIGN) {
+ gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
+ } else {
+ gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
+ }
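+ /* saturating doubling of the product, as in the 64-bit case */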
+ if (satop) {
+ gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
+ tcg_passres, tcg_passres);
+ }
+ tcg_temp_free_i32(tcg_op);
+
+ if (opcode == 0xa || opcode == 0xb) {
+ continue;
+ }
+
+ /* Accumulating op: handle accumulate step */
+ read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+
+ switch (opcode) {
+ case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
+ gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
+ tcg_passres);
+ break;
+ case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
+ gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
+ tcg_passres);
+ break;
+ case 0x7: /* SQDMLSL, SQDMLSL2 */
+ gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
+ /* fall through */
+ case 0x3: /* SQDMLAL, SQDMLAL2 */
+ gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
+ tcg_res[pass],
+ tcg_passres);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ tcg_temp_free_i64(tcg_passres);
+ }
+ tcg_temp_free_i32(tcg_idx);
+
+ if (is_scalar) {
+ tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
+ }
+ }
+
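+ /* A scalar result lives only in the low 64 bits of Rd;
+ * zero the high half.
+ */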
+ if (is_scalar) {
+ tcg_res[1] = tcg_const_i64(0);
+ }
+
+ for (pass = 0; pass < 2; pass++) {
+ write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+ tcg_temp_free_i64(tcg_res[pass]);
+ }
+ }
+
+ if (!TCGV_IS_UNUSED_PTR(fpst)) {
+ tcg_temp_free_ptr(fpst);
+ }
}
/* C3.6.19 Crypto AES
{ 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
{ 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
{ 0x0e000400, 0x9fe08400, disas_simd_copy },
- { 0x0f000000, 0x9f000400, disas_simd_indexed_vector },
+ { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
/* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
{ 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
{ 0x0f000400, 0x9f800400, disas_simd_shift_imm },
{ 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
{ 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
{ 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
- { 0x5f000000, 0xdf000400, disas_simd_scalar_indexed },
+ { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
{ 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
{ 0x4e280800, 0xff3e0c00, disas_crypto_aes },
{ 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
qemu_log("----------------\n");
qemu_log("IN: %s\n", lookup_symbol(pc_start));
log_target_disas(env, pc_start, dc->pc - pc_start,
- dc->thumb | (dc->bswap_code << 1));
+ 4 | (dc->bswap_code << 1));
qemu_log("\n");
}
#endif