}
}
-/* 32-bit pairwise ops end up the same as the elementwise versions. */
-#define gen_helper_neon_pmax_s32 tcg_gen_smax_i32
-#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
-#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
-#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
-
-#define GEN_NEON_INTEGER_OP_ENV(name) do { \
- switch ((size << 1) | u) { \
- case 0: \
- gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \
- break; \
- case 1: \
- gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \
- break; \
- case 2: \
- gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \
- break; \
- case 3: \
- gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \
- break; \
- case 4: \
- gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \
- break; \
- case 5: \
- gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \
- break; \
- default: return 1; \
- }} while (0)
-
-#define GEN_NEON_INTEGER_OP(name) do { \
- switch ((size << 1) | u) { \
- case 0: \
- gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \
- break; \
- case 1: \
- gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \
- break; \
- case 2: \
- gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \
- break; \
- case 3: \
- gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \
- break; \
- case 4: \
- gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \
- break; \
- case 5: \
- gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \
- break; \
- default: return 1; \
- }} while (0)
-
static TCGv_i32 neon_load_scratch(int scratch)
{
TCGv_i32 tmp = tcg_temp_new_i32();
}
}
-static inline void gen_neon_shift_narrow(int size, TCGv_i32 var, TCGv_i32 shift,
- int q, int u)
-{
- if (q) {
- if (u) {
- switch (size) {
- case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
- case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
- default: abort();
- }
- } else {
- switch (size) {
- case 1: gen_helper_neon_rshl_s16(var, var, shift); break;
- case 2: gen_helper_neon_rshl_s32(var, var, shift); break;
- default: abort();
- }
- }
- } else {
- if (u) {
- switch (size) {
- case 1: gen_helper_neon_shl_u16(var, var, shift); break;
- case 2: gen_ushl_i32(var, var, shift); break;
- default: abort();
- }
- } else {
- switch (size) {
- case 1: gen_helper_neon_shl_s16(var, var, shift); break;
- case 2: gen_sshl_i32(var, var, shift); break;
- default: abort();
- }
- }
- }
-}
-
static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u)
{
if (u) {
}
}
-/* Symbolic constants for op fields for Neon 3-register same-length.
- * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
- * table A7-9.
- */
-#define NEON_3R_VHADD 0
-#define NEON_3R_VQADD 1
-#define NEON_3R_VRHADD 2
-#define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
-#define NEON_3R_VHSUB 4
-#define NEON_3R_VQSUB 5
-#define NEON_3R_VCGT 6
-#define NEON_3R_VCGE 7
-#define NEON_3R_VSHL 8
-#define NEON_3R_VQSHL 9
-#define NEON_3R_VRSHL 10
-#define NEON_3R_VQRSHL 11
-#define NEON_3R_VMAX 12
-#define NEON_3R_VMIN 13
-#define NEON_3R_VABD 14
-#define NEON_3R_VABA 15
-#define NEON_3R_VADD_VSUB 16
-#define NEON_3R_VTST_VCEQ 17
-#define NEON_3R_VML 18 /* VMLA, VMLS */
-#define NEON_3R_VMUL 19
-#define NEON_3R_VPMAX 20
-#define NEON_3R_VPMIN 21
-#define NEON_3R_VQDMULH_VQRDMULH 22
-#define NEON_3R_VPADD_VQRDMLAH 23
-#define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
-#define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS, VQRDMLSH */
-#define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
-#define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
-#define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
-#define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
-#define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
-#define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */
-
-static const uint8_t neon_3r_sizes[] = {
- [NEON_3R_VHADD] = 0x7,
- [NEON_3R_VQADD] = 0xf,
- [NEON_3R_VRHADD] = 0x7,
- [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
- [NEON_3R_VHSUB] = 0x7,
- [NEON_3R_VQSUB] = 0xf,
- [NEON_3R_VCGT] = 0x7,
- [NEON_3R_VCGE] = 0x7,
- [NEON_3R_VSHL] = 0xf,
- [NEON_3R_VQSHL] = 0xf,
- [NEON_3R_VRSHL] = 0xf,
- [NEON_3R_VQRSHL] = 0xf,
- [NEON_3R_VMAX] = 0x7,
- [NEON_3R_VMIN] = 0x7,
- [NEON_3R_VABD] = 0x7,
- [NEON_3R_VABA] = 0x7,
- [NEON_3R_VADD_VSUB] = 0xf,
- [NEON_3R_VTST_VCEQ] = 0x7,
- [NEON_3R_VML] = 0x7,
- [NEON_3R_VMUL] = 0x7,
- [NEON_3R_VPMAX] = 0x7,
- [NEON_3R_VPMIN] = 0x7,
- [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
- [NEON_3R_VPADD_VQRDMLAH] = 0x7,
- [NEON_3R_SHA] = 0xf, /* size field encodes op type */
- [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */
- [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
- [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
- [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
- [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
- [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
- [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
-};
-
/* Symbolic constants for op fields for Neon 2-register miscellaneous.
* The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
* table A7-13.
[NEON_2RM_VCVT_UF] = 0x4,
};
-
-/* Expand v8.1 simd helper. */
-static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn,
- int q, int rd, int rn, int rm)
+static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
+ uint32_t opr_sz, uint32_t max_sz,
+ gen_helper_gvec_3_ptr *fn)
{
- if (dc_isar_feature(aa32_rdm, s)) {
- int opr_sz = (1 + q) * 8;
- tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
- vfp_reg_offset(1, rn),
- vfp_reg_offset(1, rm), cpu_env,
- opr_sz, opr_sz, 0, fn);
- return 0;
- }
- return 1;
-}
+ TCGv_ptr qc_ptr = tcg_temp_new_ptr();
-static void gen_ceq0_i32(TCGv_i32 d, TCGv_i32 a)
-{
- tcg_gen_setcondi_i32(TCG_COND_EQ, d, a, 0);
- tcg_gen_neg_i32(d, d);
+ tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
+ tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
+ opr_sz, max_sz, 0, fn);
+ tcg_temp_free_ptr(qc_ptr);
}
-static void gen_ceq0_i64(TCGv_i64 d, TCGv_i64 a)
+void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
- tcg_gen_setcondi_i64(TCG_COND_EQ, d, a, 0);
- tcg_gen_neg_i64(d, d);
+ static gen_helper_gvec_3_ptr * const fns[2] = {
+ gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
+ };
+ tcg_debug_assert(vece >= 1 && vece <= 2);
+ gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}
-static void gen_ceq0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
+void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
- TCGv_vec zero = tcg_const_zeros_vec_matching(d);
- tcg_gen_cmp_vec(TCG_COND_EQ, vece, d, a, zero);
- tcg_temp_free_vec(zero);
-}
+ static gen_helper_gvec_3_ptr * const fns[2] = {
+ gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
+ };
+ tcg_debug_assert(vece >= 1 && vece <= 2);
+ gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
+}
+
+#define GEN_CMP0(NAME, COND) \
+ static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a) \
+ { \
+ tcg_gen_setcondi_i32(COND, d, a, 0); \
+ tcg_gen_neg_i32(d, d); \
+ } \
+ static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a) \
+ { \
+ tcg_gen_setcondi_i64(COND, d, a, 0); \
+ tcg_gen_neg_i64(d, d); \
+ } \
+ static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
+ { \
+ TCGv_vec zero = tcg_const_zeros_vec_matching(d); \
+ tcg_gen_cmp_vec(COND, vece, d, a, zero); \
+ tcg_temp_free_vec(zero); \
+ } \
+ void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m, \
+ uint32_t opr_sz, uint32_t max_sz) \
+ { \
+ const GVecGen2 op[4] = { \
+ { .fno = gen_helper_gvec_##NAME##0_b, \
+ .fniv = gen_##NAME##0_vec, \
+ .opt_opc = vecop_list_cmp, \
+ .vece = MO_8 }, \
+ { .fno = gen_helper_gvec_##NAME##0_h, \
+ .fniv = gen_##NAME##0_vec, \
+ .opt_opc = vecop_list_cmp, \
+ .vece = MO_16 }, \
+ { .fni4 = gen_##NAME##0_i32, \
+ .fniv = gen_##NAME##0_vec, \
+ .opt_opc = vecop_list_cmp, \
+ .vece = MO_32 }, \
+ { .fni8 = gen_##NAME##0_i64, \
+ .fniv = gen_##NAME##0_vec, \
+ .opt_opc = vecop_list_cmp, \
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64, \
+ .vece = MO_64 }, \
+ }; \
+ tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]); \
+ }
static const TCGOpcode vecop_list_cmp[] = {
INDEX_op_cmp_vec, 0
};
-const GVecGen2 ceq0_op[4] = {
- { .fno = gen_helper_gvec_ceq0_b,
- .fniv = gen_ceq0_vec,
- .opt_opc = vecop_list_cmp,
- .vece = MO_8 },
- { .fno = gen_helper_gvec_ceq0_h,
- .fniv = gen_ceq0_vec,
- .opt_opc = vecop_list_cmp,
- .vece = MO_16 },
- { .fni4 = gen_ceq0_i32,
- .fniv = gen_ceq0_vec,
- .opt_opc = vecop_list_cmp,
- .vece = MO_32 },
- { .fni8 = gen_ceq0_i64,
- .fniv = gen_ceq0_vec,
- .opt_opc = vecop_list_cmp,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .vece = MO_64 },
-};
-
-static void gen_cle0_i32(TCGv_i32 d, TCGv_i32 a)
-{
- tcg_gen_setcondi_i32(TCG_COND_LE, d, a, 0);
- tcg_gen_neg_i32(d, d);
-}
-
-static void gen_cle0_i64(TCGv_i64 d, TCGv_i64 a)
-{
- tcg_gen_setcondi_i64(TCG_COND_LE, d, a, 0);
- tcg_gen_neg_i64(d, d);
-}
-
-static void gen_cle0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
-{
- TCGv_vec zero = tcg_const_zeros_vec_matching(d);
- tcg_gen_cmp_vec(TCG_COND_LE, vece, d, a, zero);
- tcg_temp_free_vec(zero);
-}
-
-const GVecGen2 cle0_op[4] = {
- { .fno = gen_helper_gvec_cle0_b,
- .fniv = gen_cle0_vec,
- .opt_opc = vecop_list_cmp,
- .vece = MO_8 },
- { .fno = gen_helper_gvec_cle0_h,
- .fniv = gen_cle0_vec,
- .opt_opc = vecop_list_cmp,
- .vece = MO_16 },
- { .fni4 = gen_cle0_i32,
- .fniv = gen_cle0_vec,
- .opt_opc = vecop_list_cmp,
- .vece = MO_32 },
- { .fni8 = gen_cle0_i64,
- .fniv = gen_cle0_vec,
- .opt_opc = vecop_list_cmp,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .vece = MO_64 },
-};
-
-static void gen_cge0_i32(TCGv_i32 d, TCGv_i32 a)
-{
- tcg_gen_setcondi_i32(TCG_COND_GE, d, a, 0);
- tcg_gen_neg_i32(d, d);
-}
-
-static void gen_cge0_i64(TCGv_i64 d, TCGv_i64 a)
-{
- tcg_gen_setcondi_i64(TCG_COND_GE, d, a, 0);
- tcg_gen_neg_i64(d, d);
-}
-
-static void gen_cge0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
-{
- TCGv_vec zero = tcg_const_zeros_vec_matching(d);
- tcg_gen_cmp_vec(TCG_COND_GE, vece, d, a, zero);
- tcg_temp_free_vec(zero);
-}
-
-const GVecGen2 cge0_op[4] = {
- { .fno = gen_helper_gvec_cge0_b,
- .fniv = gen_cge0_vec,
- .opt_opc = vecop_list_cmp,
- .vece = MO_8 },
- { .fno = gen_helper_gvec_cge0_h,
- .fniv = gen_cge0_vec,
- .opt_opc = vecop_list_cmp,
- .vece = MO_16 },
- { .fni4 = gen_cge0_i32,
- .fniv = gen_cge0_vec,
- .opt_opc = vecop_list_cmp,
- .vece = MO_32 },
- { .fni8 = gen_cge0_i64,
- .fniv = gen_cge0_vec,
- .opt_opc = vecop_list_cmp,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .vece = MO_64 },
-};
-
-static void gen_clt0_i32(TCGv_i32 d, TCGv_i32 a)
-{
- tcg_gen_setcondi_i32(TCG_COND_LT, d, a, 0);
- tcg_gen_neg_i32(d, d);
-}
-
-static void gen_clt0_i64(TCGv_i64 d, TCGv_i64 a)
-{
- tcg_gen_setcondi_i64(TCG_COND_LT, d, a, 0);
- tcg_gen_neg_i64(d, d);
-}
-
-static void gen_clt0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
-{
- TCGv_vec zero = tcg_const_zeros_vec_matching(d);
- tcg_gen_cmp_vec(TCG_COND_LT, vece, d, a, zero);
- tcg_temp_free_vec(zero);
-}
-
-const GVecGen2 clt0_op[4] = {
- { .fno = gen_helper_gvec_clt0_b,
- .fniv = gen_clt0_vec,
- .opt_opc = vecop_list_cmp,
- .vece = MO_8 },
- { .fno = gen_helper_gvec_clt0_h,
- .fniv = gen_clt0_vec,
- .opt_opc = vecop_list_cmp,
- .vece = MO_16 },
- { .fni4 = gen_clt0_i32,
- .fniv = gen_clt0_vec,
- .opt_opc = vecop_list_cmp,
- .vece = MO_32 },
- { .fni8 = gen_clt0_i64,
- .fniv = gen_clt0_vec,
- .opt_opc = vecop_list_cmp,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .vece = MO_64 },
-};
-
-static void gen_cgt0_i32(TCGv_i32 d, TCGv_i32 a)
-{
- tcg_gen_setcondi_i32(TCG_COND_GT, d, a, 0);
- tcg_gen_neg_i32(d, d);
-}
-
-static void gen_cgt0_i64(TCGv_i64 d, TCGv_i64 a)
-{
- tcg_gen_setcondi_i64(TCG_COND_GT, d, a, 0);
- tcg_gen_neg_i64(d, d);
-}
+GEN_CMP0(ceq, TCG_COND_EQ)
+GEN_CMP0(cle, TCG_COND_LE)
+GEN_CMP0(cge, TCG_COND_GE)
+GEN_CMP0(clt, TCG_COND_LT)
+GEN_CMP0(cgt, TCG_COND_GT)
-static void gen_cgt0_vec(unsigned vece, TCGv_vec d, TCGv_vec a)
-{
- TCGv_vec zero = tcg_const_zeros_vec_matching(d);
- tcg_gen_cmp_vec(TCG_COND_GT, vece, d, a, zero);
- tcg_temp_free_vec(zero);
-}
-
-const GVecGen2 cgt0_op[4] = {
- { .fno = gen_helper_gvec_cgt0_b,
- .fniv = gen_cgt0_vec,
- .opt_opc = vecop_list_cmp,
- .vece = MO_8 },
- { .fno = gen_helper_gvec_cgt0_h,
- .fniv = gen_cgt0_vec,
- .opt_opc = vecop_list_cmp,
- .vece = MO_16 },
- { .fni4 = gen_cgt0_i32,
- .fniv = gen_cgt0_vec,
- .opt_opc = vecop_list_cmp,
- .vece = MO_32 },
- { .fni8 = gen_cgt0_i64,
- .fniv = gen_cgt0_vec,
- .opt_opc = vecop_list_cmp,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .vece = MO_64 },
-};
+#undef GEN_CMP0
static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
tcg_gen_add_vec(vece, d, d, a);
}
-static const TCGOpcode vecop_list_ssra[] = {
- INDEX_op_sari_vec, INDEX_op_add_vec, 0
-};
+void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sari_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_ssra8_i64,
+ .fniv = gen_ssra_vec,
+ .fno = gen_helper_gvec_ssra_b,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = gen_ssra16_i64,
+ .fniv = gen_ssra_vec,
+ .fno = gen_helper_gvec_ssra_h,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_ssra32_i32,
+ .fniv = gen_ssra_vec,
+ .fno = gen_helper_gvec_ssra_s,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_ssra64_i64,
+ .fniv = gen_ssra_vec,
+ .fno = gen_helper_gvec_ssra_b,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
-const GVecGen2i ssra_op[4] = {
- { .fni8 = gen_ssra8_i64,
- .fniv = gen_ssra_vec,
- .load_dest = true,
- .opt_opc = vecop_list_ssra,
- .vece = MO_8 },
- { .fni8 = gen_ssra16_i64,
- .fniv = gen_ssra_vec,
- .load_dest = true,
- .opt_opc = vecop_list_ssra,
- .vece = MO_16 },
- { .fni4 = gen_ssra32_i32,
- .fniv = gen_ssra_vec,
- .load_dest = true,
- .opt_opc = vecop_list_ssra,
- .vece = MO_32 },
- { .fni8 = gen_ssra64_i64,
- .fniv = gen_ssra_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .opt_opc = vecop_list_ssra,
- .load_dest = true,
- .vece = MO_64 },
-};
+ /* tszimm encoding produces immediates in the range [1..esize]. */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ /*
+ * Shifts larger than the element size are architecturally valid.
+ * Signed results in all sign bits.
+ */
+ shift = MIN(shift, (8 << vece) - 1);
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+}
static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
tcg_gen_add_vec(vece, d, d, a);
}
-static const TCGOpcode vecop_list_usra[] = {
- INDEX_op_shri_vec, INDEX_op_add_vec, 0
-};
+void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_usra8_i64,
+ .fniv = gen_usra_vec,
+ .fno = gen_helper_gvec_usra_b,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8, },
+ { .fni8 = gen_usra16_i64,
+ .fniv = gen_usra_vec,
+ .fno = gen_helper_gvec_usra_h,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16, },
+ { .fni4 = gen_usra32_i32,
+ .fniv = gen_usra_vec,
+ .fno = gen_helper_gvec_usra_s,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32, },
+ { .fni8 = gen_usra64_i64,
+ .fniv = gen_usra_vec,
+ .fno = gen_helper_gvec_usra_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64, },
+ };
-const GVecGen2i usra_op[4] = {
- { .fni8 = gen_usra8_i64,
- .fniv = gen_usra_vec,
- .load_dest = true,
- .opt_opc = vecop_list_usra,
- .vece = MO_8, },
- { .fni8 = gen_usra16_i64,
- .fniv = gen_usra_vec,
- .load_dest = true,
- .opt_opc = vecop_list_usra,
- .vece = MO_16, },
- { .fni4 = gen_usra32_i32,
- .fniv = gen_usra_vec,
- .load_dest = true,
- .opt_opc = vecop_list_usra,
- .vece = MO_32, },
- { .fni8 = gen_usra64_i64,
- .fniv = gen_usra_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .opt_opc = vecop_list_usra,
- .vece = MO_64, },
-};
+ /* tszimm encoding produces immediates in the range [1..esize]. */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
-static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+ /*
+ * Shifts larger than the element size are architecturally valid.
+ * Unsigned results in all zeros as input to accumulate: nop.
+ */
+ if (shift < (8 << vece)) {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ } else {
+ /* Nop, but we do need to clear the tail. */
+ tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
+ }
+}
+
+/*
+ * Shift one less than the requested amount, and the low bit is
+ * the rounding bit. For the 8 and 16-bit operations, because we
+ * mask the low bit, we can perform a normal integer shift instead
+ * of a vector shift.
+ */
+static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
- uint64_t mask = dup_const(MO_8, 0xff >> shift);
TCGv_i64 t = tcg_temp_new_i64();
- tcg_gen_shri_i64(t, a, shift);
- tcg_gen_andi_i64(t, t, mask);
- tcg_gen_andi_i64(d, d, ~mask);
- tcg_gen_or_i64(d, d, t);
+ tcg_gen_shri_i64(t, a, sh - 1);
+ tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
+ tcg_gen_vec_sar8i_i64(d, a, sh);
+ tcg_gen_vec_add8_i64(d, d, t);
tcg_temp_free_i64(t);
}
-static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
- uint64_t mask = dup_const(MO_16, 0xffff >> shift);
TCGv_i64 t = tcg_temp_new_i64();
- tcg_gen_shri_i64(t, a, shift);
- tcg_gen_andi_i64(t, t, mask);
- tcg_gen_andi_i64(d, d, ~mask);
- tcg_gen_or_i64(d, d, t);
+ tcg_gen_shri_i64(t, a, sh - 1);
+ tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
+ tcg_gen_vec_sar16i_i64(d, a, sh);
+ tcg_gen_vec_add16_i64(d, d, t);
tcg_temp_free_i64(t);
}
-static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
+static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
- tcg_gen_shri_i32(a, a, shift);
- tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ tcg_gen_extract_i32(t, a, sh - 1, 1);
+ tcg_gen_sari_i32(d, a, sh);
+ tcg_gen_add_i32(d, d, t);
+ tcg_temp_free_i32(t);
}
-static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
- tcg_gen_shri_i64(a, a, shift);
- tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_extract_i64(t, a, sh - 1, 1);
+ tcg_gen_sari_i64(d, a, sh);
+ tcg_gen_add_i64(d, d, t);
+ tcg_temp_free_i64(t);
}
-static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
- if (sh == 0) {
- tcg_gen_mov_vec(d, a);
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec ones = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_shri_vec(vece, t, a, sh - 1);
+ tcg_gen_dupi_vec(vece, ones, 1);
+ tcg_gen_and_vec(vece, t, t, ones);
+ tcg_gen_sari_vec(vece, d, a, sh);
+ tcg_gen_add_vec(vece, d, d, t);
+
+ tcg_temp_free_vec(t);
+ tcg_temp_free_vec(ones);
+}
+
+void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_srshr8_i64,
+ .fniv = gen_srshr_vec,
+ .fno = gen_helper_gvec_srshr_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = gen_srshr16_i64,
+ .fniv = gen_srshr_vec,
+ .fno = gen_helper_gvec_srshr_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_srshr32_i32,
+ .fniv = gen_srshr_vec,
+ .fno = gen_helper_gvec_srshr_s,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_srshr64_i64,
+ .fniv = gen_srshr_vec,
+ .fno = gen_helper_gvec_srshr_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+
+ /* tszimm encoding produces immediates in the range [1..esize] */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ if (shift == (8 << vece)) {
+ /*
+ * Shifts larger than the element size are architecturally valid.
+ * Signed results in all sign bits. With rounding, this produces
+ * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
+ * I.e. always zero.
+ */
+ tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
} else {
- TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec m = tcg_temp_new_vec_matching(d);
-
- tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
- tcg_gen_shri_vec(vece, t, a, sh);
- tcg_gen_and_vec(vece, d, d, m);
- tcg_gen_or_vec(vece, d, d, t);
-
- tcg_temp_free_vec(t);
- tcg_temp_free_vec(m);
- }
-}
-
-static const TCGOpcode vecop_list_sri[] = { INDEX_op_shri_vec, 0 };
-
-const GVecGen2i sri_op[4] = {
- { .fni8 = gen_shr8_ins_i64,
- .fniv = gen_shr_ins_vec,
- .load_dest = true,
- .opt_opc = vecop_list_sri,
- .vece = MO_8 },
- { .fni8 = gen_shr16_ins_i64,
- .fniv = gen_shr_ins_vec,
- .load_dest = true,
- .opt_opc = vecop_list_sri,
- .vece = MO_16 },
- { .fni4 = gen_shr32_ins_i32,
- .fniv = gen_shr_ins_vec,
- .load_dest = true,
- .opt_opc = vecop_list_sri,
- .vece = MO_32 },
- { .fni8 = gen_shr64_ins_i64,
- .fniv = gen_shr_ins_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .opt_opc = vecop_list_sri,
- .vece = MO_64 },
-};
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ }
+}
-static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
- uint64_t mask = dup_const(MO_8, 0xff << shift);
TCGv_i64 t = tcg_temp_new_i64();
- tcg_gen_shli_i64(t, a, shift);
- tcg_gen_andi_i64(t, t, mask);
- tcg_gen_andi_i64(d, d, ~mask);
- tcg_gen_or_i64(d, d, t);
+ gen_srshr8_i64(t, a, sh);
+ tcg_gen_vec_add8_i64(d, d, t);
tcg_temp_free_i64(t);
}
-static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
- uint64_t mask = dup_const(MO_16, 0xffff << shift);
TCGv_i64 t = tcg_temp_new_i64();
- tcg_gen_shli_i64(t, a, shift);
- tcg_gen_andi_i64(t, t, mask);
- tcg_gen_andi_i64(d, d, ~mask);
- tcg_gen_or_i64(d, d, t);
+ gen_srshr16_i64(t, a, sh);
+ tcg_gen_vec_add16_i64(d, d, t);
tcg_temp_free_i64(t);
}
-static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
+static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
- tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
-}
+ TCGv_i32 t = tcg_temp_new_i32();
-static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
- tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
+ gen_srshr32_i32(t, a, sh);
+ tcg_gen_add_i32(d, d, t);
+ tcg_temp_free_i32(t);
}
-static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
- if (sh == 0) {
- tcg_gen_mov_vec(d, a);
- } else {
- TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec m = tcg_temp_new_vec_matching(d);
-
- tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
- tcg_gen_shli_vec(vece, t, a, sh);
- tcg_gen_and_vec(vece, d, d, m);
- tcg_gen_or_vec(vece, d, d, t);
-
- tcg_temp_free_vec(t);
- tcg_temp_free_vec(m);
- }
-}
-
-static const TCGOpcode vecop_list_sli[] = { INDEX_op_shli_vec, 0 };
-
-const GVecGen2i sli_op[4] = {
- { .fni8 = gen_shl8_ins_i64,
- .fniv = gen_shl_ins_vec,
- .load_dest = true,
- .opt_opc = vecop_list_sli,
- .vece = MO_8 },
- { .fni8 = gen_shl16_ins_i64,
- .fniv = gen_shl_ins_vec,
- .load_dest = true,
- .opt_opc = vecop_list_sli,
- .vece = MO_16 },
- { .fni4 = gen_shl32_ins_i32,
- .fniv = gen_shl_ins_vec,
- .load_dest = true,
- .opt_opc = vecop_list_sli,
- .vece = MO_32 },
- { .fni8 = gen_shl64_ins_i64,
- .fniv = gen_shl_ins_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .opt_opc = vecop_list_sli,
- .vece = MO_64 },
-};
+ TCGv_i64 t = tcg_temp_new_i64();
-static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
- gen_helper_neon_mul_u8(a, a, b);
- gen_helper_neon_add_u8(d, d, a);
+ gen_srshr64_i64(t, a, sh);
+ tcg_gen_add_i64(d, d, t);
+ tcg_temp_free_i64(t);
}
-static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
- gen_helper_neon_mul_u8(a, a, b);
- gen_helper_neon_sub_u8(d, d, a);
-}
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
-static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
- gen_helper_neon_mul_u16(a, a, b);
- gen_helper_neon_add_u16(d, d, a);
+ gen_srshr_vec(vece, t, a, sh);
+ tcg_gen_add_vec(vece, d, d, t);
+ tcg_temp_free_vec(t);
}
-static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
{
- gen_helper_neon_mul_u16(a, a, b);
- gen_helper_neon_sub_u16(d, d, a);
-}
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_srsra8_i64,
+ .fniv = gen_srsra_vec,
+ .fno = gen_helper_gvec_srsra_b,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_8 },
+ { .fni8 = gen_srsra16_i64,
+ .fniv = gen_srsra_vec,
+ .fno = gen_helper_gvec_srsra_h,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_16 },
+ { .fni4 = gen_srsra32_i32,
+ .fniv = gen_srsra_vec,
+ .fno = gen_helper_gvec_srsra_s,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_32 },
+ { .fni8 = gen_srsra64_i64,
+ .fniv = gen_srsra_vec,
+ .fno = gen_helper_gvec_srsra_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
-static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
- tcg_gen_mul_i32(a, a, b);
- tcg_gen_add_i32(d, d, a);
+ /* tszimm encoding produces immediates in the range [1..esize] */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ /*
+ * Shifts larger than the element size are architecturally valid.
+ * Signed results in all sign bits. With rounding, this produces
+ * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
+ * I.e. always zero. With accumulation, this leaves D unchanged.
+ */
+ if (shift == (8 << vece)) {
+ /* Nop, but we do need to clear the tail. */
+ tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
+ } else {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ }
}
-static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
- tcg_gen_mul_i32(a, a, b);
- tcg_gen_sub_i32(d, d, a);
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(t, a, sh - 1);
+ tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
+ tcg_gen_vec_shr8i_i64(d, a, sh);
+ tcg_gen_vec_add8_i64(d, d, t);
+ tcg_temp_free_i64(t);
}
-static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
- tcg_gen_mul_i64(a, a, b);
- tcg_gen_add_i64(d, d, a);
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(t, a, sh - 1);
+ tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
+ tcg_gen_vec_shr16i_i64(d, a, sh);
+ tcg_gen_vec_add16_i64(d, d, t);
+ tcg_temp_free_i64(t);
}
-static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
- tcg_gen_mul_i64(a, a, b);
- tcg_gen_sub_i64(d, d, a);
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ tcg_gen_extract_i32(t, a, sh - 1, 1);
+ tcg_gen_shri_i32(d, a, sh);
+ tcg_gen_add_i32(d, d, t);
+ tcg_temp_free_i32(t);
}
-static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
- tcg_gen_mul_vec(vece, a, a, b);
- tcg_gen_add_vec(vece, d, d, a);
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_extract_i64(t, a, sh - 1, 1);
+ tcg_gen_shri_i64(d, a, sh);
+ tcg_gen_add_i64(d, d, t);
+ tcg_temp_free_i64(t);
}
-static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
{
- tcg_gen_mul_vec(vece, a, a, b);
- tcg_gen_sub_vec(vece, d, d, a);
-}
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec ones = tcg_temp_new_vec_matching(d);
-/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
- * these tables are shared with AArch64 which does support them.
- */
+ tcg_gen_shri_vec(vece, t, a, shift - 1);
+ tcg_gen_dupi_vec(vece, ones, 1);
+ tcg_gen_and_vec(vece, t, t, ones);
+ tcg_gen_shri_vec(vece, d, a, shift);
+ tcg_gen_add_vec(vece, d, d, t);
-static const TCGOpcode vecop_list_mla[] = {
- INDEX_op_mul_vec, INDEX_op_add_vec, 0
-};
+ tcg_temp_free_vec(t);
+ tcg_temp_free_vec(ones);
+}
-static const TCGOpcode vecop_list_mls[] = {
- INDEX_op_mul_vec, INDEX_op_sub_vec, 0
-};
+void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_urshr8_i64,
+ .fniv = gen_urshr_vec,
+ .fno = gen_helper_gvec_urshr_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = gen_urshr16_i64,
+ .fniv = gen_urshr_vec,
+ .fno = gen_helper_gvec_urshr_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_urshr32_i32,
+ .fniv = gen_urshr_vec,
+ .fno = gen_helper_gvec_urshr_s,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_urshr64_i64,
+ .fniv = gen_urshr_vec,
+ .fno = gen_helper_gvec_urshr_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
-const GVecGen3 mla_op[4] = {
- { .fni4 = gen_mla8_i32,
- .fniv = gen_mla_vec,
- .load_dest = true,
- .opt_opc = vecop_list_mla,
- .vece = MO_8 },
- { .fni4 = gen_mla16_i32,
- .fniv = gen_mla_vec,
- .load_dest = true,
- .opt_opc = vecop_list_mla,
- .vece = MO_16 },
- { .fni4 = gen_mla32_i32,
- .fniv = gen_mla_vec,
- .load_dest = true,
- .opt_opc = vecop_list_mla,
- .vece = MO_32 },
- { .fni8 = gen_mla64_i64,
- .fniv = gen_mla_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .opt_opc = vecop_list_mla,
- .vece = MO_64 },
-};
+ /* tszimm encoding produces immediates in the range [1..esize] */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
-const GVecGen3 mls_op[4] = {
- { .fni4 = gen_mls8_i32,
- .fniv = gen_mls_vec,
- .load_dest = true,
- .opt_opc = vecop_list_mls,
- .vece = MO_8 },
- { .fni4 = gen_mls16_i32,
- .fniv = gen_mls_vec,
- .load_dest = true,
- .opt_opc = vecop_list_mls,
- .vece = MO_16 },
- { .fni4 = gen_mls32_i32,
- .fniv = gen_mls_vec,
- .load_dest = true,
- .opt_opc = vecop_list_mls,
- .vece = MO_32 },
- { .fni8 = gen_mls64_i64,
- .fniv = gen_mls_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .opt_opc = vecop_list_mls,
- .vece = MO_64 },
-};
+ if (shift == (8 << vece)) {
+ /*
+ * Shifts larger than the element size are architecturally valid.
+ * Unsigned results in zero. With rounding, this produces a
+ * copy of the most significant bit.
+ */
+ tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
+ } else {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ }
+}
-/* CMTST : test is "if (X & Y != 0)". */
-static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
- tcg_gen_and_i32(d, a, b);
- tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
- tcg_gen_neg_i32(d, d);
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ if (sh == 8) {
+ tcg_gen_vec_shr8i_i64(t, a, 7);
+ } else {
+ gen_urshr8_i64(t, a, sh);
+ }
+ tcg_gen_vec_add8_i64(d, d, t);
+ tcg_temp_free_i64(t);
}
-void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
- tcg_gen_and_i64(d, a, b);
- tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
- tcg_gen_neg_i64(d, d);
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ if (sh == 16) {
+ tcg_gen_vec_shr16i_i64(t, a, 15);
+ } else {
+ gen_urshr16_i64(t, a, sh);
+ }
+ tcg_gen_vec_add16_i64(d, d, t);
+ tcg_temp_free_i64(t);
}
-static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
{
- tcg_gen_and_vec(vece, d, a, b);
- tcg_gen_dupi_vec(vece, a, 0);
- tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
-}
+ TCGv_i32 t = tcg_temp_new_i32();
-static const TCGOpcode vecop_list_cmtst[] = { INDEX_op_cmp_vec, 0 };
-
-const GVecGen3 cmtst_op[4] = {
- { .fni4 = gen_helper_neon_tst_u8,
- .fniv = gen_cmtst_vec,
- .opt_opc = vecop_list_cmtst,
- .vece = MO_8 },
- { .fni4 = gen_helper_neon_tst_u16,
- .fniv = gen_cmtst_vec,
- .opt_opc = vecop_list_cmtst,
- .vece = MO_16 },
- { .fni4 = gen_cmtst_i32,
- .fniv = gen_cmtst_vec,
- .opt_opc = vecop_list_cmtst,
- .vece = MO_32 },
- { .fni8 = gen_cmtst_i64,
- .fniv = gen_cmtst_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .opt_opc = vecop_list_cmtst,
- .vece = MO_64 },
-};
+ if (sh == 32) {
+ tcg_gen_shri_i32(t, a, 31);
+ } else {
+ gen_urshr32_i32(t, a, sh);
+ }
+ tcg_gen_add_i32(d, d, t);
+ tcg_temp_free_i32(t);
+}
-void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
+static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
{
- TCGv_i32 lval = tcg_temp_new_i32();
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ if (sh == 64) {
+ tcg_gen_shri_i64(t, a, 63);
+ } else {
+ gen_urshr64_i64(t, a, sh);
+ }
+ tcg_gen_add_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+ if (sh == (8 << vece)) {
+ tcg_gen_shri_vec(vece, t, a, sh - 1);
+ } else {
+ gen_urshr_vec(vece, t, a, sh);
+ }
+ tcg_gen_add_vec(vece, d, d, t);
+ tcg_temp_free_vec(t);
+}
+
+void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_ursra8_i64,
+ .fniv = gen_ursra_vec,
+ .fno = gen_helper_gvec_ursra_b,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_8 },
+ { .fni8 = gen_ursra16_i64,
+ .fniv = gen_ursra_vec,
+ .fno = gen_helper_gvec_ursra_h,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_16 },
+ { .fni4 = gen_ursra32_i32,
+ .fniv = gen_ursra_vec,
+ .fno = gen_helper_gvec_ursra_s,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_32 },
+ { .fni8 = gen_ursra64_i64,
+ .fniv = gen_ursra_vec,
+ .fno = gen_helper_gvec_ursra_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
+
+ /* tszimm encoding produces immediates in the range [1..esize] */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+}
+
+static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ uint64_t mask = dup_const(MO_8, 0xff >> shift);
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(t, a, shift);
+ tcg_gen_andi_i64(t, t, mask);
+ tcg_gen_andi_i64(d, d, ~mask);
+ tcg_gen_or_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ uint64_t mask = dup_const(MO_16, 0xffff >> shift);
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(t, a, shift);
+ tcg_gen_andi_i64(t, t, mask);
+ tcg_gen_andi_i64(d, d, ~mask);
+ tcg_gen_or_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
+{
+ tcg_gen_shri_i32(a, a, shift);
+ tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
+}
+
+static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_shri_i64(a, a, shift);
+ tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
+}
+
+static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec m = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
+ tcg_gen_shri_vec(vece, t, a, sh);
+ tcg_gen_and_vec(vece, d, d, m);
+ tcg_gen_or_vec(vece, d, d, t);
+
+ tcg_temp_free_vec(t);
+ tcg_temp_free_vec(m);
+}
+
+void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
+ const GVecGen2i ops[4] = {
+ { .fni8 = gen_shr8_ins_i64,
+ .fniv = gen_shr_ins_vec,
+ .fno = gen_helper_gvec_sri_b,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = gen_shr16_ins_i64,
+ .fniv = gen_shr_ins_vec,
+ .fno = gen_helper_gvec_sri_h,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_shr32_ins_i32,
+ .fniv = gen_shr_ins_vec,
+ .fno = gen_helper_gvec_sri_s,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_shr64_ins_i64,
+ .fniv = gen_shr_ins_vec,
+ .fno = gen_helper_gvec_sri_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+
+ /* tszimm encoding produces immediates in the range [1..esize]. */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ /* Shift of esize leaves destination unchanged. */
+ if (shift < (8 << vece)) {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ } else {
+ /* Nop, but we do need to clear the tail. */
+ tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
+ }
+}
+
+static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ uint64_t mask = dup_const(MO_8, 0xff << shift);
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shli_i64(t, a, shift);
+ tcg_gen_andi_i64(t, t, mask);
+ tcg_gen_andi_i64(d, d, ~mask);
+ tcg_gen_or_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ uint64_t mask = dup_const(MO_16, 0xffff << shift);
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shli_i64(t, a, shift);
+ tcg_gen_andi_i64(t, t, mask);
+ tcg_gen_andi_i64(d, d, ~mask);
+ tcg_gen_or_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
+{
+ tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
+}
+
+static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
+}
+
+static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec m = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_shli_vec(vece, t, a, sh);
+ tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
+ tcg_gen_and_vec(vece, d, d, m);
+ tcg_gen_or_vec(vece, d, d, t);
+
+ tcg_temp_free_vec(t);
+ tcg_temp_free_vec(m);
+}
+
+void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
+ const GVecGen2i ops[4] = {
+ { .fni8 = gen_shl8_ins_i64,
+ .fniv = gen_shl_ins_vec,
+ .fno = gen_helper_gvec_sli_b,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = gen_shl16_ins_i64,
+ .fniv = gen_shl_ins_vec,
+ .fno = gen_helper_gvec_sli_h,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_shl32_ins_i32,
+ .fniv = gen_shl_ins_vec,
+ .fno = gen_helper_gvec_sli_s,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_shl64_ins_i64,
+ .fniv = gen_shl_ins_vec,
+ .fno = gen_helper_gvec_sli_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+
+ /* tszimm encoding produces immediates in the range [0..esize-1]. */
+ tcg_debug_assert(shift >= 0);
+ tcg_debug_assert(shift < (8 << vece));
+
+ if (shift == 0) {
+ tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
+ } else {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ }
+}
+
+static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ gen_helper_neon_mul_u8(a, a, b);
+ gen_helper_neon_add_u8(d, d, a);
+}
+
+static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ gen_helper_neon_mul_u8(a, a, b);
+ gen_helper_neon_sub_u8(d, d, a);
+}
+
+static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ gen_helper_neon_mul_u16(a, a, b);
+ gen_helper_neon_add_u16(d, d, a);
+}
+
+static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ gen_helper_neon_mul_u16(a, a, b);
+ gen_helper_neon_sub_u16(d, d, a);
+}
+
+static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_mul_i32(a, a, b);
+ tcg_gen_add_i32(d, d, a);
+}
+
+static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_mul_i32(a, a, b);
+ tcg_gen_sub_i32(d, d, a);
+}
+
+static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_mul_i64(a, a, b);
+ tcg_gen_add_i64(d, d, a);
+}
+
+static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_mul_i64(a, a, b);
+ tcg_gen_sub_i64(d, d, a);
+}
+
+static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ tcg_gen_mul_vec(vece, a, a, b);
+ tcg_gen_add_vec(vece, d, d, a);
+}
+
+static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ tcg_gen_mul_vec(vece, a, a, b);
+ tcg_gen_sub_vec(vece, d, d, a);
+}
+
+/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
+ * these tables are shared with AArch64 which does support them.
+ */
+void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_mul_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fni4 = gen_mla8_i32,
+ .fniv = gen_mla_vec,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni4 = gen_mla16_i32,
+ .fniv = gen_mla_vec,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_mla32_i32,
+ .fniv = gen_mla_vec,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_mla64_i64,
+ .fniv = gen_mla_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_mul_vec, INDEX_op_sub_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fni4 = gen_mls8_i32,
+ .fniv = gen_mls_vec,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni4 = gen_mls16_i32,
+ .fniv = gen_mls_vec,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_mls32_i32,
+ .fniv = gen_mls_vec,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_mls64_i64,
+ .fniv = gen_mls_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+/* CMTST : test is "if (X & Y != 0)". */
+static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_and_i32(d, a, b);
+ tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
+ tcg_gen_neg_i32(d, d);
+}
+
+void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_and_i64(d, a, b);
+ tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
+ tcg_gen_neg_i64(d, d);
+}
+
+static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ tcg_gen_and_vec(vece, d, a, b);
+ tcg_gen_dupi_vec(vece, a, 0);
+ tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
+}
+
+void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
+ static const GVecGen3 ops[4] = {
+ { .fni4 = gen_helper_neon_tst_u8,
+ .fniv = gen_cmtst_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni4 = gen_helper_neon_tst_u16,
+ .fniv = gen_cmtst_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_cmtst_i32,
+ .fniv = gen_cmtst_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_cmtst_i64,
+ .fniv = gen_cmtst_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
+{
+ TCGv_i32 lval = tcg_temp_new_i32();
TCGv_i32 rval = tcg_temp_new_i32();
TCGv_i32 lsh = tcg_temp_new_i32();
TCGv_i32 rsh = tcg_temp_new_i32();
tcg_temp_free_vec(rsh);
}
-static const TCGOpcode ushl_list[] = {
- INDEX_op_neg_vec, INDEX_op_shlv_vec,
- INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
-};
-
-const GVecGen3 ushl_op[4] = {
- { .fniv = gen_ushl_vec,
- .fno = gen_helper_gvec_ushl_b,
- .opt_opc = ushl_list,
- .vece = MO_8 },
- { .fniv = gen_ushl_vec,
- .fno = gen_helper_gvec_ushl_h,
- .opt_opc = ushl_list,
- .vece = MO_16 },
- { .fni4 = gen_ushl_i32,
- .fniv = gen_ushl_vec,
- .opt_opc = ushl_list,
- .vece = MO_32 },
- { .fni8 = gen_ushl_i64,
- .fniv = gen_ushl_vec,
- .opt_opc = ushl_list,
- .vece = MO_64 },
-};
+void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_neg_vec, INDEX_op_shlv_vec,
+ INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_ushl_vec,
+ .fno = gen_helper_gvec_ushl_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = gen_ushl_vec,
+ .fno = gen_helper_gvec_ushl_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_ushl_i32,
+ .fniv = gen_ushl_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_ushl_i64,
+ .fniv = gen_ushl_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
tcg_temp_free_vec(tmp);
}
-static const TCGOpcode sshl_list[] = {
- INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
- INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
-};
-
-const GVecGen3 sshl_op[4] = {
- { .fniv = gen_sshl_vec,
- .fno = gen_helper_gvec_sshl_b,
- .opt_opc = sshl_list,
- .vece = MO_8 },
- { .fniv = gen_sshl_vec,
- .fno = gen_helper_gvec_sshl_h,
- .opt_opc = sshl_list,
- .vece = MO_16 },
- { .fni4 = gen_sshl_i32,
- .fniv = gen_sshl_vec,
- .opt_opc = sshl_list,
- .vece = MO_32 },
- { .fni8 = gen_sshl_i64,
- .fniv = gen_sshl_vec,
- .opt_opc = sshl_list,
- .vece = MO_64 },
-};
+void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
+ INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_sshl_vec,
+ .fno = gen_helper_gvec_sshl_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = gen_sshl_vec,
+ .fno = gen_helper_gvec_sshl_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_sshl_i32,
+ .fniv = gen_sshl_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_sshl_i64,
+ .fniv = gen_sshl_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
TCGv_vec a, TCGv_vec b)
tcg_temp_free_vec(x);
}
-static const TCGOpcode vecop_list_uqadd[] = {
- INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
-};
-
-const GVecGen4 uqadd_op[4] = {
- { .fniv = gen_uqadd_vec,
- .fno = gen_helper_gvec_uqadd_b,
- .write_aofs = true,
- .opt_opc = vecop_list_uqadd,
- .vece = MO_8 },
- { .fniv = gen_uqadd_vec,
- .fno = gen_helper_gvec_uqadd_h,
- .write_aofs = true,
- .opt_opc = vecop_list_uqadd,
- .vece = MO_16 },
- { .fniv = gen_uqadd_vec,
- .fno = gen_helper_gvec_uqadd_s,
- .write_aofs = true,
- .opt_opc = vecop_list_uqadd,
- .vece = MO_32 },
- { .fniv = gen_uqadd_vec,
- .fno = gen_helper_gvec_uqadd_d,
- .write_aofs = true,
- .opt_opc = vecop_list_uqadd,
- .vece = MO_64 },
-};
+void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen4 ops[4] = {
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_b,
+ .write_aofs = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_h,
+ .write_aofs = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_s,
+ .write_aofs = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_d,
+ .write_aofs = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
+ rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
TCGv_vec a, TCGv_vec b)
tcg_temp_free_vec(x);
}
-static const TCGOpcode vecop_list_sqadd[] = {
- INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
-};
-
-const GVecGen4 sqadd_op[4] = {
- { .fniv = gen_sqadd_vec,
- .fno = gen_helper_gvec_sqadd_b,
- .opt_opc = vecop_list_sqadd,
- .write_aofs = true,
- .vece = MO_8 },
- { .fniv = gen_sqadd_vec,
- .fno = gen_helper_gvec_sqadd_h,
- .opt_opc = vecop_list_sqadd,
- .write_aofs = true,
- .vece = MO_16 },
- { .fniv = gen_sqadd_vec,
- .fno = gen_helper_gvec_sqadd_s,
- .opt_opc = vecop_list_sqadd,
- .write_aofs = true,
- .vece = MO_32 },
- { .fniv = gen_sqadd_vec,
- .fno = gen_helper_gvec_sqadd_d,
- .opt_opc = vecop_list_sqadd,
- .write_aofs = true,
- .vece = MO_64 },
-};
+void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen4 ops[4] = {
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_b,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_8 },
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_h,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_16 },
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_s,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_32 },
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_d,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
+ rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
TCGv_vec a, TCGv_vec b)
tcg_temp_free_vec(x);
}
-static const TCGOpcode vecop_list_uqsub[] = {
- INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
-};
-
-const GVecGen4 uqsub_op[4] = {
- { .fniv = gen_uqsub_vec,
- .fno = gen_helper_gvec_uqsub_b,
- .opt_opc = vecop_list_uqsub,
- .write_aofs = true,
- .vece = MO_8 },
- { .fniv = gen_uqsub_vec,
- .fno = gen_helper_gvec_uqsub_h,
- .opt_opc = vecop_list_uqsub,
- .write_aofs = true,
- .vece = MO_16 },
- { .fniv = gen_uqsub_vec,
- .fno = gen_helper_gvec_uqsub_s,
- .opt_opc = vecop_list_uqsub,
- .write_aofs = true,
- .vece = MO_32 },
- { .fniv = gen_uqsub_vec,
- .fno = gen_helper_gvec_uqsub_d,
- .opt_opc = vecop_list_uqsub,
- .write_aofs = true,
- .vece = MO_64 },
-};
+void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
+ };
+ static const GVecGen4 ops[4] = {
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_b,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_8 },
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_h,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_16 },
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_s,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_32 },
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_d,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
+ rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
TCGv_vec a, TCGv_vec b)
tcg_temp_free_vec(x);
}
-static const TCGOpcode vecop_list_sqsub[] = {
- INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
-};
-
-const GVecGen4 sqsub_op[4] = {
- { .fniv = gen_sqsub_vec,
- .fno = gen_helper_gvec_sqsub_b,
- .opt_opc = vecop_list_sqsub,
- .write_aofs = true,
- .vece = MO_8 },
- { .fniv = gen_sqsub_vec,
- .fno = gen_helper_gvec_sqsub_h,
- .opt_opc = vecop_list_sqsub,
- .write_aofs = true,
- .vece = MO_16 },
- { .fniv = gen_sqsub_vec,
- .fno = gen_helper_gvec_sqsub_s,
- .opt_opc = vecop_list_sqsub,
- .write_aofs = true,
- .vece = MO_32 },
- { .fniv = gen_sqsub_vec,
- .fno = gen_helper_gvec_sqsub_d,
- .opt_opc = vecop_list_sqsub,
- .write_aofs = true,
- .vece = MO_64 },
-};
-
-/* Translate a NEON data processing instruction. Return nonzero if the
- instruction is invalid.
- We process data in a mixture of 32-bit and 64-bit chunks.
- Mostly we use 32-bit chunks so we can use normal scalar instructions. */
-
-static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
+void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
- int op;
- int q;
- int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs;
- int size;
- int shift;
- int pass;
- int count;
- int pairwise;
- int u;
- int vec_size;
- uint32_t imm;
- TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
- TCGv_ptr ptr1, ptr2, ptr3;
- TCGv_i64 tmp64;
-
- if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
- return 1;
- }
-
- /* FIXME: this access check should not take precedence over UNDEF
- * for invalid encodings; we will generate incorrect syndrome information
- * for attempts to execute invalid vfp/neon encodings with FP disabled.
- */
- if (s->fp_excp_el) {
- gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
- syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
- return 0;
- }
-
- if (!s->vfp_enabled)
- return 1;
- q = (insn & (1 << 6)) != 0;
- u = (insn >> 24) & 1;
- VFP_DREG_D(rd, insn);
- VFP_DREG_N(rn, insn);
- VFP_DREG_M(rm, insn);
- size = (insn >> 20) & 3;
- vec_size = q ? 16 : 8;
- rd_ofs = neon_reg_offset(rd, 0);
- rn_ofs = neon_reg_offset(rn, 0);
- rm_ofs = neon_reg_offset(rm, 0);
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
+ };
+ static const GVecGen4 ops[4] = {
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_b,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_8 },
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_h,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_16 },
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_s,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_32 },
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_d,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
+ rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
- if ((insn & (1 << 23)) == 0) {
- /* Three register same length. */
- op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
- /* Catch invalid op and bad size combinations: UNDEF */
- if ((neon_3r_sizes[op] & (1 << size)) == 0) {
- return 1;
- }
- /* All insns of this form UNDEF for either this condition or the
- * superset of cases "Q==1"; we catch the latter later.
- */
- if (q && ((rd | rn | rm) & 1)) {
- return 1;
- }
- switch (op) {
- case NEON_3R_SHA:
- /* The SHA-1/SHA-256 3-register instructions require special
- * treatment here, as their size field is overloaded as an
- * op type selector, and they all consume their input in a
- * single pass.
- */
- if (!q) {
- return 1;
- }
- if (!u) { /* SHA-1 */
- if (!dc_isar_feature(aa32_sha1, s)) {
- return 1;
- }
- ptr1 = vfp_reg_ptr(true, rd);
- ptr2 = vfp_reg_ptr(true, rn);
- ptr3 = vfp_reg_ptr(true, rm);
- tmp4 = tcg_const_i32(size);
- gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp4);
- tcg_temp_free_i32(tmp4);
- } else { /* SHA-256 */
- if (!dc_isar_feature(aa32_sha2, s) || size == 3) {
- return 1;
- }
- ptr1 = vfp_reg_ptr(true, rd);
- ptr2 = vfp_reg_ptr(true, rn);
- ptr3 = vfp_reg_ptr(true, rm);
- switch (size) {
- case 0:
- gen_helper_crypto_sha256h(ptr1, ptr2, ptr3);
- break;
- case 1:
- gen_helper_crypto_sha256h2(ptr1, ptr2, ptr3);
- break;
- case 2:
- gen_helper_crypto_sha256su1(ptr1, ptr2, ptr3);
- break;
- }
- }
- tcg_temp_free_ptr(ptr1);
- tcg_temp_free_ptr(ptr2);
- tcg_temp_free_ptr(ptr3);
- return 0;
+static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
- case NEON_3R_VPADD_VQRDMLAH:
- if (!u) {
- break; /* VPADD */
- }
- /* VQRDMLAH */
- switch (size) {
- case 1:
- return do_v81_helper(s, gen_helper_gvec_qrdmlah_s16,
- q, rd, rn, rm);
- case 2:
- return do_v81_helper(s, gen_helper_gvec_qrdmlah_s32,
- q, rd, rn, rm);
- }
- return 1;
+ tcg_gen_sub_i32(t, a, b);
+ tcg_gen_sub_i32(d, b, a);
+ tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
+ tcg_temp_free_i32(t);
+}
- case NEON_3R_VFM_VQRDMLSH:
- if (!u) {
- /* VFM, VFMS */
- if (size == 1) {
- return 1;
- }
- break;
- }
- /* VQRDMLSH */
- switch (size) {
- case 1:
- return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s16,
- q, rd, rn, rm);
- case 2:
- return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s32,
- q, rd, rn, rm);
- }
- return 1;
+static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
- case NEON_3R_VADD_VSUB:
- case NEON_3R_LOGIC:
- case NEON_3R_VMAX:
- case NEON_3R_VMIN:
- case NEON_3R_VTST_VCEQ:
- case NEON_3R_VCGT:
- case NEON_3R_VCGE:
- case NEON_3R_VQADD:
- case NEON_3R_VQSUB:
- case NEON_3R_VMUL:
- case NEON_3R_VML:
- case NEON_3R_VSHL:
- /* Already handled by decodetree */
- return 1;
- }
+ tcg_gen_sub_i64(t, a, b);
+ tcg_gen_sub_i64(d, b, a);
+ tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
+ tcg_temp_free_i64(t);
+}
- if (size == 3) {
- /* 64-bit element instructions. */
- for (pass = 0; pass < (q ? 2 : 1); pass++) {
- neon_load_reg64(cpu_V0, rn + pass);
- neon_load_reg64(cpu_V1, rm + pass);
- switch (op) {
- case NEON_3R_VQSHL:
- if (u) {
- gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
- cpu_V1, cpu_V0);
- } else {
- gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
- cpu_V1, cpu_V0);
- }
- break;
- case NEON_3R_VRSHL:
- if (u) {
- gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
- } else {
- gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
- }
- break;
- case NEON_3R_VQRSHL:
- if (u) {
- gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
- cpu_V1, cpu_V0);
- } else {
- gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
- cpu_V1, cpu_V0);
- }
- break;
- default:
- abort();
- }
- neon_store_reg64(cpu_V0, rd + pass);
- }
- return 0;
- }
- pairwise = 0;
- switch (op) {
- case NEON_3R_VQSHL:
- case NEON_3R_VRSHL:
- case NEON_3R_VQRSHL:
- {
- int rtmp;
- /* Shift instruction operands are reversed. */
- rtmp = rn;
- rn = rm;
- rm = rtmp;
- }
- break;
- case NEON_3R_VPADD_VQRDMLAH:
- case NEON_3R_VPMAX:
- case NEON_3R_VPMIN:
- pairwise = 1;
- break;
- case NEON_3R_FLOAT_ARITH:
- pairwise = (u && size < 2); /* if VPADD (float) */
- break;
- case NEON_3R_FLOAT_MINMAX:
- pairwise = u; /* if VPMIN/VPMAX (float) */
- break;
- case NEON_3R_FLOAT_CMP:
- if (!u && size) {
- /* no encoding for U=0 C=1x */
- return 1;
- }
- break;
- case NEON_3R_FLOAT_ACMP:
- if (!u) {
- return 1;
- }
- break;
- case NEON_3R_FLOAT_MISC:
- /* VMAXNM/VMINNM in ARMv8 */
- if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) {
- return 1;
- }
- break;
- case NEON_3R_VFM_VQRDMLSH:
- if (!dc_isar_feature(aa32_simdfmac, s)) {
- return 1;
- }
- break;
- default:
- break;
- }
+static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
- if (pairwise && q) {
- /* All the pairwise insns UNDEF if Q is set */
- return 1;
- }
+ tcg_gen_smin_vec(vece, t, a, b);
+ tcg_gen_smax_vec(vece, d, a, b);
+ tcg_gen_sub_vec(vece, d, d, t);
+ tcg_temp_free_vec(t);
+}
- for (pass = 0; pass < (q ? 4 : 2); pass++) {
+void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_sabd_vec,
+ .fno = gen_helper_gvec_sabd_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = gen_sabd_vec,
+ .fno = gen_helper_gvec_sabd_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_sabd_i32,
+ .fniv = gen_sabd_vec,
+ .fno = gen_helper_gvec_sabd_s,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_sabd_i64,
+ .fniv = gen_sabd_vec,
+ .fno = gen_helper_gvec_sabd_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
- if (pairwise) {
- /* Pairwise. */
- if (pass < 1) {
- tmp = neon_load_reg(rn, 0);
- tmp2 = neon_load_reg(rn, 1);
- } else {
- tmp = neon_load_reg(rm, 0);
- tmp2 = neon_load_reg(rm, 1);
- }
- } else {
- /* Elementwise. */
- tmp = neon_load_reg(rn, pass);
- tmp2 = neon_load_reg(rm, pass);
- }
- switch (op) {
- case NEON_3R_VHADD:
- GEN_NEON_INTEGER_OP(hadd);
- break;
- case NEON_3R_VRHADD:
- GEN_NEON_INTEGER_OP(rhadd);
- break;
- case NEON_3R_VHSUB:
- GEN_NEON_INTEGER_OP(hsub);
- break;
- case NEON_3R_VQSHL:
- GEN_NEON_INTEGER_OP_ENV(qshl);
- break;
- case NEON_3R_VRSHL:
- GEN_NEON_INTEGER_OP(rshl);
- break;
- case NEON_3R_VQRSHL:
- GEN_NEON_INTEGER_OP_ENV(qrshl);
- break;
- case NEON_3R_VABD:
- GEN_NEON_INTEGER_OP(abd);
- break;
- case NEON_3R_VABA:
- GEN_NEON_INTEGER_OP(abd);
- tcg_temp_free_i32(tmp2);
- tmp2 = neon_load_reg(rd, pass);
- gen_neon_add(size, tmp, tmp2);
- break;
- case NEON_3R_VPMAX:
- GEN_NEON_INTEGER_OP(pmax);
- break;
- case NEON_3R_VPMIN:
- GEN_NEON_INTEGER_OP(pmin);
- break;
- case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high. */
- if (!u) { /* VQDMULH */
- switch (size) {
- case 1:
- gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
- break;
- case 2:
- gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
- break;
- default: abort();
- }
- } else { /* VQRDMULH */
- switch (size) {
- case 1:
- gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
- break;
- case 2:
- gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
- break;
- default: abort();
- }
- }
- break;
- case NEON_3R_VPADD_VQRDMLAH:
- switch (size) {
- case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
- case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
- case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
- default: abort();
- }
- break;
- case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- switch ((u << 2) | size) {
- case 0: /* VADD */
- case 4: /* VPADD */
- gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
- break;
- case 2: /* VSUB */
- gen_helper_vfp_subs(tmp, tmp, tmp2, fpstatus);
- break;
- case 6: /* VABD */
- gen_helper_neon_abd_f32(tmp, tmp, tmp2, fpstatus);
- break;
- default:
- abort();
- }
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_3R_FLOAT_MULTIPLY:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
- if (!u) {
- tcg_temp_free_i32(tmp2);
- tmp2 = neon_load_reg(rd, pass);
- if (size == 0) {
- gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
- } else {
- gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
- }
- }
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_3R_FLOAT_CMP:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- if (!u) {
- gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
- } else {
- if (size == 0) {
- gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
- } else {
- gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
- }
- }
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_3R_FLOAT_ACMP:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- if (size == 0) {
- gen_helper_neon_acge_f32(tmp, tmp, tmp2, fpstatus);
- } else {
- gen_helper_neon_acgt_f32(tmp, tmp, tmp2, fpstatus);
- }
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_3R_FLOAT_MINMAX:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- if (size == 0) {
- gen_helper_vfp_maxs(tmp, tmp, tmp2, fpstatus);
- } else {
- gen_helper_vfp_mins(tmp, tmp, tmp2, fpstatus);
- }
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- case NEON_3R_FLOAT_MISC:
- if (u) {
- /* VMAXNM/VMINNM */
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- if (size == 0) {
- gen_helper_vfp_maxnums(tmp, tmp, tmp2, fpstatus);
- } else {
- gen_helper_vfp_minnums(tmp, tmp, tmp2, fpstatus);
- }
- tcg_temp_free_ptr(fpstatus);
- } else {
- if (size == 0) {
- gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
- } else {
- gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
- }
- }
- break;
- case NEON_3R_VFM_VQRDMLSH:
- {
- /* VFMA, VFMS: fused multiply-add */
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- TCGv_i32 tmp3 = neon_load_reg(rd, pass);
- if (size) {
- /* VFMS */
- gen_helper_vfp_negs(tmp, tmp);
- }
- gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus);
- tcg_temp_free_i32(tmp3);
- tcg_temp_free_ptr(fpstatus);
- break;
- }
- default:
- abort();
- }
- tcg_temp_free_i32(tmp2);
+static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
- /* Save the result. For elementwise operations we can put it
- straight into the destination register. For pairwise operations
- we have to be careful to avoid clobbering the source operands. */
- if (pairwise && rd == rm) {
- neon_store_scratch(pass, tmp);
- } else {
- neon_store_reg(rd, pass, tmp);
- }
+ tcg_gen_sub_i32(t, a, b);
+ tcg_gen_sub_i32(d, b, a);
+ tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
+ tcg_temp_free_i32(t);
+}
- } /* for pass */
- if (pairwise && rd == rm) {
- for (pass = 0; pass < (q ? 4 : 2); pass++) {
- tmp = neon_load_scratch(pass);
- neon_store_reg(rd, pass, tmp);
- }
- }
- /* End of 3 register same size operations. */
- } else if (insn & (1 << 4)) {
- if ((insn & 0x00380080) != 0) {
- /* Two registers and shift. */
- op = (insn >> 8) & 0xf;
- if (insn & (1 << 7)) {
- /* 64-bit shift. */
- if (op > 7) {
- return 1;
- }
- size = 3;
- } else {
- size = 2;
- while ((insn & (1 << (size + 19))) == 0)
- size--;
- }
- shift = (insn >> 16) & ((1 << (3 + size)) - 1);
- if (op < 8) {
- /* Shift by immediate:
- VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */
- if (q && ((rd | rm) & 1)) {
- return 1;
- }
- if (!u && (op == 4 || op == 6)) {
- return 1;
- }
- /* Right shifts are encoded as N - shift, where N is the
- element size in bits. */
- if (op <= 4) {
- shift = shift - (1 << (size + 3));
- }
+static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
- switch (op) {
- case 0: /* VSHR */
- /* Right shift comes here negative. */
- shift = -shift;
- /* Shifts larger than the element size are architecturally
- * valid. Unsigned results in all zeros; signed results
- * in all sign bits.
- */
- if (!u) {
- tcg_gen_gvec_sari(size, rd_ofs, rm_ofs,
- MIN(shift, (8 << size) - 1),
- vec_size, vec_size);
- } else if (shift >= 8 << size) {
- tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0);
- } else {
- tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift,
- vec_size, vec_size);
- }
- return 0;
+ tcg_gen_sub_i64(t, a, b);
+ tcg_gen_sub_i64(d, b, a);
+ tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
+ tcg_temp_free_i64(t);
+}
- case 1: /* VSRA */
- /* Right shift comes here negative. */
- shift = -shift;
- /* Shifts larger than the element size are architecturally
- * valid. Unsigned results in all zeros; signed results
- * in all sign bits.
- */
- if (!u) {
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
- MIN(shift, (8 << size) - 1),
- &ssra_op[size]);
- } else if (shift >= 8 << size) {
- /* rd += 0 */
- } else {
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
- shift, &usra_op[size]);
- }
- return 0;
+static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
- case 4: /* VSRI */
- if (!u) {
- return 1;
- }
- /* Right shift comes here negative. */
- shift = -shift;
- /* Shift out of range leaves destination unchanged. */
- if (shift < 8 << size) {
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
- shift, &sri_op[size]);
- }
- return 0;
+ tcg_gen_umin_vec(vece, t, a, b);
+ tcg_gen_umax_vec(vece, d, a, b);
+ tcg_gen_sub_vec(vece, d, d, t);
+ tcg_temp_free_vec(t);
+}
- case 5: /* VSHL, VSLI */
- if (u) { /* VSLI */
- /* Shift out of range leaves destination unchanged. */
- if (shift < 8 << size) {
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size,
- vec_size, shift, &sli_op[size]);
- }
- } else { /* VSHL */
- /* Shifts larger than the element size are
- * architecturally valid and results in zero.
- */
- if (shift >= 8 << size) {
- tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0);
- } else {
- tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift,
- vec_size, vec_size);
- }
- }
- return 0;
- }
+void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_uabd_vec,
+ .fno = gen_helper_gvec_uabd_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = gen_uabd_vec,
+ .fno = gen_helper_gvec_uabd_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_uabd_i32,
+ .fniv = gen_uabd_vec,
+ .fno = gen_helper_gvec_uabd_s,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_uabd_i64,
+ .fniv = gen_uabd_vec,
+ .fno = gen_helper_gvec_uabd_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
- if (size == 3) {
- count = q + 1;
- } else {
- count = q ? 4: 2;
- }
+static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+ gen_sabd_i32(t, a, b);
+ tcg_gen_add_i32(d, d, t);
+ tcg_temp_free_i32(t);
+}
- /* To avoid excessive duplication of ops we implement shift
- * by immediate using the variable shift operations.
- */
- imm = dup_const(size, shift);
+static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+ gen_sabd_i64(t, a, b);
+ tcg_gen_add_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
- for (pass = 0; pass < count; pass++) {
- if (size == 3) {
- neon_load_reg64(cpu_V0, rm + pass);
- tcg_gen_movi_i64(cpu_V1, imm);
- switch (op) {
- case 2: /* VRSHR */
- case 3: /* VRSRA */
- if (u)
- gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, cpu_V1);
- else
- gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1);
- break;
- case 6: /* VQSHLU */
- gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
- cpu_V0, cpu_V1);
- break;
- case 7: /* VQSHL */
- if (u) {
- gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
- cpu_V0, cpu_V1);
- } else {
- gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
- cpu_V0, cpu_V1);
- }
- break;
- default:
- g_assert_not_reached();
- }
- if (op == 3) {
- /* Accumulate. */
- neon_load_reg64(cpu_V1, rd + pass);
- tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
- }
- neon_store_reg64(cpu_V0, rd + pass);
- } else { /* size < 3 */
- /* Operands in T0 and T1. */
- tmp = neon_load_reg(rm, pass);
- tmp2 = tcg_temp_new_i32();
- tcg_gen_movi_i32(tmp2, imm);
- switch (op) {
- case 2: /* VRSHR */
- case 3: /* VRSRA */
- GEN_NEON_INTEGER_OP(rshl);
- break;
- case 6: /* VQSHLU */
- switch (size) {
- case 0:
- gen_helper_neon_qshlu_s8(tmp, cpu_env,
- tmp, tmp2);
- break;
- case 1:
- gen_helper_neon_qshlu_s16(tmp, cpu_env,
- tmp, tmp2);
- break;
- case 2:
- gen_helper_neon_qshlu_s32(tmp, cpu_env,
- tmp, tmp2);
- break;
- default:
- abort();
- }
- break;
- case 7: /* VQSHL */
- GEN_NEON_INTEGER_OP_ENV(qshl);
- break;
- default:
- g_assert_not_reached();
- }
- tcg_temp_free_i32(tmp2);
+static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ gen_sabd_vec(vece, t, a, b);
+ tcg_gen_add_vec(vece, d, d, t);
+ tcg_temp_free_vec(t);
+}
- if (op == 3) {
- /* Accumulate. */
- tmp2 = neon_load_reg(rd, pass);
- gen_neon_add(size, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- }
- neon_store_reg(rd, pass, tmp);
- }
- } /* for pass */
- } else if (op < 10) {
- /* Shift by immediate and narrow:
- VSHRN, VRSHRN, VQSHRN, VQRSHRN. */
- int input_unsigned = (op == 8) ? !u : u;
- if (rm & 1) {
- return 1;
- }
- shift = shift - (1 << (size + 3));
- size++;
- if (size == 3) {
- tmp64 = tcg_const_i64(shift);
- neon_load_reg64(cpu_V0, rm);
- neon_load_reg64(cpu_V1, rm + 1);
- for (pass = 0; pass < 2; pass++) {
- TCGv_i64 in;
- if (pass == 0) {
- in = cpu_V0;
- } else {
- in = cpu_V1;
- }
- if (q) {
- if (input_unsigned) {
- gen_helper_neon_rshl_u64(cpu_V0, in, tmp64);
- } else {
- gen_helper_neon_rshl_s64(cpu_V0, in, tmp64);
- }
- } else {
- if (input_unsigned) {
- gen_ushl_i64(cpu_V0, in, tmp64);
- } else {
- gen_sshl_i64(cpu_V0, in, tmp64);
- }
- }
- tmp = tcg_temp_new_i32();
- gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
- neon_store_reg(rd, pass, tmp);
- } /* for pass */
- tcg_temp_free_i64(tmp64);
- } else {
- if (size == 1) {
- imm = (uint16_t)shift;
- imm |= imm << 16;
- } else {
- /* size == 2 */
- imm = (uint32_t)shift;
- }
- tmp2 = tcg_const_i32(imm);
- tmp4 = neon_load_reg(rm + 1, 0);
- tmp5 = neon_load_reg(rm + 1, 1);
- for (pass = 0; pass < 2; pass++) {
- if (pass == 0) {
- tmp = neon_load_reg(rm, 0);
- } else {
- tmp = tmp4;
- }
- gen_neon_shift_narrow(size, tmp, tmp2, q,
- input_unsigned);
- if (pass == 0) {
- tmp3 = neon_load_reg(rm, 1);
- } else {
- tmp3 = tmp5;
- }
- gen_neon_shift_narrow(size, tmp3, tmp2, q,
- input_unsigned);
- tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
- tcg_temp_free_i32(tmp);
- tcg_temp_free_i32(tmp3);
- tmp = tcg_temp_new_i32();
- gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
- neon_store_reg(rd, pass, tmp);
- } /* for pass */
- tcg_temp_free_i32(tmp2);
- }
- } else if (op == 10) {
- /* VSHLL, VMOVL */
- if (q || (rd & 1)) {
- return 1;
- }
- tmp = neon_load_reg(rm, 0);
- tmp2 = neon_load_reg(rm, 1);
- for (pass = 0; pass < 2; pass++) {
- if (pass == 1)
- tmp = tmp2;
-
- gen_neon_widen(cpu_V0, tmp, size, u);
-
- if (shift != 0) {
- /* The shift is less than the width of the source
- type, so we can just shift the whole register. */
- tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
- /* Widen the result of shift: we need to clear
- * the potential overflow bits resulting from
- * left bits of the narrow input appearing as
- * right bits of left the neighbour narrow
- * input. */
- if (size < 2 || !u) {
- uint64_t imm64;
- if (size == 0) {
- imm = (0xffu >> (8 - shift));
- imm |= imm << 16;
- } else if (size == 1) {
- imm = 0xffff >> (16 - shift);
- } else {
- /* size == 2 */
- imm = 0xffffffff >> (32 - shift);
- }
- if (size < 2) {
- imm64 = imm | (((uint64_t)imm) << 32);
- } else {
- imm64 = imm;
- }
- tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
- }
- }
- neon_store_reg64(cpu_V0, rd + pass);
- }
- } else if (op >= 14) {
- /* VCVT fixed-point. */
- TCGv_ptr fpst;
- TCGv_i32 shiftv;
- VFPGenFixPointFn *fn;
+void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sub_vec, INDEX_op_add_vec,
+ INDEX_op_smin_vec, INDEX_op_smax_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_saba_vec,
+ .fno = gen_helper_gvec_saba_b,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_8 },
+ { .fniv = gen_saba_vec,
+ .fno = gen_helper_gvec_saba_h,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_16 },
+ { .fni4 = gen_saba_i32,
+ .fniv = gen_saba_vec,
+ .fno = gen_helper_gvec_saba_s,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_32 },
+ { .fni8 = gen_saba_i64,
+ .fniv = gen_saba_vec,
+ .fno = gen_helper_gvec_saba_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
- if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
- return 1;
- }
+static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+ gen_uabd_i32(t, a, b);
+ tcg_gen_add_i32(d, d, t);
+ tcg_temp_free_i32(t);
+}
- if (!(op & 1)) {
- if (u) {
- fn = gen_helper_vfp_ultos;
- } else {
- fn = gen_helper_vfp_sltos;
- }
- } else {
- if (u) {
- fn = gen_helper_vfp_touls_round_to_zero;
- } else {
- fn = gen_helper_vfp_tosls_round_to_zero;
- }
- }
+static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+ gen_uabd_i64(t, a, b);
+ tcg_gen_add_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
- /* We have already masked out the must-be-1 top bit of imm6,
- * hence this 32-shift where the ARM ARM has 64-imm6.
- */
- shift = 32 - shift;
- fpst = get_fpstatus_ptr(1);
- shiftv = tcg_const_i32(shift);
- for (pass = 0; pass < (q ? 4 : 2); pass++) {
- TCGv_i32 tmpf = neon_load_reg(rm, pass);
- fn(tmpf, tmpf, shiftv, fpst);
- neon_store_reg(rd, pass, tmpf);
- }
- tcg_temp_free_ptr(fpst);
- tcg_temp_free_i32(shiftv);
- } else {
- return 1;
- }
- } else { /* (insn & 0x00380080) == 0 */
- int invert, reg_ofs, vec_size;
+static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ gen_uabd_vec(vece, t, a, b);
+ tcg_gen_add_vec(vece, d, d, t);
+ tcg_temp_free_vec(t);
+}
- if (q && (rd & 1)) {
- return 1;
- }
+void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sub_vec, INDEX_op_add_vec,
+ INDEX_op_umin_vec, INDEX_op_umax_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_uaba_vec,
+ .fno = gen_helper_gvec_uaba_b,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_8 },
+ { .fniv = gen_uaba_vec,
+ .fno = gen_helper_gvec_uaba_h,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_16 },
+ { .fni4 = gen_uaba_i32,
+ .fniv = gen_uaba_vec,
+ .fno = gen_helper_gvec_uaba_s,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_32 },
+ { .fni8 = gen_uaba_i64,
+ .fniv = gen_uaba_vec,
+ .fno = gen_helper_gvec_uaba_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
- op = (insn >> 8) & 0xf;
- /* One register and immediate. */
- imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
- invert = (insn & (1 << 5)) != 0;
- /* Note that op = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.
- * We choose to not special-case this and will behave as if a
- * valid constant encoding of 0 had been given.
- */
- switch (op) {
- case 0: case 1:
- /* no-op */
- break;
- case 2: case 3:
- imm <<= 8;
- break;
- case 4: case 5:
- imm <<= 16;
- break;
- case 6: case 7:
- imm <<= 24;
- break;
- case 8: case 9:
- imm |= imm << 16;
- break;
- case 10: case 11:
- imm = (imm << 8) | (imm << 24);
- break;
- case 12:
- imm = (imm << 8) | 0xff;
- break;
- case 13:
- imm = (imm << 16) | 0xffff;
- break;
- case 14:
- imm |= (imm << 8) | (imm << 16) | (imm << 24);
- if (invert) {
- imm = ~imm;
- }
- break;
- case 15:
- if (invert) {
- return 1;
- }
- imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
- | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
- break;
- }
- if (invert) {
- imm = ~imm;
- }
+/* Translate a NEON data processing instruction. Return nonzero if the
+ instruction is invalid.
+ We process data in a mixture of 32-bit and 64-bit chunks.
+ Mostly we use 32-bit chunks so we can use normal scalar instructions. */
+
+static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
+{
+ int op;
+ int q;
+ int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs;
+ int size;
+ int pass;
+ int u;
+ int vec_size;
+ uint32_t imm;
+ TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
+ TCGv_ptr ptr1;
+ TCGv_i64 tmp64;
- reg_ofs = neon_reg_offset(rd, 0);
- vec_size = q ? 16 : 8;
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return 1;
+ }
- if (op & 1 && op < 12) {
- if (invert) {
- /* The immediate value has already been inverted,
- * so BIC becomes AND.
- */
- tcg_gen_gvec_andi(MO_32, reg_ofs, reg_ofs, imm,
- vec_size, vec_size);
- } else {
- tcg_gen_gvec_ori(MO_32, reg_ofs, reg_ofs, imm,
- vec_size, vec_size);
- }
- } else {
- /* VMOV, VMVN. */
- if (op == 14 && invert) {
- TCGv_i64 t64 = tcg_temp_new_i64();
+ /* FIXME: this access check should not take precedence over UNDEF
+ * for invalid encodings; we will generate incorrect syndrome information
+ * for attempts to execute invalid vfp/neon encodings with FP disabled.
+ */
+ if (s->fp_excp_el) {
+ gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
+ syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
+ return 0;
+ }
- for (pass = 0; pass <= q; ++pass) {
- uint64_t val = 0;
- int n;
+ if (!s->vfp_enabled)
+ return 1;
+ q = (insn & (1 << 6)) != 0;
+ u = (insn >> 24) & 1;
+ VFP_DREG_D(rd, insn);
+ VFP_DREG_N(rn, insn);
+ VFP_DREG_M(rm, insn);
+ size = (insn >> 20) & 3;
+ vec_size = q ? 16 : 8;
+ rd_ofs = neon_reg_offset(rd, 0);
+ rn_ofs = neon_reg_offset(rn, 0);
+ rm_ofs = neon_reg_offset(rm, 0);
- for (n = 0; n < 8; n++) {
- if (imm & (1 << (n + pass * 8))) {
- val |= 0xffull << (n * 8);
- }
- }
- tcg_gen_movi_i64(t64, val);
- neon_store_reg64(t64, rd + pass);
- }
- tcg_temp_free_i64(t64);
- } else {
- tcg_gen_gvec_dup32i(reg_ofs, vec_size, vec_size, imm);
- }
- }
- }
+ if ((insn & (1 << 23)) == 0) {
+ /* Three register same length: handled by decodetree */
+ return 1;
+ } else if (insn & (1 << 4)) {
+ /* Two registers and shift or reg and imm: handled by decodetree */
+ return 1;
} else { /* (insn & 0x00800010 == 0x00800000) */
if (size != 3) {
op = (insn >> 8) & 0xf;
if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) {
return 1;
}
- ptr1 = vfp_reg_ptr(true, rd);
- ptr2 = vfp_reg_ptr(true, rm);
-
- /* Bit 6 is the lowest opcode bit; it distinguishes between
- * encryption (AESE/AESMC) and decryption (AESD/AESIMC)
- */
- tmp3 = tcg_const_i32(extract32(insn, 6, 1));
-
+ /*
+ * Bit 6 is the lowest opcode bit; it distinguishes
+ * between encryption (AESE/AESMC) and decryption
+ * (AESD/AESIMC).
+ */
if (op == NEON_2RM_AESE) {
- gen_helper_crypto_aese(ptr1, ptr2, tmp3);
+ tcg_gen_gvec_3_ool(vfp_reg_offset(true, rd),
+ vfp_reg_offset(true, rd),
+ vfp_reg_offset(true, rm),
+ 16, 16, extract32(insn, 6, 1),
+ gen_helper_crypto_aese);
} else {
- gen_helper_crypto_aesmc(ptr1, ptr2, tmp3);
+ tcg_gen_gvec_2_ool(vfp_reg_offset(true, rd),
+ vfp_reg_offset(true, rm),
+ 16, 16, extract32(insn, 6, 1),
+ gen_helper_crypto_aesmc);
}
- tcg_temp_free_ptr(ptr1);
- tcg_temp_free_ptr(ptr2);
- tcg_temp_free_i32(tmp3);
break;
case NEON_2RM_SHA1H:
if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) {
return 1;
}
- ptr1 = vfp_reg_ptr(true, rd);
- ptr2 = vfp_reg_ptr(true, rm);
-
- gen_helper_crypto_sha1h(ptr1, ptr2);
-
- tcg_temp_free_ptr(ptr1);
- tcg_temp_free_ptr(ptr2);
+ tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, 16, 16, 0,
+ gen_helper_crypto_sha1h);
break;
case NEON_2RM_SHA1SU1:
if ((rm | rd) & 1) {
} else if (!dc_isar_feature(aa32_sha1, s)) {
return 1;
}
- ptr1 = vfp_reg_ptr(true, rd);
- ptr2 = vfp_reg_ptr(true, rm);
- if (q) {
- gen_helper_crypto_sha256su0(ptr1, ptr2);
- } else {
- gen_helper_crypto_sha1su1(ptr1, ptr2);
- }
- tcg_temp_free_ptr(ptr1);
- tcg_temp_free_ptr(ptr2);
+ tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, 16, 16, 0,
+ q ? gen_helper_crypto_sha256su0
+ : gen_helper_crypto_sha1su1);
break;
-
case NEON_2RM_VMVN:
tcg_gen_gvec_not(0, rd_ofs, rm_ofs, vec_size, vec_size);
break;
break;
case NEON_2RM_VCEQ0:
- tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
- vec_size, &ceq0_op[size]);
+ gen_gvec_ceq0(size, rd_ofs, rm_ofs, vec_size, vec_size);
break;
case NEON_2RM_VCGT0:
- tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
- vec_size, &cgt0_op[size]);
+ gen_gvec_cgt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
break;
case NEON_2RM_VCLE0:
- tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
- vec_size, &cle0_op[size]);
+ gen_gvec_cle0(size, rd_ofs, rm_ofs, vec_size, vec_size);
break;
case NEON_2RM_VCGE0:
- tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
- vec_size, &cge0_op[size]);
+ gen_gvec_cge0(size, rd_ofs, rm_ofs, vec_size, vec_size);
break;
case NEON_2RM_VCLT0:
- tcg_gen_gvec_2(rd_ofs, rm_ofs, vec_size,
- vec_size, &clt0_op[size]);
+ gen_gvec_clt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
break;
default:
break;
}
case NEON_2RM_VRECPE:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- gen_helper_recpe_u32(tmp, tmp, fpstatus);
- tcg_temp_free_ptr(fpstatus);
+ gen_helper_recpe_u32(tmp, tmp);
break;
- }
case NEON_2RM_VRSQRTE:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- gen_helper_rsqrte_u32(tmp, tmp, fpstatus);
- tcg_temp_free_ptr(fpstatus);
+ gen_helper_rsqrte_u32(tmp, tmp);
break;
- }
case NEON_2RM_VRECPE_F:
{
TCGv_ptr fpstatus = get_fpstatus_ptr(1);