#define ARM_CP_RW_BIT (1 << 20)
-/* Include the VFP decoder */
+/* Include the VFP and Neon decoders */
#include "translate-vfp.inc.c"
+#include "translate-neon.inc.c"
static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
{
    tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
}
#define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
-#define VFP_SREG(insn, bigbit, smallbit) \
- ((VFP_REG_SHR(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1))
#define VFP_DREG(reg, insn, bigbit, smallbit) do { \
if (dc_isar_feature(aa32_simd_r32, s)) { \
        reg = (((insn) >> (bigbit)) & 0x0f) \
              | (((insn) >> ((smallbit) - 4)) & 0x10); \
    } else { \
        if (insn & (1 << (smallbit))) \
            return 1; \
        reg = ((insn) >> (bigbit)) & 0x0f; \
}} while (0)
-#define VFP_SREG_D(insn) VFP_SREG(insn, 12, 22)
#define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
-#define VFP_SREG_N(insn) VFP_SREG(insn, 16, 7)
#define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
-#define VFP_SREG_M(insn) VFP_SREG(insn, 0, 5)
#define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
static void gen_neon_dup_low16(TCGv_i32 var)
tcg_temp_free_i32(tmp);
}
-/*
- * Disassemble a VFP instruction. Returns nonzero if an error occurred
- * (ie. an undefined instruction).
- */
-static int disas_vfp_insn(DisasContext *s, uint32_t insn)
-{
- /*
- * If the decodetree decoder handles this insn it will always
- * emit code to either execute the insn or generate an appropriate
- * exception; so we don't need to ever return non-zero to tell
- * the calling code to emit an UNDEF exception.
- */
- if (extract32(insn, 28, 4) == 0xf) {
- if (disas_vfp_uncond(s, insn)) {
- return 0;
- }
- } else {
- if (disas_vfp(s, insn)) {
- return 0;
- }
- }
- /* If the decodetree decoder didn't handle this insn, it must be UNDEF */
- return 1;
-}
-
static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
{
#ifndef CONFIG_USER_ONLY
tcg_temp_free_i32(rd);
}
-
-static struct {
- int nregs;
- int interleave;
- int spacing;
-} const neon_ls_element_type[11] = {
- {1, 4, 1},
- {1, 4, 2},
- {4, 1, 1},
- {2, 2, 2},
- {1, 3, 1},
- {1, 3, 2},
- {3, 1, 1},
- {1, 1, 1},
- {1, 2, 1},
- {1, 2, 2},
- {2, 1, 1}
-};
-
-/* Translate a NEON load/store element instruction. Return nonzero if the
- instruction is invalid. */
-static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
-{
- int rd, rn, rm;
- int op;
- int nregs;
- int interleave;
- int spacing;
- int stride;
- int size;
- int reg;
- int load;
- int n;
- int vec_size;
- int mmu_idx;
- MemOp endian;
- TCGv_i32 addr;
- TCGv_i32 tmp;
- TCGv_i32 tmp2;
- TCGv_i64 tmp64;
-
- /* FIXME: this access check should not take precedence over UNDEF
- * for invalid encodings; we will generate incorrect syndrome information
- * for attempts to execute invalid vfp/neon encodings with FP disabled.
- */
- if (s->fp_excp_el) {
- gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
- syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
- return 0;
- }
-
- if (!s->vfp_enabled)
- return 1;
- VFP_DREG_D(rd, insn);
- rn = (insn >> 16) & 0xf;
- rm = insn & 0xf;
- load = (insn & (1 << 21)) != 0;
- endian = s->be_data;
- mmu_idx = get_mem_index(s);
- if ((insn & (1 << 23)) == 0) {
- /* Load store all elements. */
- op = (insn >> 8) & 0xf;
- size = (insn >> 6) & 3;
- if (op > 10)
- return 1;
- /* Catch UNDEF cases for bad values of align field */
- switch (op & 0xc) {
- case 4:
- if (((insn >> 5) & 1) == 1) {
- return 1;
- }
- break;
- case 8:
- if (((insn >> 4) & 3) == 3) {
- return 1;
- }
- break;
- default:
- break;
- }
- nregs = neon_ls_element_type[op].nregs;
- interleave = neon_ls_element_type[op].interleave;
- spacing = neon_ls_element_type[op].spacing;
- if (size == 3 && (interleave | spacing) != 1) {
- return 1;
- }
- /* For our purposes, bytes are always little-endian. */
- if (size == 0) {
- endian = MO_LE;
- }
- /* Consecutive little-endian elements from a single register
- * can be promoted to a larger little-endian operation.
- */
- if (interleave == 1 && endian == MO_LE) {
- size = 3;
- }
- tmp64 = tcg_temp_new_i64();
- addr = tcg_temp_new_i32();
- tmp2 = tcg_const_i32(1 << size);
- load_reg_var(s, addr, rn);
- for (reg = 0; reg < nregs; reg++) {
- for (n = 0; n < 8 >> size; n++) {
- int xs;
- for (xs = 0; xs < interleave; xs++) {
- int tt = rd + reg + spacing * xs;
-
- if (load) {
- gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size);
- neon_store_element64(tt, n, size, tmp64);
- } else {
- neon_load_element64(tmp64, tt, n, size);
- gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size);
- }
- tcg_gen_add_i32(addr, addr, tmp2);
- }
- }
- }
- tcg_temp_free_i32(addr);
- tcg_temp_free_i32(tmp2);
- tcg_temp_free_i64(tmp64);
- stride = nregs * interleave * 8;
- } else {
- size = (insn >> 10) & 3;
- if (size == 3) {
- /* Load single element to all lanes. */
- int a = (insn >> 4) & 1;
- if (!load) {
- return 1;
- }
- size = (insn >> 6) & 3;
- nregs = ((insn >> 8) & 3) + 1;
-
- if (size == 3) {
- if (nregs != 4 || a == 0) {
- return 1;
- }
- /* For VLD4 size==3 a == 1 means 32 bits at 16 byte alignment */
- size = 2;
- }
- if (nregs == 1 && a == 1 && size == 0) {
- return 1;
- }
- if (nregs == 3 && a == 1) {
- return 1;
- }
- addr = tcg_temp_new_i32();
- load_reg_var(s, addr, rn);
-
- /* VLD1 to all lanes: bit 5 indicates how many Dregs to write.
- * VLD2/3/4 to all lanes: bit 5 indicates register stride.
- */
- stride = (insn & (1 << 5)) ? 2 : 1;
- vec_size = nregs == 1 ? stride * 8 : 8;
-
- tmp = tcg_temp_new_i32();
- for (reg = 0; reg < nregs; reg++) {
- gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
- s->be_data | size);
- if ((rd & 1) && vec_size == 16) {
- /* We cannot write 16 bytes at once because the
- * destination is unaligned.
- */
- tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0),
- 8, 8, tmp);
- tcg_gen_gvec_mov(0, neon_reg_offset(rd + 1, 0),
- neon_reg_offset(rd, 0), 8, 8);
- } else {
- tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0),
- vec_size, vec_size, tmp);
- }
- tcg_gen_addi_i32(addr, addr, 1 << size);
- rd += stride;
- }
- tcg_temp_free_i32(tmp);
- tcg_temp_free_i32(addr);
- stride = (1 << size) * nregs;
- } else {
- /* Single element. */
- int idx = (insn >> 4) & 0xf;
- int reg_idx;
- switch (size) {
- case 0:
- reg_idx = (insn >> 5) & 7;
- stride = 1;
- break;
- case 1:
- reg_idx = (insn >> 6) & 3;
- stride = (insn & (1 << 5)) ? 2 : 1;
- break;
- case 2:
- reg_idx = (insn >> 7) & 1;
- stride = (insn & (1 << 6)) ? 2 : 1;
- break;
- default:
- abort();
- }
- nregs = ((insn >> 8) & 3) + 1;
- /* Catch the UNDEF cases. This is unavoidably a bit messy. */
- switch (nregs) {
- case 1:
- if (((idx & (1 << size)) != 0) ||
- (size == 2 && ((idx & 3) == 1 || (idx & 3) == 2))) {
- return 1;
- }
- break;
- case 3:
- if ((idx & 1) != 0) {
- return 1;
- }
- /* fall through */
- case 2:
- if (size == 2 && (idx & 2) != 0) {
- return 1;
- }
- break;
- case 4:
- if ((size == 2) && ((idx & 3) == 3)) {
- return 1;
- }
- break;
- default:
- abort();
- }
- if ((rd + stride * (nregs - 1)) > 31) {
- /* Attempts to write off the end of the register file
- * are UNPREDICTABLE; we choose to UNDEF because otherwise
- * the neon_load_reg() would write off the end of the array.
- */
- return 1;
- }
- tmp = tcg_temp_new_i32();
- addr = tcg_temp_new_i32();
- load_reg_var(s, addr, rn);
- for (reg = 0; reg < nregs; reg++) {
- if (load) {
- gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
- s->be_data | size);
- neon_store_element(rd, reg_idx, size, tmp);
- } else { /* Store */
- neon_load_element(tmp, rd, reg_idx, size);
- gen_aa32_st_i32(s, tmp, addr, get_mem_index(s),
- s->be_data | size);
- }
- rd += stride;
- tcg_gen_addi_i32(addr, addr, 1 << size);
- }
- tcg_temp_free_i32(addr);
- tcg_temp_free_i32(tmp);
- stride = nregs * (1 << size);
- }
- }
- if (rm != 15) {
- TCGv_i32 base;
-
- base = load_reg(s, rn);
- if (rm == 13) {
- tcg_gen_addi_i32(base, base, stride);
- } else {
- TCGv_i32 index;
- index = load_reg(s, rm);
- tcg_gen_add_i32(base, base, index);
- tcg_temp_free_i32(index);
- }
- store_reg(s, rn, base);
- }
- return 0;
-}
-
static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
{
switch (size) {
[NEON_2RM_VCVT_UF] = 0x4,
};
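+/*
+ * Expand a three-operand gvec operation through an out-of-line helper
+ * that takes a pointer to vfp.qc (the cumulative saturation flag)
+ * rather than the full CPU env.
+ */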
+static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
+ uint32_t opr_sz, uint32_t max_sz,
+ gen_helper_gvec_3_ptr *fn)
+{
+ TCGv_ptr qc_ptr = tcg_temp_new_ptr();
+
+ tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
+ tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
+ opr_sz, max_sz, 0, fn);
+ tcg_temp_free_ptr(qc_ptr);
+}
-/* Expand v8.1 simd helper. */
-static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn,
- int q, int rd, int rn, int rm)
+void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
{
- if (dc_isar_feature(aa32_rdm, s)) {
- int opr_sz = (1 + q) * 8;
- tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
- vfp_reg_offset(1, rn),
- vfp_reg_offset(1, rm), cpu_env,
- opr_sz, opr_sz, 0, fn);
- return 0;
- }
- return 1;
+ static gen_helper_gvec_3_ptr * const fns[2] = {
+ gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
+ };
+ tcg_debug_assert(vece >= 1 && vece <= 2);
+ gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}
+void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static gen_helper_gvec_3_ptr * const fns[2] = {
+ gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
+ };
+ tcg_debug_assert(vece >= 1 && vece <= 2);
+ gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
+}
+
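+/*
+ * Compare-with-zero expanders: setcond yields 0 or 1, and negating that
+ * gives 0 or all-ones, which is the Neon result for VCEQ/VCGE/VCGT/
+ * VCLE/VCLT against #0.  The macro emits i32, i64 and vector variants
+ * plus the gvec expander gen_gvec_<NAME>0().
+ */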
+#define GEN_CMP0(NAME, COND) \
+ static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a) \
+ { \
+ tcg_gen_setcondi_i32(COND, d, a, 0); \
+ tcg_gen_neg_i32(d, d); \
+ } \
+ static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a) \
+ { \
+ tcg_gen_setcondi_i64(COND, d, a, 0); \
+ tcg_gen_neg_i64(d, d); \
+ } \
+ static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
+ { \
+ TCGv_vec zero = tcg_const_zeros_vec_matching(d); \
+ tcg_gen_cmp_vec(COND, vece, d, a, zero); \
+ tcg_temp_free_vec(zero); \
+ } \
+ void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m, \
+ uint32_t opr_sz, uint32_t max_sz) \
+ { \
+ const GVecGen2 op[4] = { \
+ { .fno = gen_helper_gvec_##NAME##0_b, \
+ .fniv = gen_##NAME##0_vec, \
+ .opt_opc = vecop_list_cmp, \
+ .vece = MO_8 }, \
+ { .fno = gen_helper_gvec_##NAME##0_h, \
+ .fniv = gen_##NAME##0_vec, \
+ .opt_opc = vecop_list_cmp, \
+ .vece = MO_16 }, \
+ { .fni4 = gen_##NAME##0_i32, \
+ .fniv = gen_##NAME##0_vec, \
+ .opt_opc = vecop_list_cmp, \
+ .vece = MO_32 }, \
+ { .fni8 = gen_##NAME##0_i64, \
+ .fniv = gen_##NAME##0_vec, \
+ .opt_opc = vecop_list_cmp, \
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64, \
+ .vece = MO_64 }, \
+ }; \
+ tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]); \
+ }
+
+static const TCGOpcode vecop_list_cmp[] = {
+ INDEX_op_cmp_vec, 0
+};
+
+GEN_CMP0(ceq, TCG_COND_EQ)
+GEN_CMP0(cle, TCG_COND_LE)
+GEN_CMP0(cge, TCG_COND_GE)
+GEN_CMP0(clt, TCG_COND_LT)
+GEN_CMP0(cgt, TCG_COND_GT)
+
+#undef GEN_CMP0
+
static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
tcg_gen_vec_sar8i_i64(a, a, shift);
tcg_gen_add_vec(vece, d, d, a);
}
-static const TCGOpcode vecop_list_ssra[] = {
- INDEX_op_sari_vec, INDEX_op_add_vec, 0
-};
+void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sari_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_ssra8_i64,
+ .fniv = gen_ssra_vec,
+ .fno = gen_helper_gvec_ssra_b,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = gen_ssra16_i64,
+ .fniv = gen_ssra_vec,
+ .fno = gen_helper_gvec_ssra_h,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_ssra32_i32,
+ .fniv = gen_ssra_vec,
+ .fno = gen_helper_gvec_ssra_s,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_ssra64_i64,
+ .fniv = gen_ssra_vec,
+          .fno = gen_helper_gvec_ssra_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
-const GVecGen2i ssra_op[4] = {
- { .fni8 = gen_ssra8_i64,
- .fniv = gen_ssra_vec,
- .load_dest = true,
- .opt_opc = vecop_list_ssra,
- .vece = MO_8 },
- { .fni8 = gen_ssra16_i64,
- .fniv = gen_ssra_vec,
- .load_dest = true,
- .opt_opc = vecop_list_ssra,
- .vece = MO_16 },
- { .fni4 = gen_ssra32_i32,
- .fniv = gen_ssra_vec,
- .load_dest = true,
- .opt_opc = vecop_list_ssra,
- .vece = MO_32 },
- { .fni8 = gen_ssra64_i64,
- .fniv = gen_ssra_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .opt_opc = vecop_list_ssra,
- .load_dest = true,
- .vece = MO_64 },
-};
+ /* tszimm encoding produces immediates in the range [1..esize]. */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ /*
+ * Shifts larger than the element size are architecturally valid.
+ * Signed results in all sign bits.
+ */
+ shift = MIN(shift, (8 << vece) - 1);
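+    /* E.g. for MO_8, a shift of 8 is clamped to 7, which fills the
+     * element with its sign bit. */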
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+}
static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
tcg_gen_add_vec(vece, d, d, a);
}
-static const TCGOpcode vecop_list_usra[] = {
- INDEX_op_shri_vec, INDEX_op_add_vec, 0
-};
+void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_usra8_i64,
+ .fniv = gen_usra_vec,
+ .fno = gen_helper_gvec_usra_b,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8, },
+ { .fni8 = gen_usra16_i64,
+ .fniv = gen_usra_vec,
+ .fno = gen_helper_gvec_usra_h,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16, },
+ { .fni4 = gen_usra32_i32,
+ .fniv = gen_usra_vec,
+ .fno = gen_helper_gvec_usra_s,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32, },
+ { .fni8 = gen_usra64_i64,
+ .fniv = gen_usra_vec,
+ .fno = gen_helper_gvec_usra_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64, },
+ };
-const GVecGen2i usra_op[4] = {
- { .fni8 = gen_usra8_i64,
- .fniv = gen_usra_vec,
- .load_dest = true,
- .opt_opc = vecop_list_usra,
- .vece = MO_8, },
- { .fni8 = gen_usra16_i64,
- .fniv = gen_usra_vec,
- .load_dest = true,
- .opt_opc = vecop_list_usra,
- .vece = MO_16, },
- { .fni4 = gen_usra32_i32,
- .fniv = gen_usra_vec,
- .load_dest = true,
- .opt_opc = vecop_list_usra,
- .vece = MO_32, },
- { .fni8 = gen_usra64_i64,
- .fniv = gen_usra_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .opt_opc = vecop_list_usra,
- .vece = MO_64, },
-};
+ /* tszimm encoding produces immediates in the range [1..esize]. */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ /*
+ * Shifts larger than the element size are architecturally valid.
+ * Unsigned results in all zeros as input to accumulate: nop.
+ */
+ if (shift < (8 << vece)) {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ } else {
+ /* Nop, but we do need to clear the tail. */
+ tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
+ }
+}
+
+/*
+ * Shift one less than the requested amount, and the low bit is
+ * the rounding bit. For the 8 and 16-bit operations, because we
+ * mask the low bit, we can perform a normal integer shift instead
+ * of a vector shift.
+ */
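+/*
+ * For example, rounding-shifting a = 6 (0b110) right by sh = 2: the
+ * rounding bit is (a >> 1) & 1 = 1 and the truncating shift gives
+ * a >> 2 = 1, so the result is 2, matching (6 + 2) >> 2.
+ */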
+static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(t, a, sh - 1);
+ tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
+ tcg_gen_vec_sar8i_i64(d, a, sh);
+ tcg_gen_vec_add8_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(t, a, sh - 1);
+ tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
+ tcg_gen_vec_sar16i_i64(d, a, sh);
+ tcg_gen_vec_add16_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ tcg_gen_extract_i32(t, a, sh - 1, 1);
+ tcg_gen_sari_i32(d, a, sh);
+ tcg_gen_add_i32(d, d, t);
+ tcg_temp_free_i32(t);
+}
+
+static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_extract_i64(t, a, sh - 1, 1);
+ tcg_gen_sari_i64(d, a, sh);
+ tcg_gen_add_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec ones = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_shri_vec(vece, t, a, sh - 1);
+ tcg_gen_dupi_vec(vece, ones, 1);
+ tcg_gen_and_vec(vece, t, t, ones);
+ tcg_gen_sari_vec(vece, d, a, sh);
+ tcg_gen_add_vec(vece, d, d, t);
+
+ tcg_temp_free_vec(t);
+ tcg_temp_free_vec(ones);
+}
+
+void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_srshr8_i64,
+ .fniv = gen_srshr_vec,
+ .fno = gen_helper_gvec_srshr_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = gen_srshr16_i64,
+ .fniv = gen_srshr_vec,
+ .fno = gen_helper_gvec_srshr_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_srshr32_i32,
+ .fniv = gen_srshr_vec,
+ .fno = gen_helper_gvec_srshr_s,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_srshr64_i64,
+ .fniv = gen_srshr_vec,
+ .fno = gen_helper_gvec_srshr_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+
+ /* tszimm encoding produces immediates in the range [1..esize] */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ if (shift == (8 << vece)) {
+ /*
+ * Shifts larger than the element size are architecturally valid.
+ * Signed results in all sign bits. With rounding, this produces
+ * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
+ * I.e. always zero.
+ */
+ tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
+ } else {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ }
+}
+
+static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ gen_srshr8_i64(t, a, sh);
+ tcg_gen_vec_add8_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ gen_srshr16_i64(t, a, sh);
+ tcg_gen_vec_add16_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ gen_srshr32_i32(t, a, sh);
+ tcg_gen_add_i32(d, d, t);
+ tcg_temp_free_i32(t);
+}
+
+static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ gen_srshr64_i64(t, a, sh);
+ tcg_gen_add_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+ gen_srshr_vec(vece, t, a, sh);
+ tcg_gen_add_vec(vece, d, d, t);
+ tcg_temp_free_vec(t);
+}
+
+void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_srsra8_i64,
+ .fniv = gen_srsra_vec,
+ .fno = gen_helper_gvec_srsra_b,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_8 },
+ { .fni8 = gen_srsra16_i64,
+ .fniv = gen_srsra_vec,
+ .fno = gen_helper_gvec_srsra_h,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_16 },
+ { .fni4 = gen_srsra32_i32,
+ .fniv = gen_srsra_vec,
+ .fno = gen_helper_gvec_srsra_s,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_32 },
+ { .fni8 = gen_srsra64_i64,
+ .fniv = gen_srsra_vec,
+ .fno = gen_helper_gvec_srsra_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
+
+ /* tszimm encoding produces immediates in the range [1..esize] */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ /*
+ * Shifts larger than the element size are architecturally valid.
+ * Signed results in all sign bits. With rounding, this produces
+ * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
+ * I.e. always zero. With accumulation, this leaves D unchanged.
+ */
+ if (shift == (8 << vece)) {
+ /* Nop, but we do need to clear the tail. */
+ tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
+ } else {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ }
+}
+
+static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(t, a, sh - 1);
+ tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
+ tcg_gen_vec_shr8i_i64(d, a, sh);
+ tcg_gen_vec_add8_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(t, a, sh - 1);
+ tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
+ tcg_gen_vec_shr16i_i64(d, a, sh);
+ tcg_gen_vec_add16_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ tcg_gen_extract_i32(t, a, sh - 1, 1);
+ tcg_gen_shri_i32(d, a, sh);
+ tcg_gen_add_i32(d, d, t);
+ tcg_temp_free_i32(t);
+}
+
+static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_extract_i64(t, a, sh - 1, 1);
+ tcg_gen_shri_i64(d, a, sh);
+ tcg_gen_add_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec ones = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_shri_vec(vece, t, a, shift - 1);
+ tcg_gen_dupi_vec(vece, ones, 1);
+ tcg_gen_and_vec(vece, t, t, ones);
+ tcg_gen_shri_vec(vece, d, a, shift);
+ tcg_gen_add_vec(vece, d, d, t);
+
+ tcg_temp_free_vec(t);
+ tcg_temp_free_vec(ones);
+}
+
+void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_urshr8_i64,
+ .fniv = gen_urshr_vec,
+ .fno = gen_helper_gvec_urshr_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = gen_urshr16_i64,
+ .fniv = gen_urshr_vec,
+ .fno = gen_helper_gvec_urshr_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_urshr32_i32,
+ .fniv = gen_urshr_vec,
+ .fno = gen_helper_gvec_urshr_s,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_urshr64_i64,
+ .fniv = gen_urshr_vec,
+ .fno = gen_helper_gvec_urshr_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+
+ /* tszimm encoding produces immediates in the range [1..esize] */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ if (shift == (8 << vece)) {
+ /*
+ * Shifts larger than the element size are architecturally valid.
+ * Unsigned results in zero. With rounding, this produces a
+ * copy of the most significant bit.
+ */
+ tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
+ } else {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ }
+}
+
+static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ if (sh == 8) {
+ tcg_gen_vec_shr8i_i64(t, a, 7);
+ } else {
+ gen_urshr8_i64(t, a, sh);
+ }
+ tcg_gen_vec_add8_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ if (sh == 16) {
+ tcg_gen_vec_shr16i_i64(t, a, 15);
+ } else {
+ gen_urshr16_i64(t, a, sh);
+ }
+ tcg_gen_vec_add16_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
+ if (sh == 32) {
+ tcg_gen_shri_i32(t, a, 31);
+ } else {
+ gen_urshr32_i32(t, a, sh);
+ }
+ tcg_gen_add_i32(d, d, t);
+ tcg_temp_free_i32(t);
+}
+
+static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ if (sh == 64) {
+ tcg_gen_shri_i64(t, a, 63);
+ } else {
+ gen_urshr64_i64(t, a, sh);
+ }
+ tcg_gen_add_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+ if (sh == (8 << vece)) {
+ tcg_gen_shri_vec(vece, t, a, sh - 1);
+ } else {
+ gen_urshr_vec(vece, t, a, sh);
+ }
+ tcg_gen_add_vec(vece, d, d, t);
+ tcg_temp_free_vec(t);
+}
+
+void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_shri_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen2i ops[4] = {
+ { .fni8 = gen_ursra8_i64,
+ .fniv = gen_ursra_vec,
+ .fno = gen_helper_gvec_ursra_b,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_8 },
+ { .fni8 = gen_ursra16_i64,
+ .fniv = gen_ursra_vec,
+ .fno = gen_helper_gvec_ursra_h,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_16 },
+ { .fni4 = gen_ursra32_i32,
+ .fniv = gen_ursra_vec,
+ .fno = gen_helper_gvec_ursra_s,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_32 },
+ { .fni8 = gen_ursra64_i64,
+ .fniv = gen_ursra_vec,
+ .fno = gen_helper_gvec_ursra_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .load_dest = true,
+ .vece = MO_64 },
+ };
+
+ /* tszimm encoding produces immediates in the range [1..esize] */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
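+    /*
+     * Unlike the plain rounding shifts, shift == esize needs no special
+     * case at this level: the per-element functions above reduce it to
+     * accumulating the top bit of each element.
+     */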
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+}
static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
- if (sh == 0) {
- tcg_gen_mov_vec(d, a);
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec m = tcg_temp_new_vec_matching(d);
+
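+    /* Keep the top sh bits of d; the low bits are replaced by a >> sh. */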
+ tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
+ tcg_gen_shri_vec(vece, t, a, sh);
+ tcg_gen_and_vec(vece, d, d, m);
+ tcg_gen_or_vec(vece, d, d, t);
+
+ tcg_temp_free_vec(t);
+ tcg_temp_free_vec(m);
+}
+
+void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
+ const GVecGen2i ops[4] = {
+ { .fni8 = gen_shr8_ins_i64,
+ .fniv = gen_shr_ins_vec,
+ .fno = gen_helper_gvec_sri_b,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = gen_shr16_ins_i64,
+ .fniv = gen_shr_ins_vec,
+ .fno = gen_helper_gvec_sri_h,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_shr32_ins_i32,
+ .fniv = gen_shr_ins_vec,
+ .fno = gen_helper_gvec_sri_s,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_shr64_ins_i64,
+ .fniv = gen_shr_ins_vec,
+ .fno = gen_helper_gvec_sri_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+
+ /* tszimm encoding produces immediates in the range [1..esize]. */
+ tcg_debug_assert(shift > 0);
+ tcg_debug_assert(shift <= (8 << vece));
+
+ /* Shift of esize leaves destination unchanged. */
+ if (shift < (8 << vece)) {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
} else {
- TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec m = tcg_temp_new_vec_matching(d);
-
- tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
- tcg_gen_shri_vec(vece, t, a, sh);
- tcg_gen_and_vec(vece, d, d, m);
- tcg_gen_or_vec(vece, d, d, t);
-
- tcg_temp_free_vec(t);
- tcg_temp_free_vec(m);
- }
-}
-
-static const TCGOpcode vecop_list_sri[] = { INDEX_op_shri_vec, 0 };
-
-const GVecGen2i sri_op[4] = {
- { .fni8 = gen_shr8_ins_i64,
- .fniv = gen_shr_ins_vec,
- .load_dest = true,
- .opt_opc = vecop_list_sri,
- .vece = MO_8 },
- { .fni8 = gen_shr16_ins_i64,
- .fniv = gen_shr_ins_vec,
- .load_dest = true,
- .opt_opc = vecop_list_sri,
- .vece = MO_16 },
- { .fni4 = gen_shr32_ins_i32,
- .fniv = gen_shr_ins_vec,
- .load_dest = true,
- .opt_opc = vecop_list_sri,
- .vece = MO_32 },
- { .fni8 = gen_shr64_ins_i64,
- .fniv = gen_shr_ins_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .opt_opc = vecop_list_sri,
- .vece = MO_64 },
-};
+ /* Nop, but we do need to clear the tail. */
+ tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
+ }
+}
static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
- if (sh == 0) {
- tcg_gen_mov_vec(d, a);
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec m = tcg_temp_new_vec_matching(d);
+
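+    /* Keep the low sh bits of d; the high bits are replaced by a << sh. */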
+ tcg_gen_shli_vec(vece, t, a, sh);
+ tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
+ tcg_gen_and_vec(vece, d, d, m);
+ tcg_gen_or_vec(vece, d, d, t);
+
+ tcg_temp_free_vec(t);
+ tcg_temp_free_vec(m);
+}
+
+void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
+ int64_t shift, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
+ const GVecGen2i ops[4] = {
+ { .fni8 = gen_shl8_ins_i64,
+ .fniv = gen_shl_ins_vec,
+ .fno = gen_helper_gvec_sli_b,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni8 = gen_shl16_ins_i64,
+ .fniv = gen_shl_ins_vec,
+ .fno = gen_helper_gvec_sli_h,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_shl32_ins_i32,
+ .fniv = gen_shl_ins_vec,
+ .fno = gen_helper_gvec_sli_s,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_shl64_ins_i64,
+ .fniv = gen_shl_ins_vec,
+ .fno = gen_helper_gvec_sli_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+
+ /* tszimm encoding produces immediates in the range [0..esize-1]. */
+ tcg_debug_assert(shift >= 0);
+ tcg_debug_assert(shift < (8 << vece));
+
+ if (shift == 0) {
+ tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
} else {
- TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec m = tcg_temp_new_vec_matching(d);
-
- tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
- tcg_gen_shli_vec(vece, t, a, sh);
- tcg_gen_and_vec(vece, d, d, m);
- tcg_gen_or_vec(vece, d, d, t);
-
- tcg_temp_free_vec(t);
- tcg_temp_free_vec(m);
- }
-}
-
-static const TCGOpcode vecop_list_sli[] = { INDEX_op_shli_vec, 0 };
-
-const GVecGen2i sli_op[4] = {
- { .fni8 = gen_shl8_ins_i64,
- .fniv = gen_shl_ins_vec,
- .load_dest = true,
- .opt_opc = vecop_list_sli,
- .vece = MO_8 },
- { .fni8 = gen_shl16_ins_i64,
- .fniv = gen_shl_ins_vec,
- .load_dest = true,
- .opt_opc = vecop_list_sli,
- .vece = MO_16 },
- { .fni4 = gen_shl32_ins_i32,
- .fniv = gen_shl_ins_vec,
- .load_dest = true,
- .opt_opc = vecop_list_sli,
- .vece = MO_32 },
- { .fni8 = gen_shl64_ins_i64,
- .fniv = gen_shl_ins_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .opt_opc = vecop_list_sli,
- .vece = MO_64 },
-};
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
+ }
+}
static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
* these tables are shared with AArch64 which does support them.
*/
+void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_mul_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fni4 = gen_mla8_i32,
+ .fniv = gen_mla_vec,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni4 = gen_mla16_i32,
+ .fniv = gen_mla_vec,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_mla32_i32,
+ .fniv = gen_mla_vec,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_mla64_i64,
+ .fniv = gen_mla_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
-static const TCGOpcode vecop_list_mla[] = {
- INDEX_op_mul_vec, INDEX_op_add_vec, 0
-};
-
-static const TCGOpcode vecop_list_mls[] = {
- INDEX_op_mul_vec, INDEX_op_sub_vec, 0
-};
-
-const GVecGen3 mla_op[4] = {
- { .fni4 = gen_mla8_i32,
- .fniv = gen_mla_vec,
- .load_dest = true,
- .opt_opc = vecop_list_mla,
- .vece = MO_8 },
- { .fni4 = gen_mla16_i32,
- .fniv = gen_mla_vec,
- .load_dest = true,
- .opt_opc = vecop_list_mla,
- .vece = MO_16 },
- { .fni4 = gen_mla32_i32,
- .fniv = gen_mla_vec,
- .load_dest = true,
- .opt_opc = vecop_list_mla,
- .vece = MO_32 },
- { .fni8 = gen_mla64_i64,
- .fniv = gen_mla_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .opt_opc = vecop_list_mla,
- .vece = MO_64 },
-};
-
-const GVecGen3 mls_op[4] = {
- { .fni4 = gen_mls8_i32,
- .fniv = gen_mls_vec,
- .load_dest = true,
- .opt_opc = vecop_list_mls,
- .vece = MO_8 },
- { .fni4 = gen_mls16_i32,
- .fniv = gen_mls_vec,
- .load_dest = true,
- .opt_opc = vecop_list_mls,
- .vece = MO_16 },
- { .fni4 = gen_mls32_i32,
- .fniv = gen_mls_vec,
- .load_dest = true,
- .opt_opc = vecop_list_mls,
- .vece = MO_32 },
- { .fni8 = gen_mls64_i64,
- .fniv = gen_mls_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .load_dest = true,
- .opt_opc = vecop_list_mls,
- .vece = MO_64 },
-};
+void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_mul_vec, INDEX_op_sub_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fni4 = gen_mls8_i32,
+ .fniv = gen_mls_vec,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni4 = gen_mls16_i32,
+ .fniv = gen_mls_vec,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_mls32_i32,
+ .fniv = gen_mls_vec,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_mls64_i64,
+ .fniv = gen_mls_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
/* CMTST : test is "if (X & Y != 0)". */
static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
}
-static const TCGOpcode vecop_list_cmtst[] = { INDEX_op_cmp_vec, 0 };
-
-const GVecGen3 cmtst_op[4] = {
- { .fni4 = gen_helper_neon_tst_u8,
- .fniv = gen_cmtst_vec,
- .opt_opc = vecop_list_cmtst,
- .vece = MO_8 },
- { .fni4 = gen_helper_neon_tst_u16,
- .fniv = gen_cmtst_vec,
- .opt_opc = vecop_list_cmtst,
- .vece = MO_16 },
- { .fni4 = gen_cmtst_i32,
- .fniv = gen_cmtst_vec,
- .opt_opc = vecop_list_cmtst,
- .vece = MO_32 },
- { .fni8 = gen_cmtst_i64,
- .fniv = gen_cmtst_vec,
- .prefer_i64 = TCG_TARGET_REG_BITS == 64,
- .opt_opc = vecop_list_cmtst,
- .vece = MO_64 },
-};
+void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
+ static const GVecGen3 ops[4] = {
+ { .fni4 = gen_helper_neon_tst_u8,
+ .fniv = gen_cmtst_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fni4 = gen_helper_neon_tst_u16,
+ .fniv = gen_cmtst_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_cmtst_i32,
+ .fniv = gen_cmtst_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_cmtst_i64,
+ .fniv = gen_cmtst_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
tcg_temp_free_vec(rsh);
}
-static const TCGOpcode ushl_list[] = {
- INDEX_op_neg_vec, INDEX_op_shlv_vec,
- INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
-};
-
-const GVecGen3 ushl_op[4] = {
- { .fniv = gen_ushl_vec,
- .fno = gen_helper_gvec_ushl_b,
- .opt_opc = ushl_list,
- .vece = MO_8 },
- { .fniv = gen_ushl_vec,
- .fno = gen_helper_gvec_ushl_h,
- .opt_opc = ushl_list,
- .vece = MO_16 },
- { .fni4 = gen_ushl_i32,
- .fniv = gen_ushl_vec,
- .opt_opc = ushl_list,
- .vece = MO_32 },
- { .fni8 = gen_ushl_i64,
- .fniv = gen_ushl_vec,
- .opt_opc = ushl_list,
- .vece = MO_64 },
-};
+void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_neg_vec, INDEX_op_shlv_vec,
+ INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_ushl_vec,
+ .fno = gen_helper_gvec_ushl_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = gen_ushl_vec,
+ .fno = gen_helper_gvec_ushl_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_ushl_i32,
+ .fniv = gen_ushl_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_ushl_i64,
+ .fniv = gen_ushl_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
{
tcg_temp_free_vec(tmp);
}
-static const TCGOpcode sshl_list[] = {
- INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
- INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
-};
-
-const GVecGen3 sshl_op[4] = {
- { .fniv = gen_sshl_vec,
- .fno = gen_helper_gvec_sshl_b,
- .opt_opc = sshl_list,
- .vece = MO_8 },
- { .fniv = gen_sshl_vec,
- .fno = gen_helper_gvec_sshl_h,
- .opt_opc = sshl_list,
- .vece = MO_16 },
- { .fni4 = gen_sshl_i32,
- .fniv = gen_sshl_vec,
- .opt_opc = sshl_list,
- .vece = MO_32 },
- { .fni8 = gen_sshl_i64,
- .fniv = gen_sshl_vec,
- .opt_opc = sshl_list,
- .vece = MO_64 },
-};
+void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
+ INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_sshl_vec,
+ .fno = gen_helper_gvec_sshl_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = gen_sshl_vec,
+ .fno = gen_helper_gvec_sshl_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_sshl_i32,
+ .fniv = gen_sshl_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_sshl_i64,
+ .fniv = gen_sshl_vec,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
TCGv_vec a, TCGv_vec b)
tcg_temp_free_vec(x);
}
-static const TCGOpcode vecop_list_uqadd[] = {
- INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
-};
-
-const GVecGen4 uqadd_op[4] = {
- { .fniv = gen_uqadd_vec,
- .fno = gen_helper_gvec_uqadd_b,
- .write_aofs = true,
- .opt_opc = vecop_list_uqadd,
- .vece = MO_8 },
- { .fniv = gen_uqadd_vec,
- .fno = gen_helper_gvec_uqadd_h,
- .write_aofs = true,
- .opt_opc = vecop_list_uqadd,
- .vece = MO_16 },
- { .fniv = gen_uqadd_vec,
- .fno = gen_helper_gvec_uqadd_s,
- .write_aofs = true,
- .opt_opc = vecop_list_uqadd,
- .vece = MO_32 },
- { .fniv = gen_uqadd_vec,
- .fno = gen_helper_gvec_uqadd_d,
- .write_aofs = true,
- .opt_opc = vecop_list_uqadd,
- .vece = MO_64 },
-};
+void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen4 ops[4] = {
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_b,
+ .write_aofs = true,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_h,
+ .write_aofs = true,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_s,
+ .write_aofs = true,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_d,
+ .write_aofs = true,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
+ rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
TCGv_vec a, TCGv_vec b)
tcg_temp_free_vec(x);
}
-static const TCGOpcode vecop_list_sqadd[] = {
- INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
-};
-
-const GVecGen4 sqadd_op[4] = {
- { .fniv = gen_sqadd_vec,
- .fno = gen_helper_gvec_sqadd_b,
- .opt_opc = vecop_list_sqadd,
- .write_aofs = true,
- .vece = MO_8 },
- { .fniv = gen_sqadd_vec,
- .fno = gen_helper_gvec_sqadd_h,
- .opt_opc = vecop_list_sqadd,
- .write_aofs = true,
- .vece = MO_16 },
- { .fniv = gen_sqadd_vec,
- .fno = gen_helper_gvec_sqadd_s,
- .opt_opc = vecop_list_sqadd,
- .write_aofs = true,
- .vece = MO_32 },
- { .fniv = gen_sqadd_vec,
- .fno = gen_helper_gvec_sqadd_d,
- .opt_opc = vecop_list_sqadd,
- .write_aofs = true,
- .vece = MO_64 },
-};
+void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
+ };
+ static const GVecGen4 ops[4] = {
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_b,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_8 },
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_h,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_16 },
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_s,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_32 },
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_d,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
+ rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
TCGv_vec a, TCGv_vec b)
tcg_temp_free_vec(x);
}
-static const TCGOpcode vecop_list_uqsub[] = {
- INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
-};
-
-const GVecGen4 uqsub_op[4] = {
- { .fniv = gen_uqsub_vec,
- .fno = gen_helper_gvec_uqsub_b,
- .opt_opc = vecop_list_uqsub,
- .write_aofs = true,
- .vece = MO_8 },
- { .fniv = gen_uqsub_vec,
- .fno = gen_helper_gvec_uqsub_h,
- .opt_opc = vecop_list_uqsub,
- .write_aofs = true,
- .vece = MO_16 },
- { .fniv = gen_uqsub_vec,
- .fno = gen_helper_gvec_uqsub_s,
- .opt_opc = vecop_list_uqsub,
- .write_aofs = true,
- .vece = MO_32 },
- { .fniv = gen_uqsub_vec,
- .fno = gen_helper_gvec_uqsub_d,
- .opt_opc = vecop_list_uqsub,
- .write_aofs = true,
- .vece = MO_64 },
-};
+void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
+ };
+ static const GVecGen4 ops[4] = {
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_b,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_8 },
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_h,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_16 },
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_s,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_32 },
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_d,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
+ rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
TCGv_vec a, TCGv_vec b)
tcg_temp_free_vec(x);
}
-static const TCGOpcode vecop_list_sqsub[] = {
- INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
-};
+void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
+ };
+ static const GVecGen4 ops[4] = {
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_b,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_8 },
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_h,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_16 },
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_s,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_32 },
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_d,
+ .opt_opc = vecop_list,
+ .write_aofs = true,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
+ rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
-const GVecGen4 sqsub_op[4] = {
- { .fniv = gen_sqsub_vec,
- .fno = gen_helper_gvec_sqsub_b,
- .opt_opc = vecop_list_sqsub,
- .write_aofs = true,
- .vece = MO_8 },
- { .fniv = gen_sqsub_vec,
- .fno = gen_helper_gvec_sqsub_h,
- .opt_opc = vecop_list_sqsub,
- .write_aofs = true,
- .vece = MO_16 },
- { .fniv = gen_sqsub_vec,
- .fno = gen_helper_gvec_sqsub_s,
- .opt_opc = vecop_list_sqsub,
- .write_aofs = true,
- .vece = MO_32 },
- { .fniv = gen_sqsub_vec,
- .fno = gen_helper_gvec_sqsub_d,
- .opt_opc = vecop_list_sqsub,
- .write_aofs = true,
- .vece = MO_64 },
-};
+static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
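+    /* Branch-free d = |a - b|: select b - a when a < b, else a - b. */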
+ tcg_gen_sub_i32(t, a, b);
+ tcg_gen_sub_i32(d, b, a);
+ tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
+ tcg_temp_free_i32(t);
+}
+
+static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_sub_i64(t, a, b);
+ tcg_gen_sub_i64(d, b, a);
+ tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_smin_vec(vece, t, a, b);
+ tcg_gen_smax_vec(vece, d, a, b);
+ tcg_gen_sub_vec(vece, d, d, t);
+ tcg_temp_free_vec(t);
+}
+
+void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_sabd_vec,
+ .fno = gen_helper_gvec_sabd_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = gen_sabd_vec,
+ .fno = gen_helper_gvec_sabd_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_sabd_i32,
+ .fniv = gen_sabd_vec,
+ .fno = gen_helper_gvec_sabd_s,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_sabd_i64,
+ .fniv = gen_sabd_vec,
+ .fno = gen_helper_gvec_sabd_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
+
+static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ TCGv_i32 t = tcg_temp_new_i32();
+
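+    /* As gen_sabd_i32 above, but with an unsigned comparison. */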
+ tcg_gen_sub_i32(t, a, b);
+ tcg_gen_sub_i32(d, b, a);
+ tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
+ tcg_temp_free_i32(t);
+}
+
+static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_sub_i64(t, a, b);
+ tcg_gen_sub_i64(d, b, a);
+ tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_umin_vec(vece, t, a, b);
+ tcg_gen_umax_vec(vece, d, a, b);
+ tcg_gen_sub_vec(vece, d, d, t);
+ tcg_temp_free_vec(t);
+}
+
+void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
+ uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
+{
+ static const TCGOpcode vecop_list[] = {
+ INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
+ };
+ static const GVecGen3 ops[4] = {
+ { .fniv = gen_uabd_vec,
+ .fno = gen_helper_gvec_uabd_b,
+ .opt_opc = vecop_list,
+ .vece = MO_8 },
+ { .fniv = gen_uabd_vec,
+ .fno = gen_helper_gvec_uabd_h,
+ .opt_opc = vecop_list,
+ .vece = MO_16 },
+ { .fni4 = gen_uabd_i32,
+ .fniv = gen_uabd_vec,
+ .fno = gen_helper_gvec_uabd_s,
+ .opt_opc = vecop_list,
+ .vece = MO_32 },
+ { .fni8 = gen_uabd_i64,
+ .fniv = gen_uabd_vec,
+ .fno = gen_helper_gvec_uabd_d,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .opt_opc = vecop_list,
+ .vece = MO_64 },
+ };
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
+}
/* Translate a NEON data processing instruction. Return nonzero if the
instruction is invalid.
TCGv_ptr ptr1, ptr2, ptr3;
TCGv_i64 tmp64;
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
+ return 1;
+ }
+
/* FIXME: this access check should not take precedence over UNDEF
* for invalid encodings; we will generate incorrect syndrome information
* for attempts to execute invalid vfp/neon encodings with FP disabled.
break; /* VPADD */
}
/* VQRDMLAH */
- switch (size) {
- case 1:
- return do_v81_helper(s, gen_helper_gvec_qrdmlah_s16,
- q, rd, rn, rm);
- case 2:
- return do_v81_helper(s, gen_helper_gvec_qrdmlah_s32,
- q, rd, rn, rm);
+ if (dc_isar_feature(aa32_rdm, s) && (size == 1 || size == 2)) {
+ gen_gvec_sqrdmlah_qc(size, rd_ofs, rn_ofs, rm_ofs,
+ vec_size, vec_size);
+ return 0;
}
return 1;
break;
}
/* VQRDMLSH */
- switch (size) {
- case 1:
- return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s16,
- q, rd, rn, rm);
- case 2:
- return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s32,
- q, rd, rn, rm);
- }
- return 1;
-
- case NEON_3R_LOGIC: /* Logic ops. */
- switch ((u << 2) | size) {
- case 0: /* VAND */
- tcg_gen_gvec_and(0, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
- break;
- case 1: /* VBIC */
- tcg_gen_gvec_andc(0, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
- break;
- case 2: /* VORR */
- tcg_gen_gvec_or(0, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
- break;
- case 3: /* VORN */
- tcg_gen_gvec_orc(0, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
- break;
- case 4: /* VEOR */
- tcg_gen_gvec_xor(0, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
- break;
- case 5: /* VBSL */
- tcg_gen_gvec_bitsel(MO_8, rd_ofs, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
- break;
- case 6: /* VBIT */
- tcg_gen_gvec_bitsel(MO_8, rd_ofs, rm_ofs, rn_ofs, rd_ofs,
- vec_size, vec_size);
- break;
- case 7: /* VBIF */
- tcg_gen_gvec_bitsel(MO_8, rd_ofs, rm_ofs, rd_ofs, rn_ofs,
- vec_size, vec_size);
- break;
- }
- return 0;
-
- case NEON_3R_VADD_VSUB:
- if (u) {
- tcg_gen_gvec_sub(size, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
- } else {
- tcg_gen_gvec_add(size, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
- }
- return 0;
-
- case NEON_3R_VQADD:
- tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
- rn_ofs, rm_ofs, vec_size, vec_size,
- (u ? uqadd_op : sqadd_op) + size);
- return 0;
-
- case NEON_3R_VQSUB:
- tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
- rn_ofs, rm_ofs, vec_size, vec_size,
- (u ? uqsub_op : sqsub_op) + size);
- return 0;
-
- case NEON_3R_VMUL: /* VMUL */
- if (u) {
- /* Polynomial case allows only P8. */
- if (size != 0) {
- return 1;
- }
- tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
- 0, gen_helper_gvec_pmul_b);
- } else {
- tcg_gen_gvec_mul(size, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
- }
- return 0;
-
- case NEON_3R_VML: /* VMLA, VMLS */
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
- u ? &mls_op[size] : &mla_op[size]);
- return 0;
-
- case NEON_3R_VTST_VCEQ:
- if (u) { /* VCEQ */
- tcg_gen_gvec_cmp(TCG_COND_EQ, size, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
- } else { /* VTST */
- tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size, &cmtst_op[size]);
+ if (dc_isar_feature(aa32_rdm, s) && (size == 1 || size == 2)) {
+ gen_gvec_sqrdmlsh_qc(size, rd_ofs, rn_ofs, rm_ofs,
+ vec_size, vec_size);
+ return 0;
}
- return 0;
-
- case NEON_3R_VCGT:
- tcg_gen_gvec_cmp(u ? TCG_COND_GTU : TCG_COND_GT, size,
- rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
- return 0;
-
- case NEON_3R_VCGE:
- tcg_gen_gvec_cmp(u ? TCG_COND_GEU : TCG_COND_GE, size,
- rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
- return 0;
+ return 1;
- case NEON_3R_VMAX:
- if (u) {
- tcg_gen_gvec_umax(size, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
- } else {
- tcg_gen_gvec_smax(size, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
- }
- return 0;
- case NEON_3R_VMIN:
+ case NEON_3R_VABD:
if (u) {
- tcg_gen_gvec_umin(size, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
+ gen_gvec_uabd(size, rd_ofs, rn_ofs, rm_ofs,
+ vec_size, vec_size);
} else {
- tcg_gen_gvec_smin(size, rd_ofs, rn_ofs, rm_ofs,
- vec_size, vec_size);
+ gen_gvec_sabd(size, rd_ofs, rn_ofs, rm_ofs,
+ vec_size, vec_size);
}
return 0;
+ case NEON_3R_VADD_VSUB:
+ case NEON_3R_LOGIC:
+ case NEON_3R_VMAX:
+ case NEON_3R_VMIN:
+ case NEON_3R_VTST_VCEQ:
+ case NEON_3R_VCGT:
+ case NEON_3R_VCGE:
+ case NEON_3R_VQADD:
+ case NEON_3R_VQSUB:
+ case NEON_3R_VMUL:
+ case NEON_3R_VML:
case NEON_3R_VSHL:
- /* Note the operation is vshl vd,vm,vn */
- tcg_gen_gvec_3(rd_ofs, rm_ofs, rn_ofs, vec_size, vec_size,
- u ? &ushl_op[size] : &sshl_op[size]);
- return 0;
+ /* Already handled by decodetree */
+ return 1;
}
if (size == 3) {
case NEON_3R_VQRSHL:
GEN_NEON_INTEGER_OP_ENV(qrshl);
break;
- case NEON_3R_VABD:
- GEN_NEON_INTEGER_OP(abd);
- break;
case NEON_3R_VABA:
GEN_NEON_INTEGER_OP(abd);
tcg_temp_free_i32(tmp2);
MIN(shift, (8 << size) - 1),
vec_size, vec_size);
} else if (shift >= 8 << size) {
- tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0);
+ tcg_gen_gvec_dup_imm(MO_8, rd_ofs, vec_size,
+ vec_size, 0);
} else {
tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift,
vec_size, vec_size);
case 1: /* VSRA */
/* Right shift comes here negative. */
shift = -shift;
- /* Shifts larger than the element size are architecturally
- * valid. Unsigned results in all zeros; signed results
- * in all sign bits.
- */
- if (!u) {
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
- MIN(shift, (8 << size) - 1),
- &ssra_op[size]);
- } else if (shift >= 8 << size) {
- /* rd += 0 */
+ if (u) {
+ gen_gvec_usra(size, rd_ofs, rm_ofs, shift,
+ vec_size, vec_size);
+ } else {
+ gen_gvec_ssra(size, rd_ofs, rm_ofs, shift,
+ vec_size, vec_size);
+ }
+ return 0;
+
+ case 2: /* VRSHR */
+ /* Right shift comes here negative. */
+ shift = -shift;
+ if (u) {
+ gen_gvec_urshr(size, rd_ofs, rm_ofs, shift,
+ vec_size, vec_size);
+ } else {
+ gen_gvec_srshr(size, rd_ofs, rm_ofs, shift,
+ vec_size, vec_size);
+ }
+ return 0;
+
+ case 3: /* VRSRA */
+ /* Right shift comes here negative. */
+ shift = -shift;
+ if (u) {
+ gen_gvec_ursra(size, rd_ofs, rm_ofs, shift,
+ vec_size, vec_size);
} else {
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
- shift, &usra_op[size]);
+ gen_gvec_srsra(size, rd_ofs, rm_ofs, shift,
+ vec_size, vec_size);
}
return 0;
}
/* Right shift comes here negative. */
shift = -shift;
- /* Shift out of range leaves destination unchanged. */
- if (shift < 8 << size) {
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
- shift, &sri_op[size]);
- }
+ gen_gvec_sri(size, rd_ofs, rm_ofs, shift,
+ vec_size, vec_size);
return 0;
case 5: /* VSHL, VSLI */
if (u) { /* VSLI */
- /* Shift out of range leaves destination unchanged. */
- if (shift < 8 << size) {
- tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size,
- vec_size, shift, &sli_op[size]);
- }
+ gen_gvec_sli(size, rd_ofs, rm_ofs, shift,
+ vec_size, vec_size);
} else { /* VSHL */
- /* Shifts larger than the element size are
- * architecturally valid and results in zero.
- */
- if (shift >= 8 << size) {
- tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0);
- } else {
- tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift,
- vec_size, vec_size);
- }
+ tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift,
+ vec_size, vec_size);
}
return 0;
}
neon_load_reg64(cpu_V0, rm + pass);
tcg_gen_movi_i64(cpu_V1, imm);
switch (op) {
- case 2: /* VRSHR */
- case 3: /* VRSRA */
- if (u)
- gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, cpu_V1);
- else
- gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1);
- break;
case 6: /* VQSHLU */
gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
cpu_V0, cpu_V1);
default:
g_assert_not_reached();
}
- if (op == 3) {
- /* Accumulate. */
- neon_load_reg64(cpu_V1, rd + pass);
- tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
- }
neon_store_reg64(cpu_V0, rd + pass);
} else { /* size < 3 */
/* Operands in T0 and T1. */
tmp2 = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp2, imm);
switch (op) {
- case 2: /* VRSHR */
- case 3: /* VRSRA */
- GEN_NEON_INTEGER_OP(rshl);
- break;
case 6: /* VQSHLU */
switch (size) {
case 0:
g_assert_not_reached();
}
tcg_temp_free_i32(tmp2);
-
- if (op == 3) {
- /* Accumulate. */
- tmp2 = neon_load_reg(rd, pass);
- gen_neon_add(size, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- }
neon_store_reg(rd, pass, tmp);
}
} /* for pass */
}
tcg_temp_free_i64(t64);
} else {
- tcg_gen_gvec_dup32i(reg_ofs, vec_size, vec_size, imm);
+ tcg_gen_gvec_dup_imm(MO_32, reg_ofs, vec_size,
+ vec_size, imm);
}
}
}
{0, 0, 0, 0}, /* VMLSL */
{0, 0, 0, 9}, /* VQDMLSL */
{0, 0, 0, 0}, /* Integer VMULL */
- {0, 0, 0, 1}, /* VQDMULL */
+ {0, 0, 0, 9}, /* VQDMULL */
{0, 0, 0, 0xa}, /* Polynomial VMULL */
{0, 0, 0, 7}, /* Reserved: always UNDEF */
};
tcg_gen_gvec_abs(size, rd_ofs, rm_ofs, vec_size, vec_size);
break;
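+ /* Integer comparisons against zero, expanded as vector (gvec) ops. */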
+ case NEON_2RM_VCEQ0:
+ gen_gvec_ceq0(size, rd_ofs, rm_ofs, vec_size, vec_size);
+ break;
+ case NEON_2RM_VCGT0:
+ gen_gvec_cgt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
+ break;
+ case NEON_2RM_VCLE0:
+ gen_gvec_cle0(size, rd_ofs, rm_ofs, vec_size, vec_size);
+ break;
+ case NEON_2RM_VCGE0:
+ gen_gvec_cge0(size, rd_ofs, rm_ofs, vec_size, vec_size);
+ break;
+ case NEON_2RM_VCLT0:
+ gen_gvec_clt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
+ break;
+
default:
elementwise:
for (pass = 0; pass < (q ? 4 : 2); pass++) {
default: abort();
}
break;
- case NEON_2RM_VCGT0: case NEON_2RM_VCLE0:
- tmp2 = tcg_const_i32(0);
- switch(size) {
- case 0: gen_helper_neon_cgt_s8(tmp, tmp, tmp2); break;
- case 1: gen_helper_neon_cgt_s16(tmp, tmp, tmp2); break;
- case 2: gen_helper_neon_cgt_s32(tmp, tmp, tmp2); break;
- default: abort();
- }
- tcg_temp_free_i32(tmp2);
- if (op == NEON_2RM_VCLE0) {
- tcg_gen_not_i32(tmp, tmp);
- }
- break;
- case NEON_2RM_VCGE0: case NEON_2RM_VCLT0:
- tmp2 = tcg_const_i32(0);
- switch(size) {
- case 0: gen_helper_neon_cge_s8(tmp, tmp, tmp2); break;
- case 1: gen_helper_neon_cge_s16(tmp, tmp, tmp2); break;
- case 2: gen_helper_neon_cge_s32(tmp, tmp, tmp2); break;
- default: abort();
- }
- tcg_temp_free_i32(tmp2);
- if (op == NEON_2RM_VCLT0) {
- tcg_gen_not_i32(tmp, tmp);
- }
- break;
- case NEON_2RM_VCEQ0:
- tmp2 = tcg_const_i32(0);
- switch(size) {
- case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
- case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
- case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
- default: abort();
- }
- tcg_temp_free_i32(tmp2);
- break;
case NEON_2RM_VCGT0_F:
{
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
break;
}
case NEON_2RM_VRECPE:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- gen_helper_recpe_u32(tmp, tmp, fpstatus);
- tcg_temp_free_ptr(fpstatus);
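+ /* The integer recpe/rsqrte estimate helpers do not use fp_status. */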
+ gen_helper_recpe_u32(tmp, tmp);
break;
- }
case NEON_2RM_VRSQRTE:
- {
- TCGv_ptr fpstatus = get_fpstatus_ptr(1);
- gen_helper_rsqrte_u32(tmp, tmp, fpstatus);
- tcg_temp_free_ptr(fpstatus);
+ gen_helper_rsqrte_u32(tmp, tmp);
break;
- }
case NEON_2RM_VRECPE_F:
{
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
return 0;
}
-/* Advanced SIMD three registers of the same length extension.
- * 31 25 23 22 20 16 12 11 10 9 8 3 0
- * +---------------+-----+---+-----+----+----+---+----+---+----+---------+----+
- * | 1 1 1 1 1 1 0 | op1 | D | op2 | Vn | Vd | 1 | o3 | 0 | o4 | N Q M U | Vm |
- * +---------------+-----+---+-----+----+----+---+----+---+----+---------+----+
- */
-static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn)
-{
- gen_helper_gvec_3 *fn_gvec = NULL;
- gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL;
- int rd, rn, rm, opr_sz;
- int data = 0;
- int off_rn, off_rm;
- bool is_long = false, q = extract32(insn, 6, 1);
- bool ptr_is_env = false;
-
- if ((insn & 0xfe200f10) == 0xfc200800) {
- /* VCMLA -- 1111 110R R.1S .... .... 1000 ...0 .... */
- int size = extract32(insn, 20, 1);
- data = extract32(insn, 23, 2); /* rot */
- if (!dc_isar_feature(aa32_vcma, s)
- || (!size && !dc_isar_feature(aa32_fp16_arith, s))) {
- return 1;
- }
- fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah;
- } else if ((insn & 0xfea00f10) == 0xfc800800) {
- /* VCADD -- 1111 110R 1.0S .... .... 1000 ...0 .... */
- int size = extract32(insn, 20, 1);
- data = extract32(insn, 24, 1); /* rot */
- if (!dc_isar_feature(aa32_vcma, s)
- || (!size && !dc_isar_feature(aa32_fp16_arith, s))) {
- return 1;
- }
- fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh;
- } else if ((insn & 0xfeb00f00) == 0xfc200d00) {
- /* V[US]DOT -- 1111 1100 0.10 .... .... 1101 .Q.U .... */
- bool u = extract32(insn, 4, 1);
- if (!dc_isar_feature(aa32_dp, s)) {
- return 1;
- }
- fn_gvec = u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b;
- } else if ((insn & 0xff300f10) == 0xfc200810) {
- /* VFM[AS]L -- 1111 1100 S.10 .... .... 1000 .Q.1 .... */
- int is_s = extract32(insn, 23, 1);
- if (!dc_isar_feature(aa32_fhm, s)) {
- return 1;
- }
- is_long = true;
- data = is_s; /* is_2 == 0 */
- fn_gvec_ptr = gen_helper_gvec_fmlal_a32;
- ptr_is_env = true;
- } else {
- return 1;
- }
-
- VFP_DREG_D(rd, insn);
- if (rd & q) {
- return 1;
- }
- if (q || !is_long) {
- VFP_DREG_N(rn, insn);
- VFP_DREG_M(rm, insn);
- if ((rn | rm) & q & !is_long) {
- return 1;
- }
- off_rn = vfp_reg_offset(1, rn);
- off_rm = vfp_reg_offset(1, rm);
- } else {
- rn = VFP_SREG_N(insn);
- rm = VFP_SREG_M(insn);
- off_rn = vfp_reg_offset(0, rn);
- off_rm = vfp_reg_offset(0, rm);
- }
-
- if (s->fp_excp_el) {
- gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
- syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
- return 0;
- }
- if (!s->vfp_enabled) {
- return 1;
- }
-
- opr_sz = (1 + q) * 8;
- if (fn_gvec_ptr) {
- TCGv_ptr ptr;
- if (ptr_is_env) {
- ptr = cpu_env;
- } else {
- ptr = get_fpstatus_ptr(1);
- }
- tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), off_rn, off_rm, ptr,
- opr_sz, opr_sz, data, fn_gvec_ptr);
- if (!ptr_is_env) {
- tcg_temp_free_ptr(ptr);
- }
- } else {
- tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd), off_rn, off_rm,
- opr_sz, opr_sz, data, fn_gvec);
- }
- return 0;
-}
-
-/* Advanced SIMD two registers and a scalar extension.
- * 31 24 23 22 20 16 12 11 10 9 8 3 0
- * +-----------------+----+---+----+----+----+---+----+---+----+---------+----+
- * | 1 1 1 1 1 1 1 0 | o1 | D | o2 | Vn | Vd | 1 | o3 | 0 | o4 | N Q M U | Vm |
- * +-----------------+----+---+----+----+----+---+----+---+----+---------+----+
- *
- */
-
-static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn)
-{
- gen_helper_gvec_3 *fn_gvec = NULL;
- gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL;
- int rd, rn, rm, opr_sz, data;
- int off_rn, off_rm;
- bool is_long = false, q = extract32(insn, 6, 1);
- bool ptr_is_env = false;
-
- if ((insn & 0xff000f10) == 0xfe000800) {
- /* VCMLA (indexed) -- 1111 1110 S.RR .... .... 1000 ...0 .... */
- int rot = extract32(insn, 20, 2);
- int size = extract32(insn, 23, 1);
- int index;
-
- if (!dc_isar_feature(aa32_vcma, s)) {
- return 1;
- }
- if (size == 0) {
- if (!dc_isar_feature(aa32_fp16_arith, s)) {
- return 1;
- }
- /* For fp16, rm is just Vm, and index is M. */
- rm = extract32(insn, 0, 4);
- index = extract32(insn, 5, 1);
- } else {
- /* For fp32, rm is the usual M:Vm, and index is 0. */
- VFP_DREG_M(rm, insn);
- index = 0;
- }
- data = (index << 2) | rot;
- fn_gvec_ptr = (size ? gen_helper_gvec_fcmlas_idx
- : gen_helper_gvec_fcmlah_idx);
- } else if ((insn & 0xffb00f00) == 0xfe200d00) {
- /* V[US]DOT -- 1111 1110 0.10 .... .... 1101 .Q.U .... */
- int u = extract32(insn, 4, 1);
-
- if (!dc_isar_feature(aa32_dp, s)) {
- return 1;
- }
- fn_gvec = u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b;
- /* rm is just Vm, and index is M. */
- data = extract32(insn, 5, 1); /* index */
- rm = extract32(insn, 0, 4);
- } else if ((insn & 0xffa00f10) == 0xfe000810) {
- /* VFM[AS]L -- 1111 1110 0.0S .... .... 1000 .Q.1 .... */
- int is_s = extract32(insn, 20, 1);
- int vm20 = extract32(insn, 0, 3);
- int vm3 = extract32(insn, 3, 1);
- int m = extract32(insn, 5, 1);
- int index;
-
- if (!dc_isar_feature(aa32_fhm, s)) {
- return 1;
- }
- if (q) {
- rm = vm20;
- index = m * 2 + vm3;
- } else {
- rm = vm20 * 2 + m;
- index = vm3;
- }
- is_long = true;
- data = (index << 2) | is_s; /* is_2 == 0 */
- fn_gvec_ptr = gen_helper_gvec_fmlal_idx_a32;
- ptr_is_env = true;
- } else {
- return 1;
- }
-
- VFP_DREG_D(rd, insn);
- if (rd & q) {
- return 1;
- }
- if (q || !is_long) {
- VFP_DREG_N(rn, insn);
- if (rn & q & !is_long) {
- return 1;
- }
- off_rn = vfp_reg_offset(1, rn);
- off_rm = vfp_reg_offset(1, rm);
- } else {
- rn = VFP_SREG_N(insn);
- off_rn = vfp_reg_offset(0, rn);
- off_rm = vfp_reg_offset(0, rm);
- }
- if (s->fp_excp_el) {
- gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
- syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
- return 0;
- }
- if (!s->vfp_enabled) {
- return 1;
- }
-
- opr_sz = (1 + q) * 8;
- if (fn_gvec_ptr) {
- TCGv_ptr ptr;
- if (ptr_is_env) {
- ptr = cpu_env;
- } else {
- ptr = get_fpstatus_ptr(1);
- }
- tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), off_rn, off_rm, ptr,
- opr_sz, opr_sz, data, fn_gvec_ptr);
- if (!ptr_is_env) {
- tcg_temp_free_ptr(ptr);
- }
- } else {
- tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd), off_rn, off_rm,
- opr_sz, opr_sz, data, fn_gvec);
- }
- return 0;
-}
-
static int disas_coproc_insn(DisasContext *s, uint32_t insn)
{
int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
/*
- * A write to any coprocessor regiser that ends a TB
+ * A write to any coprocessor register that ends a TB
* must rebuild the hflags for the next TB.
*/
TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
{
- TCGv_i32 addr, reg, el;
+ TCGv_i32 addr, reg;
if (!arm_dc_feature(s, ARM_FEATURE_M)) {
return false;
gen_helper_v7m_msr(cpu_env, addr, reg);
tcg_temp_free_i32(addr);
tcg_temp_free_i32(reg);
- el = tcg_const_i32(s->current_el);
- gen_helper_rebuild_hflags_m32(cpu_env, el);
- tcg_temp_free_i32(el);
+ /* If we wrote to CONTROL, the EL might have changed */
+ gen_helper_rebuild_hflags_m32_newel(cpu_env);
gen_lookup_tb(s);
return true;
}
static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
{
- TCGv_i32 tmp, addr;
+ TCGv_i32 tmp, addr, el;
if (!arm_dc_feature(s, ARM_FEATURE_M)) {
return false;
gen_helper_v7m_msr(cpu_env, addr, tmp);
tcg_temp_free_i32(addr);
}
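+ /* Unlike a CONTROL write, CPS cannot change the EL, so the current EL is still valid here. */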
+ el = tcg_const_i32(s->current_el);
+ gen_helper_rebuild_hflags_m32(cpu_env, el);
+ tcg_temp_free_i32(el);
tcg_temp_free_i32(tmp);
gen_lookup_tb(s);
return true;
ARCH(5);
/* Unconditional instructions. */
- if (disas_a32_uncond(s, insn)) {
+ /* TODO: Perhaps merge these into one decodetree output file. */
+ if (disas_a32_uncond(s, insn) ||
+ disas_vfp_uncond(s, insn) ||
+ disas_neon_dp(s, insn) ||
+ disas_neon_ls(s, insn) ||
+ disas_neon_shared(s, insn)) {
return;
}
/* fall back to legacy decoder */
if (((insn >> 25) & 7) == 1) {
/* NEON Data processing. */
- if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
- goto illegal_op;
- }
-
if (disas_neon_data_insn(s, insn)) {
goto illegal_op;
}
return;
}
- if ((insn & 0x0f100000) == 0x04000000) {
- /* NEON load/store. */
- if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
- goto illegal_op;
- }
-
- if (disas_neon_ls_insn(s, insn)) {
- goto illegal_op;
- }
- return;
- }
- if ((insn & 0x0f000e10) == 0x0e000a00) {
- /* VFP. */
- if (disas_vfp_insn(s, insn)) {
- goto illegal_op;
- }
- return;
- }
if ((insn & 0x0e000f00) == 0x0c000100) {
if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
/* iWMMXt register transfer. */
}
}
}
- } else if ((insn & 0x0e000a00) == 0x0c000800
- && arm_dc_feature(s, ARM_FEATURE_V8)) {
- if (disas_neon_insn_3same_ext(s, insn)) {
- goto illegal_op;
- }
- return;
- } else if ((insn & 0x0f000a00) == 0x0e000800
- && arm_dc_feature(s, ARM_FEATURE_V8)) {
- if (disas_neon_insn_2reg_scalar_ext(s, insn)) {
- goto illegal_op;
- }
- return;
}
goto illegal_op;
}
arm_skip_unless(s, cond);
}
- if (disas_a32(s, insn)) {
+ /* TODO: Perhaps merge these into one decodetree output file. */
+ if (disas_a32(s, insn) ||
+ disas_vfp(s, insn)) {
return;
}
/* fall back to legacy decoder */
case 0xd:
case 0xe:
if (((insn >> 8) & 0xe) == 10) {
- /* VFP. */
- if (disas_vfp_insn(s, insn)) {
- goto illegal_op;
- }
- } else if (disas_coproc_insn(s, insn)) {
+ /* VFP encoding, but disas_vfp rejected it: UNDEF. */
+ goto illegal_op;
+ }
+ if (disas_coproc_insn(s, insn)) {
/* Coprocessor. */
goto illegal_op;
}
ARCH(6T2);
}
- if (disas_t32(s, insn)) {
+ if ((insn & 0xef000000) == 0xef000000) {
+ /*
+ * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
+ * transform into
+ * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
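+ * (keep bits [31:29], bit 25 and [23:0]; move bit 28 (p) down to
+ * bit 24; then force bit 28 to 1 to form the 0b1111_001p prefix)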
+ */
+ uint32_t a32_insn = (insn & 0xe2ffffff) |
+ ((insn & (1 << 28)) >> 4) | (1 << 28);
+
+ if (disas_neon_dp(s, a32_insn)) {
+ return;
+ }
+ }
+
+ if ((insn & 0xff100000) == 0xf9000000) {
+ /*
+ * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
+ * transform into
+ * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
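+ * (keep bits [23:0] and rewrite the top byte from 0xf9 to 0xf4)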
+ */
+ uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
+
+ if (disas_neon_ls(s, a32_insn)) {
+ return;
+ }
+ }
+
+ /*
+ * TODO: Perhaps merge these into one decodetree output file.
+ * Note disas_vfp is written for a32 with cond field in the
+ * top nibble. The t32 encoding requires 0xe in the top nibble.
+ */
+ if (disas_t32(s, insn) ||
+ disas_vfp_uncond(s, insn) ||
+ disas_neon_shared(s, insn) ||
+ ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
return;
}
/* fall back to legacy decoder */
goto illegal_op; /* op0 = 0b11 : unallocated */
}
- /*
- * Decode VLLDM and VLSTM first: these are nonstandard because:
- * * if there is no FPU then these insns must NOP in
- * Secure state and UNDEF in Nonsecure state
- * * if there is an FPU then these insns do not have
- * the usual behaviour that disas_vfp_insn() provides of
- * being controlled by CPACR/NSACR enable bits or the
- * lazy-stacking logic.
- */
- if (arm_dc_feature(s, ARM_FEATURE_V8) &&
- (insn & 0xffa00f00) == 0xec200a00) {
- /* 0b1110_1100_0x1x_xxxx_xxxx_1010_xxxx_xxxx
- * - VLLDM, VLSTM
- * We choose to UNDEF if the RAZ bits are non-zero.
- */
- if (!s->v8m_secure || (insn & 0x0040f0ff)) {
- goto illegal_op;
- }
-
- if (arm_dc_feature(s, ARM_FEATURE_VFP)) {
- uint32_t rn = (insn >> 16) & 0xf;
- TCGv_i32 fptr = load_reg(s, rn);
-
- if (extract32(insn, 20, 1)) {
- gen_helper_v7m_vlldm(cpu_env, fptr);
- } else {
- gen_helper_v7m_vlstm(cpu_env, fptr);
- }
- tcg_temp_free_i32(fptr);
-
- /* End the TB, because we have updated FP control bits */
- s->base.is_jmp = DISAS_UPDATE;
- }
- break;
- }
- if (arm_dc_feature(s, ARM_FEATURE_VFP) &&
- ((insn >> 8) & 0xe) == 10) {
+ if (((insn >> 8) & 0xe) == 10 &&
+ dc_isar_feature(aa32_fpsp_v2, s)) {
/* FP, and the CPU supports it */
- if (disas_vfp_insn(s, insn)) {
- goto illegal_op;
- }
- break;
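+ /* Any valid FP encoding was already taken by disas_vfp, so UNDEF. */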
+ goto illegal_op;
+ } else {
+ /* All other insns: NOCP */
+ gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
+ syn_uncategorized(),
+ default_exception_el(s));
}
-
- /* All other insns: NOCP */
- gen_exception_insn(s, s->pc_curr, EXCP_NOCP, syn_uncategorized(),
- default_exception_el(s));
break;
}
- if ((insn & 0xfe000a00) == 0xfc000800
- && arm_dc_feature(s, ARM_FEATURE_V8)) {
- /* The Thumb2 and ARM encodings are identical. */
- if (disas_neon_insn_3same_ext(s, insn)) {
- goto illegal_op;
- }
- } else if ((insn & 0xff000a00) == 0xfe000800
- && arm_dc_feature(s, ARM_FEATURE_V8)) {
- /* The Thumb2 and ARM encodings are identical. */
- if (disas_neon_insn_2reg_scalar_ext(s, insn)) {
- goto illegal_op;
- }
- } else if (((insn >> 24) & 3) == 3) {
+ if (((insn >> 24) & 3) == 3) {
/* Translate into the equivalent ARM encoding. */
insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
if (disas_neon_data_insn(s, insn)) {
goto illegal_op;
}
} else if (((insn >> 8) & 0xe) == 10) {
- if (disas_vfp_insn(s, insn)) {
- goto illegal_op;
- }
+ /* VFP encoding, but disas_vfp rejected it: UNDEF. */
+ goto illegal_op;
} else {
if (insn & (1 << 28))
goto illegal_op;
}
break;
case 12:
- if ((insn & 0x01100000) == 0x01000000) {
- if (disas_neon_ls_insn(s, insn)) {
- goto illegal_op;
- }
- break;
- }
goto illegal_op;
default:
illegal_op: