#include "exec/log.h"
#include "trace-tcg.h"
+#include "translate-a64.h"
static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;
/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;
-static TCGv_i64 cpu_reg(DisasContext *s, int reg);
static const char *regnames[] = {
"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
-
-/* Note that the gvec expanders operate on offsets + sizes. */
-typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t);
-typedef void GVecGen2iFn(unsigned, uint32_t, uint32_t, int64_t,
- uint32_t, uint32_t);
-typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t,
- uint32_t, uint32_t, uint32_t);
+typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, TCGMemOp);
/* initialize TCG globals. */
void a64_translate_init(void)
s->base.is_jmp = DISAS_NORETURN;
}
+static void gen_exception_bkpt_insn(DisasContext *s, int offset,
+ uint32_t syndrome)
+{
+ TCGv_i32 tcg_syn;
+
+ gen_a64_set_pc_im(s->pc - offset);
+ tcg_syn = tcg_const_i32(syndrome);
+ gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
+ tcg_temp_free_i32(tcg_syn);
+ s->base.is_jmp = DISAS_NORETURN;
+}
+
static void gen_ss_advance(DisasContext *s)
{
/* If the singlestep state is Active-not-pending, advance to
if (use_goto_tb(s, n, dest)) {
tcg_gen_goto_tb(n);
gen_a64_set_pc_im(dest);
- tcg_gen_exit_tb((intptr_t)tb + n);
+ tcg_gen_exit_tb(tb, n);
s->base.is_jmp = DISAS_NORETURN;
} else {
gen_a64_set_pc_im(dest);
}
}
-static void unallocated_encoding(DisasContext *s)
+void unallocated_encoding(DisasContext *s)
{
/* Unallocated and reserved encodings are uncategorized */
gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
default_exception_el(s));
}
-#define unsupported_encoding(s, insn) \
- do { \
- qemu_log_mask(LOG_UNIMP, \
- "%s:%d: unsupported instruction encoding 0x%08x " \
- "at pc=%016" PRIx64 "\n", \
- __FILE__, __LINE__, insn, s->pc - 4); \
- unallocated_encoding(s); \
- } while (0)
-
static void init_tmp_a64_array(DisasContext *s)
{
#ifdef CONFIG_DEBUG_TCG
init_tmp_a64_array(s);
}
-static TCGv_i64 new_tmp_a64(DisasContext *s)
+TCGv_i64 new_tmp_a64(DisasContext *s)
{
assert(s->tmp_a64_count < TMP_A64_MAX);
return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
}
-static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
+TCGv_i64 new_tmp_a64_zero(DisasContext *s)
{
TCGv_i64 t = new_tmp_a64(s);
tcg_gen_movi_i64(t, 0);
* to cpu_X[31] and ZR accesses to a temporary which can be discarded.
* This is the point of the _sp forms.
*/
-static TCGv_i64 cpu_reg(DisasContext *s, int reg)
+TCGv_i64 cpu_reg(DisasContext *s, int reg)
{
if (reg == 31) {
return new_tmp_a64_zero(s);
}
/* register access for when 31 == SP */
-static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
+TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
return cpu_X[reg];
}
* representing the register contents. This TCGv is an auto-freed
* temporary so it need not be explicitly freed, and may be modified.
*/
-static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
+TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
{
TCGv_i64 v = new_tmp_a64(s);
if (reg != 31) {
return v;
}
-static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
+TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
{
TCGv_i64 v = new_tmp_a64(s);
if (sf) {
return v;
}
-/* We should have at some point before trying to access an FP register
- * done the necessary access check, so assert that
- * (a) we did the check and
- * (b) we didn't then just plough ahead anyway if it failed.
- * Print the instruction pattern in the abort message so we can figure
- * out what we need to fix if a user encounters this problem in the wild.
- */
-static inline void assert_fp_access_checked(DisasContext *s)
-{
-#ifdef CONFIG_DEBUG_TCG
- if (unlikely(!s->fp_access_checked || s->fp_excp_el)) {
- fprintf(stderr, "target-arm: FP access check missing for "
- "instruction 0x%08x\n", s->insn);
- abort();
- }
-#endif
-}
-
-/* Return the offset into CPUARMState of an element of specified
- * size, 'element' places in from the least significant end of
- * the FP/vector register Qn.
- */
-static inline int vec_reg_offset(DisasContext *s, int regno,
- int element, TCGMemOp size)
-{
- int offs = 0;
-#ifdef HOST_WORDS_BIGENDIAN
- /* This is complicated slightly because vfp.zregs[n].d[0] is
- * still the low half and vfp.zregs[n].d[1] the high half
- * of the 128 bit vector, even on big endian systems.
- * Calculate the offset assuming a fully bigendian 128 bits,
- * then XOR to account for the order of the two 64 bit halves.
- */
- offs += (16 - ((element + 1) * (1 << size)));
- offs ^= 8;
-#else
- offs += element * (1 << size);
-#endif
- offs += offsetof(CPUARMState, vfp.zregs[regno]);
- assert_fp_access_checked(s);
- return offs;
-}
-
-/* Return the offset info CPUARMState of the "whole" vector register Qn. */
-static inline int vec_full_reg_offset(DisasContext *s, int regno)
-{
- assert_fp_access_checked(s);
- return offsetof(CPUARMState, vfp.zregs[regno]);
-}
-
-/* Return a newly allocated pointer to the vector register. */
-static TCGv_ptr vec_full_reg_ptr(DisasContext *s, int regno)
-{
- TCGv_ptr ret = tcg_temp_new_ptr();
- tcg_gen_addi_ptr(ret, cpu_env, vec_full_reg_offset(s, regno));
- return ret;
-}
-
-/* Return the byte size of the "whole" vector register, VL / 8. */
-static inline int vec_full_reg_size(DisasContext *s)
-{
- /* FIXME SVE: We should put the composite ZCR_EL* value into tb->flags.
- In the meantime this is just the AdvSIMD length of 128. */
- return 128 / 8;
-}
-
/* Return the offset into CPUARMState of a slice (from
* the least significant end) of FP register Qn (ie
* Dn, Sn, Hn or Bn).
return v;
}
+static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
+{
+ TCGv_i32 v = tcg_temp_new_i32();
+
+ tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
+ return v;
+}
+
/* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
* If SVE is not enabled, then there are only 128 bits in the vector.
*/
}
}
-static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
+void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
unsigned ofs = fp_reg_offset(s, reg, MO_64);
tcg_temp_free_i64(tmp);
}
-static TCGv_ptr get_fpstatus_ptr(bool is_f16)
+TCGv_ptr get_fpstatus_ptr(bool is_f16)
{
TCGv_ptr statusptr = tcg_temp_new_ptr();
int offset;
vec_full_reg_size(s), gvec_op);
}
+/* Expand a 3-operand + env pointer operation using
+ * an out-of-line helper.
+ */
+static void gen_gvec_op3_env(DisasContext *s, bool is_q, int rd,
+ int rn, int rm, gen_helper_gvec_3_ptr *fn)
+{
+ tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm), cpu_env,
+ is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
+}
+
+/* Expand a 3-operand + fpstatus pointer + simd data value operation using
+ * an out-of-line helper.
+ */
+static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
+ int rm, bool is_fp16, int data,
+ gen_helper_gvec_3_ptr *fn)
+{
+ TCGv_ptr fpst = get_fpstatus_ptr(is_fp16);
+ tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm), fpst,
+ is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
+ tcg_temp_free_ptr(fpst);
+}
+
/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
* than the 32 bit equivalent.
*/
/* Check that SVE access is enabled. If it is, return true.
* If not, emit code to generate an appropriate exception and return false.
*/
-static inline bool sve_access_check(DisasContext *s)
+bool sve_access_check(DisasContext *s)
{
if (s->sve_excp_el) {
gen_exception_insn(s, 4, EXCP_UDEF, syn_sve_access_trap(),
s->sve_excp_el);
return false;
}
- return true;
+ return fp_access_check(s);
}
/*
break;
}
/* BRK */
- gen_exception_insn(s, 4, EXCP_BKPT, syn_aa64_bkpt(imm16),
- default_exception_el(s));
+ gen_exception_bkpt_insn(s, 4, syn_aa64_bkpt(imm16));
break;
case 2:
if (op2_ll != 0) {
unallocated_encoding(s);
return;
}
+ if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
+ gen_io_start();
+ }
gen_helper_exception_return(cpu_env);
+ if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
+ gen_io_end();
+ }
/* Must exit loop to check un-masked IRQs */
s->base.is_jmp = DISAS_EXIT;
return;
tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
+static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
+ int rn, int size)
+{
+ TCGv_i64 tcg_rs = cpu_reg(s, rs);
+ TCGv_i64 tcg_rt = cpu_reg(s, rt);
+ int memidx = get_mem_index(s);
+ TCGv_i64 addr = cpu_reg_sp(s, rn);
+
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+ tcg_gen_atomic_cmpxchg_i64(tcg_rs, addr, tcg_rs, tcg_rt, memidx,
+ size | MO_ALIGN | s->be_data);
+}
+
+static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
+ int rn, int size)
+{
+ TCGv_i64 s1 = cpu_reg(s, rs);
+ TCGv_i64 s2 = cpu_reg(s, rs + 1);
+ TCGv_i64 t1 = cpu_reg(s, rt);
+ TCGv_i64 t2 = cpu_reg(s, rt + 1);
+ TCGv_i64 addr = cpu_reg_sp(s, rn);
+ int memidx = get_mem_index(s);
+
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+
+ if (size == 2) {
+ TCGv_i64 cmp = tcg_temp_new_i64();
+ TCGv_i64 val = tcg_temp_new_i64();
+
+ if (s->be_data == MO_LE) {
+ tcg_gen_concat32_i64(val, t1, t2);
+ tcg_gen_concat32_i64(cmp, s1, s2);
+ } else {
+ tcg_gen_concat32_i64(val, t2, t1);
+ tcg_gen_concat32_i64(cmp, s2, s1);
+ }
+
+ tcg_gen_atomic_cmpxchg_i64(cmp, addr, cmp, val, memidx,
+ MO_64 | MO_ALIGN | s->be_data);
+ tcg_temp_free_i64(val);
+
+ if (s->be_data == MO_LE) {
+ tcg_gen_extr32_i64(s1, s2, cmp);
+ } else {
+ tcg_gen_extr32_i64(s2, s1, cmp);
+ }
+ tcg_temp_free_i64(cmp);
+ } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+ TCGv_i32 tcg_rs = tcg_const_i32(rs);
+
+ if (s->be_data == MO_LE) {
+ gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2);
+ } else {
+ gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2);
+ }
+ tcg_temp_free_i32(tcg_rs);
+ } else {
+ TCGv_i64 d1 = tcg_temp_new_i64();
+ TCGv_i64 d2 = tcg_temp_new_i64();
+ TCGv_i64 a2 = tcg_temp_new_i64();
+ TCGv_i64 c1 = tcg_temp_new_i64();
+ TCGv_i64 c2 = tcg_temp_new_i64();
+ TCGv_i64 zero = tcg_const_i64(0);
+
+ /* Load the two words, in memory order. */
+ tcg_gen_qemu_ld_i64(d1, addr, memidx,
+ MO_64 | MO_ALIGN_16 | s->be_data);
+ tcg_gen_addi_i64(a2, addr, 8);
+ tcg_gen_qemu_ld_i64(d2, addr, memidx, MO_64 | s->be_data);
+
+ /* Compare the two words, also in memory order. */
+ tcg_gen_setcond_i64(TCG_COND_EQ, c1, d1, s1);
+ tcg_gen_setcond_i64(TCG_COND_EQ, c2, d2, s2);
+ tcg_gen_and_i64(c2, c2, c1);
+
+ /* If compare equal, write back new data, else write back old data. */
+ tcg_gen_movcond_i64(TCG_COND_NE, c1, c2, zero, t1, d1);
+ tcg_gen_movcond_i64(TCG_COND_NE, c2, c2, zero, t2, d2);
+ tcg_gen_qemu_st_i64(c1, addr, memidx, MO_64 | s->be_data);
+ tcg_gen_qemu_st_i64(c2, a2, memidx, MO_64 | s->be_data);
+ tcg_temp_free_i64(a2);
+ tcg_temp_free_i64(c1);
+ tcg_temp_free_i64(c2);
+ tcg_temp_free_i64(zero);
+
+ /* Write back the data from memory to Rs. */
+ tcg_gen_mov_i64(s1, d1);
+ tcg_gen_mov_i64(s2, d2);
+ tcg_temp_free_i64(d1);
+ tcg_temp_free_i64(d2);
+ }
+}
+
/* Update the Sixty-Four bit (SF) registersize. This logic is derived
* from the ARMv8 specs for LDR (Shared decode for all encodings).
*/
int rt = extract32(insn, 0, 5);
int rn = extract32(insn, 5, 5);
int rt2 = extract32(insn, 10, 5);
- int is_lasr = extract32(insn, 15, 1);
int rs = extract32(insn, 16, 5);
- int is_pair = extract32(insn, 21, 1);
- int is_store = !extract32(insn, 22, 1);
- int is_excl = !extract32(insn, 23, 1);
+ int is_lasr = extract32(insn, 15, 1);
+ int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
int size = extract32(insn, 30, 2);
TCGv_i64 tcg_addr;
- if ((!is_excl && !is_pair && !is_lasr) ||
- (!is_excl && is_pair) ||
- (is_pair && size < 2)) {
- unallocated_encoding(s);
+ switch (o2_L_o1_o0) {
+ case 0x0: /* STXR */
+ case 0x1: /* STLXR */
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+ if (is_lasr) {
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+ }
+ tcg_addr = read_cpu_reg_sp(s, rn, 1);
+ gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, false);
return;
- }
- if (rn == 31) {
- gen_check_sp_alignment(s);
- }
- tcg_addr = read_cpu_reg_sp(s, rn, 1);
+ case 0x4: /* LDXR */
+ case 0x5: /* LDAXR */
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+ tcg_addr = read_cpu_reg_sp(s, rn, 1);
+ s->is_ldex = true;
+ gen_load_exclusive(s, rt, rt2, tcg_addr, size, false);
+ if (is_lasr) {
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
+ }
+ return;
- /* Note that since TCG is single threaded load-acquire/store-release
- * semantics require no extra if (is_lasr) { ... } handling.
- */
+ case 0x9: /* STLR */
+ /* Generate ISS for non-exclusive accesses including LASR. */
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+ tcg_addr = read_cpu_reg_sp(s, rn, 1);
+ do_gpr_st(s, cpu_reg(s, rt), tcg_addr, size, true, rt,
+ disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
+ return;
- if (is_excl) {
- if (!is_store) {
- s->is_ldex = true;
- gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
- if (is_lasr) {
- tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
+ case 0xd: /* LDAR */
+ /* Generate ISS for non-exclusive accesses including LASR. */
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+ tcg_addr = read_cpu_reg_sp(s, rn, 1);
+ do_gpr_ld(s, cpu_reg(s, rt), tcg_addr, size, false, false, true, rt,
+ disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
+ return;
+
+ case 0x2: case 0x3: /* CASP / STXP */
+ if (size & 2) { /* STXP / STLXP */
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
}
- } else {
if (is_lasr) {
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
}
- gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
+ tcg_addr = read_cpu_reg_sp(s, rn, 1);
+ gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, true);
+ return;
}
- } else {
- TCGv_i64 tcg_rt = cpu_reg(s, rt);
- bool iss_sf = disas_ldst_compute_iss_sf(size, false, 0);
+ if (rt2 == 31
+ && ((rt | rs) & 1) == 0
+ && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
+ /* CASP / CASPL */
+ gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
+ return;
+ }
+ break;
- /* Generate ISS for non-exclusive accesses including LASR. */
- if (is_store) {
- if (is_lasr) {
- tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+ case 0x6: case 0x7: /* CASPA / LDXP */
+ if (size & 2) { /* LDXP / LDAXP */
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
}
- do_gpr_st(s, tcg_rt, tcg_addr, size,
- true, rt, iss_sf, is_lasr);
- } else {
- do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false,
- true, rt, iss_sf, is_lasr);
+ tcg_addr = read_cpu_reg_sp(s, rn, 1);
+ s->is_ldex = true;
+ gen_load_exclusive(s, rt, rt2, tcg_addr, size, true);
if (is_lasr) {
tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
}
+ return;
+ }
+ if (rt2 == 31
+ && ((rt | rs) & 1) == 0
+ && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
+ /* CASPA / CASPAL */
+ gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
+ return;
+ }
+ break;
+
+ case 0xa: /* CAS */
+ case 0xb: /* CASL */
+ case 0xe: /* CASA */
+ case 0xf: /* CASAL */
+ if (rt2 == 31 && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
+ gen_compare_and_swap(s, rs, rt, rn, size);
+ return;
}
+ break;
}
+ unallocated_encoding(s);
}
/*
}
}
+/* Atomic memory operations
+ *
+ * 31 30 27 26 24 22 21 16 15 12 10 5 0
+ * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
+ * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn | Rt |
+ * +------+-------+---+-----+-----+--------+----+-----+-----+----+-----+
+ *
+ * Rt: the result register
+ * Rn: base address or SP
+ * Rs: the source register for the operation
+ * V: vector flag (always 0 as of v8.3)
+ * A: acquire flag
+ * R: release flag
+ */
+static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
+ int size, int rt, bool is_vector)
+{
+ int rs = extract32(insn, 16, 5);
+ int rn = extract32(insn, 5, 5);
+ int o3_opc = extract32(insn, 12, 4);
+ int feature = ARM_FEATURE_V8_ATOMICS;
+ TCGv_i64 tcg_rn, tcg_rs;
+ AtomicThreeOpFn *fn;
+
+ if (is_vector) {
+ unallocated_encoding(s);
+ return;
+ }
+ switch (o3_opc) {
+ case 000: /* LDADD */
+ fn = tcg_gen_atomic_fetch_add_i64;
+ break;
+ case 001: /* LDCLR */
+ fn = tcg_gen_atomic_fetch_and_i64;
+ break;
+ case 002: /* LDEOR */
+ fn = tcg_gen_atomic_fetch_xor_i64;
+ break;
+ case 003: /* LDSET */
+ fn = tcg_gen_atomic_fetch_or_i64;
+ break;
+ case 004: /* LDSMAX */
+ fn = tcg_gen_atomic_fetch_smax_i64;
+ break;
+ case 005: /* LDSMIN */
+ fn = tcg_gen_atomic_fetch_smin_i64;
+ break;
+ case 006: /* LDUMAX */
+ fn = tcg_gen_atomic_fetch_umax_i64;
+ break;
+ case 007: /* LDUMIN */
+ fn = tcg_gen_atomic_fetch_umin_i64;
+ break;
+ case 010: /* SWP */
+ fn = tcg_gen_atomic_xchg_i64;
+ break;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+ if (!arm_dc_feature(s, feature)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (rn == 31) {
+ gen_check_sp_alignment(s);
+ }
+ tcg_rn = cpu_reg_sp(s, rn);
+ tcg_rs = read_cpu_reg(s, rs, true);
+
+ if (o3_opc == 1) { /* LDCLR */
+ tcg_gen_not_i64(tcg_rs, tcg_rs);
+ }
+
+ /* The tcg atomic primitives are all full barriers. Therefore we
+ * can ignore the Acquire and Release bits of this instruction.
+ */
+ fn(cpu_reg(s, rt), tcg_rn, tcg_rs, get_mem_index(s),
+ s->be_data | size | MO_ALIGN);
+}
+
/* Load/store register (all forms) */
static void disas_ldst_reg(DisasContext *s, uint32_t insn)
{
switch (extract32(insn, 24, 2)) {
case 0:
- if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
- disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
- } else {
+ if (extract32(insn, 21, 1) == 0) {
/* Load/store register (unscaled immediate)
* Load/store immediate pre/post-indexed
* Load/store register unprivileged
*/
disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
+ return;
+ }
+ switch (extract32(insn, 10, 2)) {
+ case 0:
+ disas_ldst_atomic(s, insn, size, rt, is_vector);
+ return;
+ case 2:
+ disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
+ return;
}
break;
case 1:
disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
- break;
- default:
- unallocated_encoding(s);
- break;
+ return;
}
+ unallocated_encoding(s);
}
/* AdvSIMD load/store multiple structures
* value (ie should cause a guest UNDEF exception), and true if they are
* valid, in which case the decoded bit pattern is written to result.
*/
-static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
- unsigned int imms, unsigned int immr)
+bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
+ unsigned int imms, unsigned int immr)
{
uint64_t mask;
unsigned e, levels, s, r;
}
}
-static void handle_fp_compare(DisasContext *s, bool is_double,
+static void handle_fp_compare(DisasContext *s, int size,
unsigned int rn, unsigned int rm,
bool cmp_with_zero, bool signal_all_nans)
{
TCGv_i64 tcg_flags = tcg_temp_new_i64();
- TCGv_ptr fpst = get_fpstatus_ptr(false);
+ TCGv_ptr fpst = get_fpstatus_ptr(size == MO_16);
- if (is_double) {
+ if (size == MO_64) {
TCGv_i64 tcg_vn, tcg_vm;
tcg_vn = read_fp_dreg(s, rn);
tcg_temp_free_i64(tcg_vn);
tcg_temp_free_i64(tcg_vm);
} else {
- TCGv_i32 tcg_vn, tcg_vm;
+ TCGv_i32 tcg_vn = tcg_temp_new_i32();
+ TCGv_i32 tcg_vm = tcg_temp_new_i32();
- tcg_vn = read_fp_sreg(s, rn);
+ read_vec_element_i32(s, tcg_vn, rn, 0, size);
if (cmp_with_zero) {
- tcg_vm = tcg_const_i32(0);
+ tcg_gen_movi_i32(tcg_vm, 0);
} else {
- tcg_vm = read_fp_sreg(s, rm);
+ read_vec_element_i32(s, tcg_vm, rm, 0, size);
}
- if (signal_all_nans) {
- gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
- } else {
- gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
+
+ switch (size) {
+ case MO_32:
+ if (signal_all_nans) {
+ gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
+ } else {
+ gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
+ }
+ break;
+ case MO_16:
+ if (signal_all_nans) {
+ gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
+ } else {
+ gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
+ }
+ break;
+ default:
+ g_assert_not_reached();
}
+
tcg_temp_free_i32(tcg_vn);
tcg_temp_free_i32(tcg_vm);
}
static void disas_fp_compare(DisasContext *s, uint32_t insn)
{
unsigned int mos, type, rm, op, rn, opc, op2r;
+ int size;
mos = extract32(insn, 29, 3);
- type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
+ type = extract32(insn, 22, 2);
rm = extract32(insn, 16, 5);
op = extract32(insn, 14, 2);
rn = extract32(insn, 5, 5);
opc = extract32(insn, 3, 2);
op2r = extract32(insn, 0, 3);
- if (mos || op || op2r || type > 1) {
+ if (mos || op || op2r) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ switch (type) {
+ case 0:
+ size = MO_32;
+ break;
+ case 1:
+ size = MO_64;
+ break;
+ case 3:
+ size = MO_16;
+ if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ break;
+ }
+ /* fallthru */
+ default:
unallocated_encoding(s);
return;
}
return;
}
- handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
+ handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
}
/* Floating point conditional compare
unsigned int mos, type, rm, cond, rn, op, nzcv;
TCGv_i64 tcg_flags;
TCGLabel *label_continue = NULL;
+ int size;
mos = extract32(insn, 29, 3);
- type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
+ type = extract32(insn, 22, 2);
rm = extract32(insn, 16, 5);
cond = extract32(insn, 12, 4);
rn = extract32(insn, 5, 5);
op = extract32(insn, 4, 1);
nzcv = extract32(insn, 0, 4);
- if (mos || type > 1) {
+ if (mos) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ switch (type) {
+ case 0:
+ size = MO_32;
+ break;
+ case 1:
+ size = MO_64;
+ break;
+ case 3:
+ size = MO_16;
+ if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ break;
+ }
+ /* fallthru */
+ default:
unallocated_encoding(s);
return;
}
gen_set_label(label_match);
}
- handle_fp_compare(s, type, rn, rm, false, op);
+ handle_fp_compare(s, size, rn, rm, false, op);
if (cond < 0x0e) {
gen_set_label(label_continue);
unsigned int mos, type, rm, cond, rn, rd;
TCGv_i64 t_true, t_false, t_zero;
DisasCompare64 c;
+ TCGMemOp sz;
mos = extract32(insn, 29, 3);
- type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
+ type = extract32(insn, 22, 2);
rm = extract32(insn, 16, 5);
cond = extract32(insn, 12, 4);
rn = extract32(insn, 5, 5);
rd = extract32(insn, 0, 5);
- if (mos || type > 1) {
+ if (mos) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ switch (type) {
+ case 0:
+ sz = MO_32;
+ break;
+ case 1:
+ sz = MO_64;
+ break;
+ case 3:
+ sz = MO_16;
+ if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ break;
+ }
+ /* fallthru */
+ default:
unallocated_encoding(s);
return;
}
return;
}
- /* Zero extend sreg inputs to 64 bits now. */
+ /* Zero extend sreg & hreg inputs to 64 bits now. */
t_true = tcg_temp_new_i64();
t_false = tcg_temp_new_i64();
- read_vec_element(s, t_true, rn, 0, type ? MO_64 : MO_32);
- read_vec_element(s, t_false, rm, 0, type ? MO_64 : MO_32);
+ read_vec_element(s, t_true, rn, 0, sz);
+ read_vec_element(s, t_false, rm, 0, sz);
a64_test_cc(&c, cond);
t_zero = tcg_const_i64(0);
tcg_temp_free_i64(t_false);
a64_free_cc(&c);
- /* Note that sregs write back zeros to the high bits,
+ /* Note that sregs & hregs write back zeros to the high bits,
and we've already done the zero-extension. */
write_fp_dreg(s, rd, t_true);
tcg_temp_free_i64(t_true);
}
-/* Floating-point data-processing (1 source) - single precision */
-static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
+/* Floating-point data-processing (1 source) - half precision */
+static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
{
- TCGv_ptr fpst;
- TCGv_i32 tcg_op;
- TCGv_i32 tcg_res;
-
- fpst = get_fpstatus_ptr(false);
- tcg_op = read_fp_sreg(s, rn);
- tcg_res = tcg_temp_new_i32();
+ TCGv_ptr fpst = NULL;
+ TCGv_i32 tcg_op = read_fp_hreg(s, rn);
+ TCGv_i32 tcg_res = tcg_temp_new_i32();
switch (opcode) {
case 0x0: /* FMOV */
tcg_gen_mov_i32(tcg_res, tcg_op);
break;
case 0x1: /* FABS */
- gen_helper_vfp_abss(tcg_res, tcg_op);
+ tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
break;
case 0x2: /* FNEG */
- gen_helper_vfp_negs(tcg_res, tcg_op);
+ tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
break;
case 0x3: /* FSQRT */
- gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
+ fpst = get_fpstatus_ptr(true);
+ gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
break;
case 0x8: /* FRINTN */
case 0x9: /* FRINTP */
case 0xc: /* FRINTA */
{
TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
+ fpst = get_fpstatus_ptr(true);
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
- gen_helper_rints(tcg_res, tcg_op, fpst);
+ gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
tcg_temp_free_i32(tcg_rmode);
break;
}
case 0xe: /* FRINTX */
- gen_helper_rints_exact(tcg_res, tcg_op, fpst);
+ fpst = get_fpstatus_ptr(true);
+ gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
break;
case 0xf: /* FRINTI */
- gen_helper_rints(tcg_res, tcg_op, fpst);
+ fpst = get_fpstatus_ptr(true);
+ gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
break;
default:
abort();
write_fp_sreg(s, rd, tcg_res);
- tcg_temp_free_ptr(fpst);
+ if (fpst) {
+ tcg_temp_free_ptr(fpst);
+ }
tcg_temp_free_i32(tcg_op);
tcg_temp_free_i32(tcg_res);
}
-/* Floating-point data-processing (1 source) - double precision */
-static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
-{
+/* Floating-point data-processing (1 source) - single precision */
+static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
+{
+ TCGv_ptr fpst;
+ TCGv_i32 tcg_op;
+ TCGv_i32 tcg_res;
+
+ fpst = get_fpstatus_ptr(false);
+ tcg_op = read_fp_sreg(s, rn);
+ tcg_res = tcg_temp_new_i32();
+
+ switch (opcode) {
+ case 0x0: /* FMOV */
+ tcg_gen_mov_i32(tcg_res, tcg_op);
+ break;
+ case 0x1: /* FABS */
+ gen_helper_vfp_abss(tcg_res, tcg_op);
+ break;
+ case 0x2: /* FNEG */
+ gen_helper_vfp_negs(tcg_res, tcg_op);
+ break;
+ case 0x3: /* FSQRT */
+ gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
+ break;
+ case 0x8: /* FRINTN */
+ case 0x9: /* FRINTP */
+ case 0xa: /* FRINTM */
+ case 0xb: /* FRINTZ */
+ case 0xc: /* FRINTA */
+ {
+ TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
+
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
+ gen_helper_rints(tcg_res, tcg_op, fpst);
+
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
+ tcg_temp_free_i32(tcg_rmode);
+ break;
+ }
+ case 0xe: /* FRINTX */
+ gen_helper_rints_exact(tcg_res, tcg_op, fpst);
+ break;
+ case 0xf: /* FRINTI */
+ gen_helper_rints(tcg_res, tcg_op, fpst);
+ break;
+ default:
+ abort();
+ }
+
+ write_fp_sreg(s, rd, tcg_res);
+
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i32(tcg_op);
+ tcg_temp_free_i32(tcg_res);
+}
+
+/* Floating-point data-processing (1 source) - double precision */
+static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
+{
TCGv_ptr fpst;
TCGv_i64 tcg_op;
TCGv_i64 tcg_res;
} else {
/* Single to half */
TCGv_i32 tcg_rd = tcg_temp_new_i32();
- gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, cpu_env);
+ TCGv_i32 ahp = get_ahp_flag();
+ TCGv_ptr fpst = get_fpstatus_ptr(false);
+
+ gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
/* write_fp_sreg is OK here because top half of tcg_rd is zero */
write_fp_sreg(s, rd, tcg_rd);
tcg_temp_free_i32(tcg_rd);
+ tcg_temp_free_i32(ahp);
+ tcg_temp_free_ptr(fpst);
}
tcg_temp_free_i32(tcg_rn);
break;
/* Double to single */
gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
} else {
+ TCGv_ptr fpst = get_fpstatus_ptr(false);
+ TCGv_i32 ahp = get_ahp_flag();
/* Double to half */
- gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, cpu_env);
+ gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
/* write_fp_sreg is OK here because top half of tcg_rd is zero */
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i32(ahp);
}
write_fp_sreg(s, rd, tcg_rd);
tcg_temp_free_i32(tcg_rd);
case 0x3:
{
TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
+ TCGv_ptr tcg_fpst = get_fpstatus_ptr(false);
+ TCGv_i32 tcg_ahp = get_ahp_flag();
tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
if (dtype == 0) {
/* Half to single */
TCGv_i32 tcg_rd = tcg_temp_new_i32();
- gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, cpu_env);
+ gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
write_fp_sreg(s, rd, tcg_rd);
+ tcg_temp_free_ptr(tcg_fpst);
+ tcg_temp_free_i32(tcg_ahp);
tcg_temp_free_i32(tcg_rd);
} else {
/* Half to double */
TCGv_i64 tcg_rd = tcg_temp_new_i64();
- gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, cpu_env);
+ gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
write_fp_dreg(s, rd, tcg_rd);
tcg_temp_free_i64(tcg_rd);
}
handle_fp_1src_double(s, opcode, rd, rn);
break;
+ case 3:
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (!fp_access_check(s)) {
+ return;
+ }
+
+ handle_fp_1src_half(s, opcode, rd, rn);
+ break;
default:
unallocated_encoding(s);
}
tcg_temp_free_i64(tcg_res);
}
+/* Floating-point data-processing (2 source) - half precision */
+static void handle_fp_2src_half(DisasContext *s, int opcode,
+ int rd, int rn, int rm)
+{
+ TCGv_i32 tcg_op1;
+ TCGv_i32 tcg_op2;
+ TCGv_i32 tcg_res;
+ TCGv_ptr fpst;
+
+ tcg_res = tcg_temp_new_i32();
+ fpst = get_fpstatus_ptr(true);
+ tcg_op1 = read_fp_hreg(s, rn);
+ tcg_op2 = read_fp_hreg(s, rm);
+
+ switch (opcode) {
+ case 0x0: /* FMUL */
+ gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1: /* FDIV */
+ gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x2: /* FADD */
+ gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x3: /* FSUB */
+ gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x4: /* FMAX */
+ gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x5: /* FMIN */
+ gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x6: /* FMAXNM */
+ gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x7: /* FMINNM */
+ gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x8: /* FNMUL */
+ gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
+ tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ write_fp_sreg(s, rd, tcg_res);
+
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i32(tcg_op1);
+ tcg_temp_free_i32(tcg_op2);
+ tcg_temp_free_i32(tcg_res);
+}
+
/* Floating point data-processing (2 source)
* 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
* +---+---+---+-----------+------+---+------+--------+-----+------+------+
}
handle_fp_2src_double(s, opcode, rd, rn, rm);
break;
+ case 3:
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ unallocated_encoding(s);
+ return;
+ }
+ if (!fp_access_check(s)) {
+ return;
+ }
+ handle_fp_2src_half(s, opcode, rd, rn, rm);
+ break;
default:
unallocated_encoding(s);
}
tcg_temp_free_i64(tcg_res);
}
+/* Floating-point data-processing (3 source) - half precision */
+static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
+ int rd, int rn, int rm, int ra)
+{
+ TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
+ TCGv_i32 tcg_res = tcg_temp_new_i32();
+ TCGv_ptr fpst = get_fpstatus_ptr(true);
+
+ tcg_op1 = read_fp_hreg(s, rn);
+ tcg_op2 = read_fp_hreg(s, rm);
+ tcg_op3 = read_fp_hreg(s, ra);
+
+ /* These are fused multiply-add, and must be done as one
+ * floating point operation with no rounding between the
+ * multiplication and addition steps.
+ * NB that doing the negations here as separate steps is
+ * correct : an input NaN should come out with its sign bit
+ * flipped if it is a negated-input.
+ */
+ if (o1 == true) {
+ tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000);
+ }
+
+ if (o0 != o1) {
+ tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
+ }
+
+ gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
+
+ write_fp_sreg(s, rd, tcg_res);
+
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i32(tcg_op1);
+ tcg_temp_free_i32(tcg_op2);
+ tcg_temp_free_i32(tcg_op3);
+ tcg_temp_free_i32(tcg_res);
+}
+
/* Floating point data-processing (3 source)
* 31 30 29 28 24 23 22 21 20 16 15 14 10 9 5 4 0
* +---+---+---+-----------+------+----+------+----+------+------+------+
}
handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
break;
+ case 3:
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ unallocated_encoding(s);
+ return;
+ }
+ if (!fp_access_check(s)) {
+ return;
+ }
+ handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra);
+ break;
default:
unallocated_encoding(s);
}
* the range 01....1xx to 10....0xx, and the most significant 4 bits of
* the mantissa; see VFPExpandImm() in the v8 ARM ARM.
*/
-static uint64_t vfp_expand_imm(int size, uint8_t imm8)
+uint64_t vfp_expand_imm(int size, uint8_t imm8)
{
uint64_t imm;
{
int rd = extract32(insn, 0, 5);
int imm8 = extract32(insn, 13, 8);
- int is_double = extract32(insn, 22, 2);
+ int type = extract32(insn, 22, 2);
uint64_t imm;
TCGv_i64 tcg_res;
+ TCGMemOp sz;
- if (is_double > 1) {
+ switch (type) {
+ case 0:
+ sz = MO_32;
+ break;
+ case 1:
+ sz = MO_64;
+ break;
+ case 3:
+ sz = MO_16;
+ if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ break;
+ }
+ /* fallthru */
+ default:
unallocated_encoding(s);
return;
}
return;
}
- imm = vfp_expand_imm(MO_32 + is_double, imm8);
+ imm = vfp_expand_imm(sz, imm8);
tcg_res = tcg_const_i64(imm);
write_fp_dreg(s, rd, tcg_res);
bool itof, int rmode, int scale, int sf, int type)
{
bool is_signed = !(opcode & 1);
- bool is_double = type;
TCGv_ptr tcg_fpstatus;
- TCGv_i32 tcg_shift;
+ TCGv_i32 tcg_shift, tcg_single;
+ TCGv_i64 tcg_double;
- tcg_fpstatus = get_fpstatus_ptr(false);
+ tcg_fpstatus = get_fpstatus_ptr(type == 3);
tcg_shift = tcg_const_i32(64 - scale);
tcg_int = tcg_extend;
}
- if (is_double) {
- TCGv_i64 tcg_double = tcg_temp_new_i64();
+ switch (type) {
+ case 1: /* float64 */
+ tcg_double = tcg_temp_new_i64();
if (is_signed) {
gen_helper_vfp_sqtod(tcg_double, tcg_int,
tcg_shift, tcg_fpstatus);
}
write_fp_dreg(s, rd, tcg_double);
tcg_temp_free_i64(tcg_double);
- } else {
- TCGv_i32 tcg_single = tcg_temp_new_i32();
+ break;
+
+ case 0: /* float32 */
+ tcg_single = tcg_temp_new_i32();
if (is_signed) {
gen_helper_vfp_sqtos(tcg_single, tcg_int,
tcg_shift, tcg_fpstatus);
}
write_fp_sreg(s, rd, tcg_single);
tcg_temp_free_i32(tcg_single);
+ break;
+
+ case 3: /* float16 */
+ tcg_single = tcg_temp_new_i32();
+ if (is_signed) {
+ gen_helper_vfp_sqtoh(tcg_single, tcg_int,
+ tcg_shift, tcg_fpstatus);
+ } else {
+ gen_helper_vfp_uqtoh(tcg_single, tcg_int,
+ tcg_shift, tcg_fpstatus);
+ }
+ write_fp_sreg(s, rd, tcg_single);
+ tcg_temp_free_i32(tcg_single);
+ break;
+
+ default:
+ g_assert_not_reached();
}
} else {
TCGv_i64 tcg_int = cpu_reg(s, rd);
gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
- if (is_double) {
- TCGv_i64 tcg_double = read_fp_dreg(s, rn);
+ switch (type) {
+ case 1: /* float64 */
+ tcg_double = read_fp_dreg(s, rn);
if (is_signed) {
if (!sf) {
gen_helper_vfp_tosld(tcg_int, tcg_double,
tcg_shift, tcg_fpstatus);
}
}
+ if (!sf) {
+ tcg_gen_ext32u_i64(tcg_int, tcg_int);
+ }
tcg_temp_free_i64(tcg_double);
- } else {
- TCGv_i32 tcg_single = read_fp_sreg(s, rn);
+ break;
+
+ case 0: /* float32 */
+ tcg_single = read_fp_sreg(s, rn);
if (sf) {
if (is_signed) {
gen_helper_vfp_tosqs(tcg_int, tcg_single,
tcg_temp_free_i32(tcg_dest);
}
tcg_temp_free_i32(tcg_single);
+ break;
+
+ case 3: /* float16 */
+ tcg_single = read_fp_sreg(s, rn);
+ if (sf) {
+ if (is_signed) {
+ gen_helper_vfp_tosqh(tcg_int, tcg_single,
+ tcg_shift, tcg_fpstatus);
+ } else {
+ gen_helper_vfp_touqh(tcg_int, tcg_single,
+ tcg_shift, tcg_fpstatus);
+ }
+ } else {
+ TCGv_i32 tcg_dest = tcg_temp_new_i32();
+ if (is_signed) {
+ gen_helper_vfp_toslh(tcg_dest, tcg_single,
+ tcg_shift, tcg_fpstatus);
+ } else {
+ gen_helper_vfp_toulh(tcg_dest, tcg_single,
+ tcg_shift, tcg_fpstatus);
+ }
+ tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
+ tcg_temp_free_i32(tcg_dest);
+ }
+ tcg_temp_free_i32(tcg_single);
+ break;
+
+ default:
+ g_assert_not_reached();
}
gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
tcg_temp_free_i32(tcg_rmode);
-
- if (!sf) {
- tcg_gen_ext32u_i64(tcg_int, tcg_int);
- }
}
tcg_temp_free_ptr(tcg_fpstatus);
bool sf = extract32(insn, 31, 1);
bool itof;
- if (sbit || (type > 1)
- || (!sf && scale < 32)) {
+ if (sbit || (!sf && scale < 32)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ switch (type) {
+ case 0: /* float32 */
+ case 1: /* float64 */
+ break;
+ case 3: /* float16 */
+ if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ break;
+ }
+ /* fallthru */
+ default:
unallocated_encoding(s);
return;
}
if (itof) {
TCGv_i64 tcg_rn = cpu_reg(s, rn);
+ TCGv_i64 tmp;
switch (type) {
case 0:
- {
/* 32 bit */
- TCGv_i64 tmp = tcg_temp_new_i64();
+ tmp = tcg_temp_new_i64();
tcg_gen_ext32u_i64(tmp, tcg_rn);
- tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64));
- tcg_gen_movi_i64(tmp, 0);
- tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
+ write_fp_dreg(s, rd, tmp);
tcg_temp_free_i64(tmp);
break;
- }
case 1:
- {
/* 64 bit */
- TCGv_i64 tmp = tcg_const_i64(0);
- tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64));
- tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
- tcg_temp_free_i64(tmp);
+ write_fp_dreg(s, rd, tcg_rn);
break;
- }
case 2:
/* 64 bit to top half. */
tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
+ clear_vec_high(s, true, rd);
+ break;
+ case 3:
+ /* 16 bit */
+ tmp = tcg_temp_new_i64();
+ tcg_gen_ext16u_i64(tmp, tcg_rn);
+ write_fp_dreg(s, rd, tmp);
+ tcg_temp_free_i64(tmp);
break;
+ default:
+ g_assert_not_reached();
}
} else {
TCGv_i64 tcg_rd = cpu_reg(s, rd);
/* 64 bits from top half */
tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
break;
+ case 3:
+ /* 16 bit */
+ tcg_gen_ld16u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_16));
+ break;
+ default:
+ g_assert_not_reached();
}
}
}
case 0xa: /* 64 bit */
case 0xd: /* 64 bit to top half of quad */
break;
+ case 0x6: /* 16-bit float, 32-bit int */
+ case 0xe: /* 16-bit float, 64-bit int */
+ if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ break;
+ }
+ /* fallthru */
default:
/* all other sf/type/rmode combinations are invalid */
unallocated_encoding(s);
- break;
+ return;
}
if (!fp_access_check(s)) {
/* actual FP conversions */
bool itof = extract32(opcode, 1, 1);
- if (type > 1 || (rmode != 0 && opcode > 1)) {
+ if (rmode != 0 && opcode > 1) {
+ unallocated_encoding(s);
+ return;
+ }
+ switch (type) {
+ case 0: /* float32 */
+ case 1: /* float64 */
+ break;
+ case 3: /* float16 */
+ if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ break;
+ }
+ /* fallthru */
+ default:
unallocated_encoding(s);
return;
}
tcg_temp_free_i64(tcg_resh);
}
-static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2,
- int opc, bool is_min, TCGv_ptr fpst)
-{
- /* Helper function for disas_simd_across_lanes: do a single precision
- * min/max operation on the specified two inputs,
- * and return the result in tcg_elt1.
- */
- if (opc == 0xc) {
- if (is_min) {
- gen_helper_vfp_minnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
- } else {
- gen_helper_vfp_maxnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
- }
+/*
+ * do_reduction_op helper
+ *
+ * This mirrors the Reduce() pseudocode in the ARM ARM. It is
+ * important for correct NaN propagation that we do these
+ * operations in exactly the order specified by the pseudocode.
+ *
+ * This is a recursive function, TCG temps should be freed by the
+ * calling function once it is done with the values.
+ */
+static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
+ int esize, int size, int vmap, TCGv_ptr fpst)
+{
+ if (esize == size) {
+ int element;
+ TCGMemOp msize = esize == 16 ? MO_16 : MO_32;
+ TCGv_i32 tcg_elem;
+
+ /* We should have one register left here */
+ assert(ctpop8(vmap) == 1);
+ element = ctz32(vmap);
+ assert(element < 8);
+
+ tcg_elem = tcg_temp_new_i32();
+ read_vec_element_i32(s, tcg_elem, rn, element, msize);
+ return tcg_elem;
} else {
- assert(opc == 0xf);
- if (is_min) {
- gen_helper_vfp_mins(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
- } else {
- gen_helper_vfp_maxs(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
+ int bits = size / 2;
+ int shift = ctpop8(vmap) / 2;
+ int vmap_lo = (vmap >> shift) & vmap;
+ int vmap_hi = (vmap & ~vmap_lo);
+ TCGv_i32 tcg_hi, tcg_lo, tcg_res;
+
+ tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
+ tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
+ tcg_res = tcg_temp_new_i32();
+
+ switch (fpopcode) {
+ case 0x0c: /* fmaxnmv half-precision */
+ gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
+ break;
+ case 0x0f: /* fmaxv half-precision */
+ gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
+ break;
+ case 0x1c: /* fminnmv half-precision */
+ gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
+ break;
+ case 0x1f: /* fminv half-precision */
+ gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
+ break;
+ case 0x2c: /* fmaxnmv */
+ gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
+ break;
+ case 0x2f: /* fmaxv */
+ gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
+ break;
+ case 0x3c: /* fminnmv */
+ gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
+ break;
+ case 0x3f: /* fminv */
+ gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
+ break;
+ default:
+ g_assert_not_reached();
}
+
+ tcg_temp_free_i32(tcg_hi);
+ tcg_temp_free_i32(tcg_lo);
+ return tcg_res;
}
}
break;
case 0xc: /* FMAXNMV, FMINNMV */
case 0xf: /* FMAXV, FMINV */
- if (!is_u || !is_q || extract32(size, 0, 1)) {
- unallocated_encoding(s);
- return;
- }
- /* Bit 1 of size field encodes min vs max, and actual size is always
- * 32 bits: adjust the size variable so following code can rely on it
+ /* Bit 1 of size field encodes min vs max and the actual size
+ * depends on the encoding of the U bit. If not set (and FP16
+ * enabled) then we do half-precision float instead of single
+ * precision.
*/
is_min = extract32(size, 1, 1);
is_fp = true;
- size = 2;
+ if (!is_u && arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ size = 1;
+ } else if (!is_u || !is_q || extract32(size, 0, 1)) {
+ unallocated_encoding(s);
+ return;
+ } else {
+ size = 2;
+ }
break;
default:
unallocated_encoding(s);
tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
break;
case 0x0a: /* SMAXV / UMAXV */
- tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
- tcg_res,
- tcg_res, tcg_elt, tcg_res, tcg_elt);
+ if (is_u) {
+ tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
+ } else {
+ tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
+ }
break;
case 0x1a: /* SMINV / UMINV */
- tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
- tcg_res,
- tcg_res, tcg_elt, tcg_res, tcg_elt);
- break;
+ if (is_u) {
+ tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
+ } else {
+ tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
+ }
break;
default:
g_assert_not_reached();
}
} else {
- /* Floating point ops which work on 32 bit (single) intermediates.
+ /* Floating point vector reduction ops which work across 32
+ * bit (single) or 16 bit (half-precision) intermediates.
* Note that correct NaN propagation requires that we do these
* operations in exactly the order specified by the pseudocode.
*/
- TCGv_i32 tcg_elt1 = tcg_temp_new_i32();
- TCGv_i32 tcg_elt2 = tcg_temp_new_i32();
- TCGv_i32 tcg_elt3 = tcg_temp_new_i32();
- TCGv_ptr fpst = get_fpstatus_ptr(false);
-
- assert(esize == 32);
- assert(elements == 4);
-
- read_vec_element(s, tcg_elt, rn, 0, MO_32);
- tcg_gen_extrl_i64_i32(tcg_elt1, tcg_elt);
- read_vec_element(s, tcg_elt, rn, 1, MO_32);
- tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
-
- do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
-
- read_vec_element(s, tcg_elt, rn, 2, MO_32);
- tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
- read_vec_element(s, tcg_elt, rn, 3, MO_32);
- tcg_gen_extrl_i64_i32(tcg_elt3, tcg_elt);
-
- do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst);
-
- do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
-
- tcg_gen_extu_i32_i64(tcg_res, tcg_elt1);
- tcg_temp_free_i32(tcg_elt1);
- tcg_temp_free_i32(tcg_elt2);
- tcg_temp_free_i32(tcg_elt3);
+ TCGv_ptr fpst = get_fpstatus_ptr(size == MO_16);
+ int fpopcode = opcode | is_min << 4 | is_u << 5;
+ int vmap = (1 << elements) - 1;
+ TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
+ (is_q ? 128 : 64), vmap, fpst);
+ tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
+ tcg_temp_free_i32(tcg_res32);
tcg_temp_free_ptr(fpst);
}
* MVNI - move inverted (shifted) imm into register
* ORR - bitwise OR of (shifted) imm with register
* BIC - bitwise clear of (shifted) imm with register
+ * With ARMv8.2 we also have:
+ * FMOV half-precision
*/
static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
{
uint64_t imm = 0;
if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
- unallocated_encoding(s);
- return;
+ /* Check for FMOV (vector, immediate) - half-precision */
+ if (!(arm_dc_feature(s, ARM_FEATURE_V8_FP16) && o2 && cmode == 0xf)) {
+ unallocated_encoding(s);
+ return;
+ }
}
if (!fp_access_check(s)) {
imm |= 0x4000000000000000ULL;
}
} else {
- imm = (abcdefgh & 0x3f) << 19;
- if (abcdefgh & 0x80) {
- imm |= 0x80000000;
- }
- if (abcdefgh & 0x40) {
- imm |= 0x3e000000;
+ if (o2) {
+ /* FMOV (vector, immediate) - half-precision */
+ imm = vfp_expand_imm(MO_16, abcdefgh);
+ /* now duplicate across the lanes */
+ imm = bitfield_replicate(imm, 16);
} else {
- imm |= 0x40000000;
+ imm = (abcdefgh & 0x3f) << 19;
+ if (abcdefgh & 0x80) {
+ imm |= 0x80000000;
+ }
+ if (abcdefgh & 0x40) {
+ imm |= 0x3e000000;
+ } else {
+ imm |= 0x40000000;
+ }
+ imm |= (imm << 32);
}
- imm |= (imm << 32);
}
}
break;
+ default:
+ fprintf(stderr, "%s: cmode_3_1: %x\n", __func__, cmode_3_1);
+ g_assert_not_reached();
}
if (cmode_3_1 != 7 && is_neg) {
case 0xf: /* FMAXP */
case 0x2c: /* FMINNMP */
case 0x2f: /* FMINP */
- /* FP op, size[0] is 32 or 64 bit */
+ /* FP op, size[0] is 32 or 64 bit*/
if (!u) {
- unallocated_encoding(s);
- return;
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ unallocated_encoding(s);
+ return;
+ } else {
+ size = MO_16;
+ }
+ } else {
+ size = extract32(size, 0, 1) ? MO_64 : MO_32;
}
+
if (!fp_access_check(s)) {
return;
}
- size = extract32(size, 0, 1) ? 3 : 2;
- fpst = get_fpstatus_ptr(false);
+ fpst = get_fpstatus_ptr(size == MO_16);
break;
default:
unallocated_encoding(s);
return;
}
- if (size == 3) {
+ if (size == MO_64) {
TCGv_i64 tcg_op1 = tcg_temp_new_i64();
TCGv_i64 tcg_op2 = tcg_temp_new_i64();
TCGv_i64 tcg_res = tcg_temp_new_i64();
TCGv_i32 tcg_op2 = tcg_temp_new_i32();
TCGv_i32 tcg_res = tcg_temp_new_i32();
- read_vec_element_i32(s, tcg_op1, rn, 0, MO_32);
- read_vec_element_i32(s, tcg_op2, rn, 1, MO_32);
+ read_vec_element_i32(s, tcg_op1, rn, 0, size);
+ read_vec_element_i32(s, tcg_op2, rn, 1, size);
- switch (opcode) {
- case 0xc: /* FMAXNMP */
- gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0xd: /* FADDP */
- gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0xf: /* FMAXP */
- gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x2c: /* FMINNMP */
- gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- case 0x2f: /* FMINP */
- gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
- break;
- default:
- g_assert_not_reached();
+ if (size == MO_16) {
+ switch (opcode) {
+ case 0xc: /* FMAXNMP */
+ gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0xd: /* FADDP */
+ gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0xf: /* FMAXP */
+ gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x2c: /* FMINNMP */
+ gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x2f: /* FMINP */
+ gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ } else {
+ switch (opcode) {
+ case 0xc: /* FMAXNMP */
+ gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0xd: /* FADDP */
+ gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0xf: /* FMAXP */
+ gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x2c: /* FMINNMP */
+ gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x2f: /* FMINP */
+ gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ default:
+ g_assert_not_reached();
+ }
}
write_fp_sreg(s, rd, tcg_res);
int elements, int is_signed,
int fracbits, int size)
{
- bool is_double = size == 3 ? true : false;
- TCGv_ptr tcg_fpst = get_fpstatus_ptr(false);
- TCGv_i32 tcg_shift = tcg_const_i32(fracbits);
- TCGv_i64 tcg_int = tcg_temp_new_i64();
+ TCGv_ptr tcg_fpst = get_fpstatus_ptr(size == MO_16);
+ TCGv_i32 tcg_shift = NULL;
+
TCGMemOp mop = size | (is_signed ? MO_SIGN : 0);
int pass;
- for (pass = 0; pass < elements; pass++) {
- read_vec_element(s, tcg_int, rn, pass, mop);
+ if (fracbits || size == MO_64) {
+ tcg_shift = tcg_const_i32(fracbits);
+ }
+
+ if (size == MO_64) {
+ TCGv_i64 tcg_int64 = tcg_temp_new_i64();
+ TCGv_i64 tcg_double = tcg_temp_new_i64();
+
+ for (pass = 0; pass < elements; pass++) {
+ read_vec_element(s, tcg_int64, rn, pass, mop);
- if (is_double) {
- TCGv_i64 tcg_double = tcg_temp_new_i64();
if (is_signed) {
- gen_helper_vfp_sqtod(tcg_double, tcg_int,
+ gen_helper_vfp_sqtod(tcg_double, tcg_int64,
tcg_shift, tcg_fpst);
} else {
- gen_helper_vfp_uqtod(tcg_double, tcg_int,
+ gen_helper_vfp_uqtod(tcg_double, tcg_int64,
tcg_shift, tcg_fpst);
}
if (elements == 1) {
} else {
write_vec_element(s, tcg_double, rd, pass, MO_64);
}
- tcg_temp_free_i64(tcg_double);
- } else {
- TCGv_i32 tcg_single = tcg_temp_new_i32();
- if (is_signed) {
- gen_helper_vfp_sqtos(tcg_single, tcg_int,
- tcg_shift, tcg_fpst);
- } else {
- gen_helper_vfp_uqtos(tcg_single, tcg_int,
- tcg_shift, tcg_fpst);
+ }
+
+ tcg_temp_free_i64(tcg_int64);
+ tcg_temp_free_i64(tcg_double);
+
+ } else {
+ TCGv_i32 tcg_int32 = tcg_temp_new_i32();
+ TCGv_i32 tcg_float = tcg_temp_new_i32();
+
+ for (pass = 0; pass < elements; pass++) {
+ read_vec_element_i32(s, tcg_int32, rn, pass, mop);
+
+ switch (size) {
+ case MO_32:
+ if (fracbits) {
+ if (is_signed) {
+ gen_helper_vfp_sltos(tcg_float, tcg_int32,
+ tcg_shift, tcg_fpst);
+ } else {
+ gen_helper_vfp_ultos(tcg_float, tcg_int32,
+ tcg_shift, tcg_fpst);
+ }
+ } else {
+ if (is_signed) {
+ gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
+ } else {
+ gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
+ }
+ }
+ break;
+ case MO_16:
+ if (fracbits) {
+ if (is_signed) {
+ gen_helper_vfp_sltoh(tcg_float, tcg_int32,
+ tcg_shift, tcg_fpst);
+ } else {
+ gen_helper_vfp_ultoh(tcg_float, tcg_int32,
+ tcg_shift, tcg_fpst);
+ }
+ } else {
+ if (is_signed) {
+ gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
+ } else {
+ gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
+ }
+ }
+ break;
+ default:
+ g_assert_not_reached();
}
+
if (elements == 1) {
- write_fp_sreg(s, rd, tcg_single);
+ write_fp_sreg(s, rd, tcg_float);
} else {
- write_vec_element_i32(s, tcg_single, rd, pass, MO_32);
+ write_vec_element_i32(s, tcg_float, rd, pass, size);
}
- tcg_temp_free_i32(tcg_single);
}
+
+ tcg_temp_free_i32(tcg_int32);
+ tcg_temp_free_i32(tcg_float);
}
- tcg_temp_free_i64(tcg_int);
tcg_temp_free_ptr(tcg_fpst);
- tcg_temp_free_i32(tcg_shift);
+ if (tcg_shift) {
+ tcg_temp_free_i32(tcg_shift);
+ }
clear_vec_high(s, elements << size == 16, rd);
}
int immh, int immb, int opcode,
int rn, int rd)
{
- bool is_double = extract32(immh, 3, 1);
- int size = is_double ? MO_64 : MO_32;
- int elements;
+ int size, elements, fracbits;
int immhb = immh << 3 | immb;
- int fracbits = (is_double ? 128 : 64) - immhb;
- if (!extract32(immh, 2, 2)) {
+ if (immh & 8) {
+ size = MO_64;
+ if (!is_scalar && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+ } else if (immh & 4) {
+ size = MO_32;
+ } else if (immh & 2) {
+ size = MO_16;
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ unallocated_encoding(s);
+ return;
+ }
+ } else {
+ /* immh == 0 would be a failure of the decode logic */
+ g_assert(immh == 1);
unallocated_encoding(s);
return;
}
if (is_scalar) {
elements = 1;
} else {
- elements = is_double ? 2 : is_q ? 4 : 2;
- if (is_double && !is_q) {
- unallocated_encoding(s);
- return;
- }
+ elements = (8 << is_q) >> size;
}
+ fracbits = (16 << size) - immhb;
if (!fp_access_check(s)) {
return;
}
- /* immh == 0 would be a failure of the decode logic */
- g_assert(immh);
-
handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
}
bool is_q, bool is_u,
int immh, int immb, int rn, int rd)
{
- bool is_double = extract32(immh, 3, 1);
int immhb = immh << 3 | immb;
- int fracbits = (is_double ? 128 : 64) - immhb;
- int pass;
+ int pass, size, fracbits;
TCGv_ptr tcg_fpstatus;
TCGv_i32 tcg_rmode, tcg_shift;
- if (!extract32(immh, 2, 2)) {
- unallocated_encoding(s);
- return;
- }
-
- if (!is_scalar && !is_q && is_double) {
+ if (immh & 0x8) {
+ size = MO_64;
+ if (!is_scalar && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+ } else if (immh & 0x4) {
+ size = MO_32;
+ } else if (immh & 0x2) {
+ size = MO_16;
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ unallocated_encoding(s);
+ return;
+ }
+ } else {
+ /* Should have split out AdvSIMD modified immediate earlier. */
+ assert(immh == 1);
unallocated_encoding(s);
return;
}
assert(!(is_scalar && is_q));
tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
- tcg_fpstatus = get_fpstatus_ptr(false);
+ tcg_fpstatus = get_fpstatus_ptr(size == MO_16);
gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
+ fracbits = (16 << size) - immhb;
tcg_shift = tcg_const_i32(fracbits);
- if (is_double) {
+ if (size == MO_64) {
int maxpass = is_scalar ? 1 : 2;
for (pass = 0; pass < maxpass; pass++) {
}
clear_vec_high(s, is_q, rd);
} else {
- int maxpass = is_scalar ? 1 : is_q ? 4 : 2;
- for (pass = 0; pass < maxpass; pass++) {
- TCGv_i32 tcg_op = tcg_temp_new_i32();
+ void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
+ int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
- read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
+ switch (size) {
+ case MO_16:
+ if (is_u) {
+ fn = gen_helper_vfp_touhh;
+ } else {
+ fn = gen_helper_vfp_toshh;
+ }
+ break;
+ case MO_32:
if (is_u) {
- gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
+ fn = gen_helper_vfp_touls;
} else {
- gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
+ fn = gen_helper_vfp_tosls;
}
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ for (pass = 0; pass < maxpass; pass++) {
+ TCGv_i32 tcg_op = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, tcg_op, rn, pass, size);
+ fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
if (is_scalar) {
write_fp_sreg(s, rd, tcg_op);
} else {
- write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
+ write_vec_element_i32(s, tcg_op, rd, pass, size);
}
tcg_temp_free_i32(tcg_op);
}
tcg_temp_free_i64(tcg_op2);
tcg_temp_free_i64(tcg_res);
} else {
- TCGv_i32 tcg_op1 = tcg_temp_new_i32();
- TCGv_i32 tcg_op2 = tcg_temp_new_i32();
+ TCGv_i32 tcg_op1 = read_fp_hreg(s, rn);
+ TCGv_i32 tcg_op2 = read_fp_hreg(s, rm);
TCGv_i64 tcg_res = tcg_temp_new_i64();
- read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
- read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
-
gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
/* Handle 64x64->64 opcodes which are shared between the scalar
* and vector 3-same groups. We cover every opcode where size == 3
* is valid in either the three-reg-same (integer, not pairwise)
- * or scalar-three-reg-same groups. (Some opcodes are not yet
- * implemented.)
+ * or scalar-three-reg-same groups.
*/
TCGCond cond;
tcg_temp_free_i64(tcg_rd);
}
-static void handle_2misc_64(DisasContext *s, int opcode, bool u,
- TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
- TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
+/* AdvSIMD scalar three same FP16
+ * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0
+ * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
+ * | 0 1 | U | 1 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd |
+ * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
+ * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
+ * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
+ */
+static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
+ uint32_t insn)
{
- /* Handle 64->64 opcodes which are shared between the scalar and
- * vector 2-reg-misc groups. We cover every integer opcode where size == 3
- * is valid in either group and also the double-precision fp ops.
- * The caller only need provide tcg_rmode and tcg_fpstatus if the op
- * requires them.
- */
- TCGCond cond;
+ int rd = extract32(insn, 0, 5);
+ int rn = extract32(insn, 5, 5);
+ int opcode = extract32(insn, 11, 3);
+ int rm = extract32(insn, 16, 5);
+ bool u = extract32(insn, 29, 1);
+ bool a = extract32(insn, 23, 1);
+ int fpopcode = opcode | (a << 3) | (u << 4);
+ TCGv_ptr fpst;
+ TCGv_i32 tcg_op1;
+ TCGv_i32 tcg_op2;
+ TCGv_i32 tcg_res;
- switch (opcode) {
- case 0x4: /* CLS, CLZ */
- if (u) {
- tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
- } else {
- tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
- }
+ switch (fpopcode) {
+ case 0x03: /* FMULX */
+ case 0x04: /* FCMEQ (reg) */
+ case 0x07: /* FRECPS */
+ case 0x0f: /* FRSQRTS */
+ case 0x14: /* FCMGE (reg) */
+ case 0x15: /* FACGE */
+ case 0x1a: /* FABD */
+ case 0x1c: /* FCMGT (reg) */
+ case 0x1d: /* FACGT */
break;
- case 0x5: /* NOT */
- /* This opcode is shared with CNT and RBIT but we have earlier
- * enforced that size == 3 if and only if this is the NOT insn.
- */
- tcg_gen_not_i64(tcg_rd, tcg_rn);
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ unallocated_encoding(s);
+ }
+
+ if (!fp_access_check(s)) {
+ return;
+ }
+
+ fpst = get_fpstatus_ptr(true);
+
+ tcg_op1 = read_fp_hreg(s, rn);
+ tcg_op2 = read_fp_hreg(s, rm);
+ tcg_res = tcg_temp_new_i32();
+
+ switch (fpopcode) {
+ case 0x03: /* FMULX */
+ gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
break;
- case 0x7: /* SQABS, SQNEG */
- if (u) {
- gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
- } else {
- gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
- }
+ case 0x04: /* FCMEQ (reg) */
+ gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
break;
- case 0xa: /* CMLT */
- /* 64 bit integer comparison against zero, result is
- * test ? (2^64 - 1) : 0. We implement via setcond(!test) and
- * subtracting 1.
- */
- cond = TCG_COND_LT;
- do_cmop:
- tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
- tcg_gen_neg_i64(tcg_rd, tcg_rd);
+ case 0x07: /* FRECPS */
+ gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
break;
- case 0x8: /* CMGT, CMGE */
- cond = u ? TCG_COND_GE : TCG_COND_GT;
- goto do_cmop;
- case 0x9: /* CMEQ, CMLE */
- cond = u ? TCG_COND_LE : TCG_COND_EQ;
- goto do_cmop;
+ case 0x0f: /* FRSQRTS */
+ gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x14: /* FCMGE (reg) */
+ gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x15: /* FACGE */
+ gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1a: /* FABD */
+ gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
+ tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
+ break;
+ case 0x1c: /* FCMGT (reg) */
+ gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1d: /* FACGT */
+ gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ write_fp_sreg(s, rd, tcg_res);
+
+
+ tcg_temp_free_i32(tcg_res);
+ tcg_temp_free_i32(tcg_op1);
+ tcg_temp_free_i32(tcg_op2);
+ tcg_temp_free_ptr(fpst);
+}
+
+/* AdvSIMD scalar three same extra
+ * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0
+ * +-----+---+-----------+------+---+------+---+--------+---+----+----+
+ * | 0 1 | U | 1 1 1 1 0 | size | 0 | Rm | 1 | opcode | 1 | Rn | Rd |
+ * +-----+---+-----------+------+---+------+---+--------+---+----+----+
+ */
+static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
+ uint32_t insn)
+{
+ int rd = extract32(insn, 0, 5);
+ int rn = extract32(insn, 5, 5);
+ int opcode = extract32(insn, 11, 4);
+ int rm = extract32(insn, 16, 5);
+ int size = extract32(insn, 22, 2);
+ bool u = extract32(insn, 29, 1);
+ TCGv_i32 ele1, ele2, ele3;
+ TCGv_i64 res;
+ int feature;
+
+ switch (u * 16 + opcode) {
+ case 0x10: /* SQRDMLAH (vector) */
+ case 0x11: /* SQRDMLSH (vector) */
+ if (size != 1 && size != 2) {
+ unallocated_encoding(s);
+ return;
+ }
+ feature = ARM_FEATURE_V8_RDM;
+ break;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+ if (!arm_dc_feature(s, feature)) {
+ unallocated_encoding(s);
+ return;
+ }
+ if (!fp_access_check(s)) {
+ return;
+ }
+
+ /* Do a single operation on the lowest element in the vector.
+ * We use the standard Neon helpers and rely on 0 OP 0 == 0
+ * with no side effects for all these operations.
+ * OPTME: special-purpose helpers would avoid doing some
+ * unnecessary work in the helper for the 16 bit cases.
+ */
+ ele1 = tcg_temp_new_i32();
+ ele2 = tcg_temp_new_i32();
+ ele3 = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, ele1, rn, 0, size);
+ read_vec_element_i32(s, ele2, rm, 0, size);
+ read_vec_element_i32(s, ele3, rd, 0, size);
+
+ switch (opcode) {
+ case 0x0: /* SQRDMLAH */
+ if (size == 1) {
+ gen_helper_neon_qrdmlah_s16(ele3, cpu_env, ele1, ele2, ele3);
+ } else {
+ gen_helper_neon_qrdmlah_s32(ele3, cpu_env, ele1, ele2, ele3);
+ }
+ break;
+ case 0x1: /* SQRDMLSH */
+ if (size == 1) {
+ gen_helper_neon_qrdmlsh_s16(ele3, cpu_env, ele1, ele2, ele3);
+ } else {
+ gen_helper_neon_qrdmlsh_s32(ele3, cpu_env, ele1, ele2, ele3);
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ tcg_temp_free_i32(ele1);
+ tcg_temp_free_i32(ele2);
+
+ res = tcg_temp_new_i64();
+ tcg_gen_extu_i32_i64(res, ele3);
+ tcg_temp_free_i32(ele3);
+
+ write_fp_dreg(s, rd, res);
+ tcg_temp_free_i64(res);
+}
+
+static void handle_2misc_64(DisasContext *s, int opcode, bool u,
+ TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
+ TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
+{
+ /* Handle 64->64 opcodes which are shared between the scalar and
+ * vector 2-reg-misc groups. We cover every integer opcode where size == 3
+ * is valid in either group and also the double-precision fp ops.
+ * The caller only need provide tcg_rmode and tcg_fpstatus if the op
+ * requires them.
+ */
+ TCGCond cond;
+
+ switch (opcode) {
+ case 0x4: /* CLS, CLZ */
+ if (u) {
+ tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
+ } else {
+ tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
+ }
+ break;
+ case 0x5: /* NOT */
+ /* This opcode is shared with CNT and RBIT but we have earlier
+ * enforced that size == 3 if and only if this is the NOT insn.
+ */
+ tcg_gen_not_i64(tcg_rd, tcg_rn);
+ break;
+ case 0x7: /* SQABS, SQNEG */
+ if (u) {
+ gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
+ } else {
+ gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
+ }
+ break;
+ case 0xa: /* CMLT */
+ /* 64 bit integer comparison against zero, result is
+ * test ? (2^64 - 1) : 0. We implement via setcond(!test) and
+ * subtracting 1.
+ */
+ cond = TCG_COND_LT;
+ do_cmop:
+ tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
+ tcg_gen_neg_i64(tcg_rd, tcg_rd);
+ break;
+ case 0x8: /* CMGT, CMGE */
+ cond = u ? TCG_COND_GE : TCG_COND_GT;
+ goto do_cmop;
+ case 0x9: /* CMEQ, CMLE */
+ cond = u ? TCG_COND_LE : TCG_COND_EQ;
+ goto do_cmop;
case 0xb: /* ABS, NEG */
if (u) {
tcg_gen_neg_i64(tcg_rd, tcg_rn);
bool is_scalar, bool is_u, bool is_q,
int size, int rn, int rd)
{
- bool is_double = (size == 3);
+ bool is_double = (size == MO_64);
TCGv_ptr fpst;
if (!fp_access_check(s)) {
return;
}
- fpst = get_fpstatus_ptr(false);
+ fpst = get_fpstatus_ptr(size == MO_16);
if (is_double) {
TCGv_i64 tcg_op = tcg_temp_new_i64();
bool swap = false;
int pass, maxpasses;
- switch (opcode) {
- case 0x2e: /* FCMLT (zero) */
- swap = true;
- /* fall through */
- case 0x2c: /* FCMGT (zero) */
- genfn = gen_helper_neon_cgt_f32;
- break;
- case 0x2d: /* FCMEQ (zero) */
- genfn = gen_helper_neon_ceq_f32;
- break;
- case 0x6d: /* FCMLE (zero) */
- swap = true;
- /* fall through */
- case 0x6c: /* FCMGE (zero) */
- genfn = gen_helper_neon_cge_f32;
- break;
- default:
- g_assert_not_reached();
+ if (size == MO_16) {
+ switch (opcode) {
+ case 0x2e: /* FCMLT (zero) */
+ swap = true;
+ /* fall through */
+ case 0x2c: /* FCMGT (zero) */
+ genfn = gen_helper_advsimd_cgt_f16;
+ break;
+ case 0x2d: /* FCMEQ (zero) */
+ genfn = gen_helper_advsimd_ceq_f16;
+ break;
+ case 0x6d: /* FCMLE (zero) */
+ swap = true;
+ /* fall through */
+ case 0x6c: /* FCMGE (zero) */
+ genfn = gen_helper_advsimd_cge_f16;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ } else {
+ switch (opcode) {
+ case 0x2e: /* FCMLT (zero) */
+ swap = true;
+ /* fall through */
+ case 0x2c: /* FCMGT (zero) */
+ genfn = gen_helper_neon_cgt_f32;
+ break;
+ case 0x2d: /* FCMEQ (zero) */
+ genfn = gen_helper_neon_ceq_f32;
+ break;
+ case 0x6d: /* FCMLE (zero) */
+ swap = true;
+ /* fall through */
+ case 0x6c: /* FCMGE (zero) */
+ genfn = gen_helper_neon_cge_f32;
+ break;
+ default:
+ g_assert_not_reached();
+ }
}
if (is_scalar) {
maxpasses = 1;
} else {
- maxpasses = is_q ? 4 : 2;
+ int vector_size = 8 << is_q;
+ maxpasses = vector_size >> size;
}
for (pass = 0; pass < maxpasses; pass++) {
- read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
+ read_vec_element_i32(s, tcg_op, rn, pass, size);
if (swap) {
genfn(tcg_res, tcg_zero, tcg_op, fpst);
} else {
if (is_scalar) {
write_fp_sreg(s, rd, tcg_res);
} else {
- write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+ write_vec_element_i32(s, tcg_res, rd, pass, size);
}
}
tcg_temp_free_i32(tcg_res);
} else {
TCGv_i32 tcg_lo = tcg_temp_new_i32();
TCGv_i32 tcg_hi = tcg_temp_new_i32();
+ TCGv_ptr fpst = get_fpstatus_ptr(false);
+ TCGv_i32 ahp = get_ahp_flag();
+
tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
- gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, cpu_env);
- gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, cpu_env);
+ gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
+ gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
tcg_temp_free_i32(tcg_lo);
tcg_temp_free_i32(tcg_hi);
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i32(ahp);
}
break;
case 0x56: /* FCVTXN, FCVTXN2 */
unallocated_encoding(s);
return;
}
-
- if (size > 3 && !is_q) {
- unallocated_encoding(s);
- return;
- }
+ tcg_debug_assert(size <= 3);
if (!fp_access_check(s)) {
return;
}
}
-/* Helper functions for 32 bit comparisons */
-static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
-{
- tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
-}
-
-static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
-{
- tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
-}
-
-static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
-{
- tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
-}
-
-static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
-{
- tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
-}
-
/* Pairwise op subgroup of C3.6.16.
*
* This is called directly or via the handle_3same_float for float pairwise
static NeonGenTwoOpFn * const fns[3][2] = {
{ gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
{ gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
- { gen_max_s32, gen_max_u32 },
+ { tcg_gen_smax_i32, tcg_gen_umax_i32 },
};
genfn = fns[size][u];
break;
static NeonGenTwoOpFn * const fns[3][2] = {
{ gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
{ gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
- { gen_min_s32, gen_min_u32 },
+ { tcg_gen_smin_i32, tcg_gen_umin_i32 },
};
genfn = fns[size][u];
break;
static NeonGenTwoOpFn * const fns[3][2] = {
{ gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
{ gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
- { gen_max_s32, gen_max_u32 },
+ { tcg_gen_smax_i32, tcg_gen_umax_i32 },
};
genfn = fns[size][u];
break;
static NeonGenTwoOpFn * const fns[3][2] = {
{ gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
{ gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
- { gen_min_s32, gen_min_u32 },
+ { tcg_gen_smin_i32, tcg_gen_umin_i32 },
};
genfn = fns[size][u];
break;
}
}
+/*
+ * Advanced SIMD three same (ARMv8.2 FP16 variants)
+ *
+ * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0
+ * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
+ * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd |
+ * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
+ *
+ * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE
+ * (register), FACGE, FABD, FCMGT (register) and FACGT.
+ *
+ */
+static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
+{
+ int opcode, fpopcode;
+ int is_q, u, a, rm, rn, rd;
+ int datasize, elements;
+ int pass;
+ TCGv_ptr fpst;
+ bool pairwise = false;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (!fp_access_check(s)) {
+ return;
+ }
+
+ /* For these floating point ops, the U, a and opcode bits
+ * together indicate the operation.
+ */
+ opcode = extract32(insn, 11, 3);
+ u = extract32(insn, 29, 1);
+ a = extract32(insn, 23, 1);
+ is_q = extract32(insn, 30, 1);
+ rm = extract32(insn, 16, 5);
+ rn = extract32(insn, 5, 5);
+ rd = extract32(insn, 0, 5);
+
+ fpopcode = opcode | (a << 3) | (u << 4);
+ datasize = is_q ? 128 : 64;
+ elements = datasize / 16;
+
+ switch (fpopcode) {
+ case 0x10: /* FMAXNMP */
+ case 0x12: /* FADDP */
+ case 0x16: /* FMAXP */
+ case 0x18: /* FMINNMP */
+ case 0x1e: /* FMINP */
+ pairwise = true;
+ break;
+ }
+
+ fpst = get_fpstatus_ptr(true);
+
+ if (pairwise) {
+ int maxpass = is_q ? 8 : 4;
+ TCGv_i32 tcg_op1 = tcg_temp_new_i32();
+ TCGv_i32 tcg_op2 = tcg_temp_new_i32();
+ TCGv_i32 tcg_res[8];
+
+ for (pass = 0; pass < maxpass; pass++) {
+ int passreg = pass < (maxpass / 2) ? rn : rm;
+ int passelt = (pass << 1) & (maxpass - 1);
+
+ read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16);
+ read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16);
+ tcg_res[pass] = tcg_temp_new_i32();
+
+ switch (fpopcode) {
+ case 0x10: /* FMAXNMP */
+ gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2,
+ fpst);
+ break;
+ case 0x12: /* FADDP */
+ gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x16: /* FMAXP */
+ gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x18: /* FMINNMP */
+ gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2,
+ fpst);
+ break;
+ case 0x1e: /* FMINP */
+ gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ }
+
+ for (pass = 0; pass < maxpass; pass++) {
+ write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
+ tcg_temp_free_i32(tcg_res[pass]);
+ }
+
+ tcg_temp_free_i32(tcg_op1);
+ tcg_temp_free_i32(tcg_op2);
+
+ } else {
+ for (pass = 0; pass < elements; pass++) {
+ TCGv_i32 tcg_op1 = tcg_temp_new_i32();
+ TCGv_i32 tcg_op2 = tcg_temp_new_i32();
+ TCGv_i32 tcg_res = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
+ read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
+
+ switch (fpopcode) {
+ case 0x0: /* FMAXNM */
+ gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1: /* FMLA */
+ read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
+ gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
+ fpst);
+ break;
+ case 0x2: /* FADD */
+ gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x3: /* FMULX */
+ gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x4: /* FCMEQ */
+ gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x6: /* FMAX */
+ gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x7: /* FRECPS */
+ gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x8: /* FMINNM */
+ gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x9: /* FMLS */
+ /* As usual for ARM, separate negation for fused multiply-add */
+ tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
+ read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
+ gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
+ fpst);
+ break;
+ case 0xa: /* FSUB */
+ gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0xe: /* FMIN */
+ gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0xf: /* FRSQRTS */
+ gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x13: /* FMUL */
+ gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x14: /* FCMGE */
+ gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x15: /* FACGE */
+ gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x17: /* FDIV */
+ gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1a: /* FABD */
+ gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
+ tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
+ break;
+ case 0x1c: /* FCMGT */
+ gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ case 0x1d: /* FACGT */
+ gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
+ break;
+ default:
+ fprintf(stderr, "%s: insn %#04x, fpop %#2x @ %#" PRIx64 "\n",
+ __func__, insn, fpopcode, s->pc);
+ g_assert_not_reached();
+ }
+
+ write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
+ tcg_temp_free_i32(tcg_res);
+ tcg_temp_free_i32(tcg_op1);
+ tcg_temp_free_i32(tcg_op2);
+ }
+ }
+
+ tcg_temp_free_ptr(fpst);
+
+ clear_vec_high(s, is_q, rd);
+}
+
+/* AdvSIMD three same extra
+ * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0
+ * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
+ * | 0 | Q | U | 0 1 1 1 0 | size | 0 | Rm | 1 | opcode | 1 | Rn | Rd |
+ * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
+ */
+static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
+{
+ int rd = extract32(insn, 0, 5);
+ int rn = extract32(insn, 5, 5);
+ int opcode = extract32(insn, 11, 4);
+ int rm = extract32(insn, 16, 5);
+ int size = extract32(insn, 22, 2);
+ bool u = extract32(insn, 29, 1);
+ bool is_q = extract32(insn, 30, 1);
+ int feature, rot;
+
+ switch (u * 16 + opcode) {
+ case 0x10: /* SQRDMLAH (vector) */
+ case 0x11: /* SQRDMLSH (vector) */
+ if (size != 1 && size != 2) {
+ unallocated_encoding(s);
+ return;
+ }
+ feature = ARM_FEATURE_V8_RDM;
+ break;
+ case 0x8: /* FCMLA, #0 */
+ case 0x9: /* FCMLA, #90 */
+ case 0xa: /* FCMLA, #180 */
+ case 0xb: /* FCMLA, #270 */
+ case 0xc: /* FCADD, #90 */
+ case 0xe: /* FCADD, #270 */
+ if (size == 0
+ || (size == 1 && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))
+ || (size == 3 && !is_q)) {
+ unallocated_encoding(s);
+ return;
+ }
+ feature = ARM_FEATURE_V8_FCMA;
+ break;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+ if (!arm_dc_feature(s, feature)) {
+ unallocated_encoding(s);
+ return;
+ }
+ if (!fp_access_check(s)) {
+ return;
+ }
+
+ switch (opcode) {
+ case 0x0: /* SQRDMLAH (vector) */
+ switch (size) {
+ case 1:
+ gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s16);
+ break;
+ case 2:
+ gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s32);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return;
+
+ case 0x1: /* SQRDMLSH (vector) */
+ switch (size) {
+ case 1:
+ gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s16);
+ break;
+ case 2:
+ gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s32);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return;
+
+ case 0x8: /* FCMLA, #0 */
+ case 0x9: /* FCMLA, #90 */
+ case 0xa: /* FCMLA, #180 */
+ case 0xb: /* FCMLA, #270 */
+ rot = extract32(opcode, 0, 2);
+ switch (size) {
+ case 1:
+ gen_gvec_op3_fpst(s, is_q, rd, rn, rm, true, rot,
+ gen_helper_gvec_fcmlah);
+ break;
+ case 2:
+ gen_gvec_op3_fpst(s, is_q, rd, rn, rm, false, rot,
+ gen_helper_gvec_fcmlas);
+ break;
+ case 3:
+ gen_gvec_op3_fpst(s, is_q, rd, rn, rm, false, rot,
+ gen_helper_gvec_fcmlad);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return;
+
+ case 0xc: /* FCADD, #90 */
+ case 0xe: /* FCADD, #270 */
+ rot = extract32(opcode, 1, 1);
+ switch (size) {
+ case 1:
+ gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
+ gen_helper_gvec_fcaddh);
+ break;
+ case 2:
+ gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
+ gen_helper_gvec_fcadds);
+ break;
+ case 3:
+ gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
+ gen_helper_gvec_fcaddd);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return;
+
+ default:
+ g_assert_not_reached();
+ }
+}
+
static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
int size, int rn, int rd)
{
/* 16 -> 32 bit fp conversion */
int srcelt = is_q ? 4 : 0;
TCGv_i32 tcg_res[4];
+ TCGv_ptr fpst = get_fpstatus_ptr(false);
+ TCGv_i32 ahp = get_ahp_flag();
for (pass = 0; pass < 4; pass++) {
tcg_res[pass] = tcg_temp_new_i32();
read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
- cpu_env);
+ fpst, ahp);
}
for (pass = 0; pass < 4; pass++) {
write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
tcg_temp_free_i32(tcg_res[pass]);
}
+
+ tcg_temp_free_ptr(fpst);
+ tcg_temp_free_i32(ahp);
}
}
/* All 64-bit element operations can be shared with scalar 2misc */
int pass;
- for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
+ /* Coverity claims (size == 3 && !is_q) has been eliminated
+ * from all paths leading to here.
+ */
+ tcg_debug_assert(is_q);
+ for (pass = 0; pass < 2; pass++) {
TCGv_i64 tcg_op = tcg_temp_new_i64();
TCGv_i64 tcg_res = tcg_temp_new_i64();
bool reverse;
TCGv_i32 tcg_zero = tcg_const_i32(0);
- /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
- comp = (opcode - 0x8) * 2 + u;
- /* ...but LE, LT are implemented as reverse GE, GT */
- reverse = (comp > 2);
- if (reverse) {
- comp = 4 - comp;
- }
- genfn = fns[comp][size];
- if (reverse) {
- genfn(tcg_res, tcg_zero, tcg_op);
- } else {
- genfn(tcg_res, tcg_op, tcg_zero);
- }
- tcg_temp_free_i32(tcg_zero);
- break;
- }
- case 0xb: /* ABS, NEG */
- if (u) {
- TCGv_i32 tcg_zero = tcg_const_i32(0);
- if (size) {
- gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
- } else {
- gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
- }
- tcg_temp_free_i32(tcg_zero);
- } else {
- if (size) {
- gen_helper_neon_abs_s16(tcg_res, tcg_op);
- } else {
- gen_helper_neon_abs_s8(tcg_res, tcg_op);
- }
- }
- break;
- case 0x4: /* CLS, CLZ */
- if (u) {
- if (size == 0) {
- gen_helper_neon_clz_u8(tcg_res, tcg_op);
- } else {
- gen_helper_neon_clz_u16(tcg_res, tcg_op);
- }
- } else {
- if (size == 0) {
- gen_helper_neon_cls_s8(tcg_res, tcg_op);
- } else {
- gen_helper_neon_cls_s16(tcg_res, tcg_op);
- }
- }
- break;
- default:
- g_assert_not_reached();
- }
+ /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
+ comp = (opcode - 0x8) * 2 + u;
+ /* ...but LE, LT are implemented as reverse GE, GT */
+ reverse = (comp > 2);
+ if (reverse) {
+ comp = 4 - comp;
+ }
+ genfn = fns[comp][size];
+ if (reverse) {
+ genfn(tcg_res, tcg_zero, tcg_op);
+ } else {
+ genfn(tcg_res, tcg_op, tcg_zero);
+ }
+ tcg_temp_free_i32(tcg_zero);
+ break;
+ }
+ case 0xb: /* ABS, NEG */
+ if (u) {
+ TCGv_i32 tcg_zero = tcg_const_i32(0);
+ if (size) {
+ gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
+ } else {
+ gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
+ }
+ tcg_temp_free_i32(tcg_zero);
+ } else {
+ if (size) {
+ gen_helper_neon_abs_s16(tcg_res, tcg_op);
+ } else {
+ gen_helper_neon_abs_s8(tcg_res, tcg_op);
+ }
+ }
+ break;
+ case 0x4: /* CLS, CLZ */
+ if (u) {
+ if (size == 0) {
+ gen_helper_neon_clz_u8(tcg_res, tcg_op);
+ } else {
+ gen_helper_neon_clz_u16(tcg_res, tcg_op);
+ }
+ } else {
+ if (size == 0) {
+ gen_helper_neon_cls_s8(tcg_res, tcg_op);
+ } else {
+ gen_helper_neon_cls_s16(tcg_res, tcg_op);
+ }
+ }
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ }
+
+ write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+
+ tcg_temp_free_i32(tcg_res);
+ tcg_temp_free_i32(tcg_op);
+ }
+ }
+ clear_vec_high(s, is_q, rd);
+
+ if (need_rmode) {
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
+ tcg_temp_free_i32(tcg_rmode);
+ }
+ if (need_fpstatus) {
+ tcg_temp_free_ptr(tcg_fpstatus);
+ }
+}
+
+/* AdvSIMD [scalar] two register miscellaneous (FP16)
+ *
+ * 31 30 29 28 27 24 23 22 21 17 16 12 11 10 9 5 4 0
+ * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
+ * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 | Rn | Rd |
+ * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
+ * mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
+ * val: 0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
+ *
+ * This actually covers two groups where scalar access is governed by
+ * bit 28. A bunch of the instructions (float to integral) only exist
+ * in the vector form and are un-allocated for the scalar decode. Also
+ * in the scalar decode Q is always 1.
+ */
+static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
+{
+ int fpop, opcode, a, u;
+ int rn, rd;
+ bool is_q;
+ bool is_scalar;
+ bool only_in_vector = false;
+
+ int pass;
+ TCGv_i32 tcg_rmode = NULL;
+ TCGv_ptr tcg_fpstatus = NULL;
+ bool need_rmode = false;
+ bool need_fpst = true;
+ int rmode;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ rd = extract32(insn, 0, 5);
+ rn = extract32(insn, 5, 5);
+
+ a = extract32(insn, 23, 1);
+ u = extract32(insn, 29, 1);
+ is_scalar = extract32(insn, 28, 1);
+ is_q = extract32(insn, 30, 1);
+
+ opcode = extract32(insn, 12, 5);
+ fpop = deposit32(opcode, 5, 1, a);
+ fpop = deposit32(fpop, 6, 1, u);
+
+ rd = extract32(insn, 0, 5);
+ rn = extract32(insn, 5, 5);
+
+ switch (fpop) {
+ case 0x1d: /* SCVTF */
+ case 0x5d: /* UCVTF */
+ {
+ int elements;
+
+ if (is_scalar) {
+ elements = 1;
+ } else {
+ elements = (is_q ? 8 : 4);
+ }
+
+ if (!fp_access_check(s)) {
+ return;
+ }
+ handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
+ return;
+ }
+ break;
+ case 0x2c: /* FCMGT (zero) */
+ case 0x2d: /* FCMEQ (zero) */
+ case 0x2e: /* FCMLT (zero) */
+ case 0x6c: /* FCMGE (zero) */
+ case 0x6d: /* FCMLE (zero) */
+ handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
+ return;
+ case 0x3d: /* FRECPE */
+ case 0x3f: /* FRECPX */
+ break;
+ case 0x18: /* FRINTN */
+ need_rmode = true;
+ only_in_vector = true;
+ rmode = FPROUNDING_TIEEVEN;
+ break;
+ case 0x19: /* FRINTM */
+ need_rmode = true;
+ only_in_vector = true;
+ rmode = FPROUNDING_NEGINF;
+ break;
+ case 0x38: /* FRINTP */
+ need_rmode = true;
+ only_in_vector = true;
+ rmode = FPROUNDING_POSINF;
+ break;
+ case 0x39: /* FRINTZ */
+ need_rmode = true;
+ only_in_vector = true;
+ rmode = FPROUNDING_ZERO;
+ break;
+ case 0x58: /* FRINTA */
+ need_rmode = true;
+ only_in_vector = true;
+ rmode = FPROUNDING_TIEAWAY;
+ break;
+ case 0x59: /* FRINTX */
+ case 0x79: /* FRINTI */
+ only_in_vector = true;
+ /* current rounding mode */
+ break;
+ case 0x1a: /* FCVTNS */
+ need_rmode = true;
+ rmode = FPROUNDING_TIEEVEN;
+ break;
+ case 0x1b: /* FCVTMS */
+ need_rmode = true;
+ rmode = FPROUNDING_NEGINF;
+ break;
+ case 0x1c: /* FCVTAS */
+ need_rmode = true;
+ rmode = FPROUNDING_TIEAWAY;
+ break;
+ case 0x3a: /* FCVTPS */
+ need_rmode = true;
+ rmode = FPROUNDING_POSINF;
+ break;
+ case 0x3b: /* FCVTZS */
+ need_rmode = true;
+ rmode = FPROUNDING_ZERO;
+ break;
+ case 0x5a: /* FCVTNU */
+ need_rmode = true;
+ rmode = FPROUNDING_TIEEVEN;
+ break;
+ case 0x5b: /* FCVTMU */
+ need_rmode = true;
+ rmode = FPROUNDING_NEGINF;
+ break;
+ case 0x5c: /* FCVTAU */
+ need_rmode = true;
+ rmode = FPROUNDING_TIEAWAY;
+ break;
+ case 0x7a: /* FCVTPU */
+ need_rmode = true;
+ rmode = FPROUNDING_POSINF;
+ break;
+ case 0x7b: /* FCVTZU */
+ need_rmode = true;
+ rmode = FPROUNDING_ZERO;
+ break;
+ case 0x2f: /* FABS */
+ case 0x6f: /* FNEG */
+ need_fpst = false;
+ break;
+ case 0x7d: /* FRSQRTE */
+ case 0x7f: /* FSQRT (vector) */
+ break;
+ default:
+ fprintf(stderr, "%s: insn %#04x fpop %#2x\n", __func__, insn, fpop);
+ g_assert_not_reached();
+ }
+
+
+ /* Check additional constraints for the scalar encoding */
+ if (is_scalar) {
+ if (!is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+ /* FRINTxx is only in the vector form */
+ if (only_in_vector) {
+ unallocated_encoding(s);
+ return;
+ }
+ }
+
+ if (!fp_access_check(s)) {
+ return;
+ }
+
+ if (need_rmode || need_fpst) {
+ tcg_fpstatus = get_fpstatus_ptr(true);
+ }
+
+ if (need_rmode) {
+ tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
+ gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
+ }
+
+ if (is_scalar) {
+ TCGv_i32 tcg_op = read_fp_hreg(s, rn);
+ TCGv_i32 tcg_res = tcg_temp_new_i32();
+
+ switch (fpop) {
+ case 0x1a: /* FCVTNS */
+ case 0x1b: /* FCVTMS */
+ case 0x1c: /* FCVTAS */
+ case 0x3a: /* FCVTPS */
+ case 0x3b: /* FCVTZS */
+ gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
+ break;
+ case 0x3d: /* FRECPE */
+ gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
+ break;
+ case 0x3f: /* FRECPX */
+ gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
+ break;
+ case 0x5a: /* FCVTNU */
+ case 0x5b: /* FCVTMU */
+ case 0x5c: /* FCVTAU */
+ case 0x7a: /* FCVTPU */
+ case 0x7b: /* FCVTZU */
+ gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
+ break;
+ case 0x6f: /* FNEG */
+ tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
+ break;
+ case 0x7d: /* FRSQRTE */
+ gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+
+ /* limit any sign extension going on */
+ tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
+ write_fp_sreg(s, rd, tcg_res);
+
+ tcg_temp_free_i32(tcg_res);
+ tcg_temp_free_i32(tcg_op);
+ } else {
+ for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
+ TCGv_i32 tcg_op = tcg_temp_new_i32();
+ TCGv_i32 tcg_res = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, tcg_op, rn, pass, MO_16);
+
+ switch (fpop) {
+ case 0x1a: /* FCVTNS */
+ case 0x1b: /* FCVTMS */
+ case 0x1c: /* FCVTAS */
+ case 0x3a: /* FCVTPS */
+ case 0x3b: /* FCVTZS */
+ gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
+ break;
+ case 0x3d: /* FRECPE */
+ gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
+ break;
+ case 0x5a: /* FCVTNU */
+ case 0x5b: /* FCVTMU */
+ case 0x5c: /* FCVTAU */
+ case 0x7a: /* FCVTPU */
+ case 0x7b: /* FCVTZU */
+ gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
+ break;
+ case 0x18: /* FRINTN */
+ case 0x19: /* FRINTM */
+ case 0x38: /* FRINTP */
+ case 0x39: /* FRINTZ */
+ case 0x58: /* FRINTA */
+ case 0x79: /* FRINTI */
+ gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus);
+ break;
+ case 0x59: /* FRINTX */
+ gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
+ break;
+ case 0x2f: /* FABS */
+ tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
+ break;
+ case 0x6f: /* FNEG */
+ tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
+ break;
+ case 0x7d: /* FRSQRTE */
+ gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
+ break;
+ case 0x7f: /* FSQRT */
+ gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
+ break;
+ default:
+ g_assert_not_reached();
}
- write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+ write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
tcg_temp_free_i32(tcg_res);
tcg_temp_free_i32(tcg_op);
}
+
+ clear_vec_high(s, is_q, rd);
}
- clear_vec_high(s, is_q, rd);
- if (need_rmode) {
+ if (tcg_rmode) {
gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
tcg_temp_free_i32(tcg_rmode);
}
- if (need_fpstatus) {
+
+ if (tcg_fpstatus) {
tcg_temp_free_ptr(tcg_fpstatus);
}
}
int rn = extract32(insn, 5, 5);
int rd = extract32(insn, 0, 5);
bool is_long = false;
- bool is_fp = false;
+ int is_fp = 0;
+ bool is_fp16 = false;
int index;
TCGv_ptr fpst;
- switch (opcode) {
- case 0x0: /* MLA */
- case 0x4: /* MLS */
- if (!u || is_scalar) {
+ switch (16 * u + opcode) {
+ case 0x08: /* MUL */
+ case 0x10: /* MLA */
+ case 0x14: /* MLS */
+ if (is_scalar) {
unallocated_encoding(s);
return;
}
break;
- case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
- case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
- case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
+ case 0x02: /* SMLAL, SMLAL2 */
+ case 0x12: /* UMLAL, UMLAL2 */
+ case 0x06: /* SMLSL, SMLSL2 */
+ case 0x16: /* UMLSL, UMLSL2 */
+ case 0x0a: /* SMULL, SMULL2 */
+ case 0x1a: /* UMULL, UMULL2 */
if (is_scalar) {
unallocated_encoding(s);
return;
}
is_long = true;
break;
- case 0x3: /* SQDMLAL, SQDMLAL2 */
- case 0x7: /* SQDMLSL, SQDMLSL2 */
- case 0xb: /* SQDMULL, SQDMULL2 */
+ case 0x03: /* SQDMLAL, SQDMLAL2 */
+ case 0x07: /* SQDMLSL, SQDMLSL2 */
+ case 0x0b: /* SQDMULL, SQDMULL2 */
is_long = true;
- /* fall through */
- case 0xc: /* SQDMULH */
- case 0xd: /* SQRDMULH */
- if (u) {
- unallocated_encoding(s);
- return;
- }
break;
- case 0x8: /* MUL */
- if (u || is_scalar) {
- unallocated_encoding(s);
- return;
- }
+ case 0x0c: /* SQDMULH */
+ case 0x0d: /* SQRDMULH */
break;
- case 0x1: /* FMLA */
- case 0x5: /* FMLS */
- if (u) {
+ case 0x01: /* FMLA */
+ case 0x05: /* FMLS */
+ case 0x09: /* FMUL */
+ case 0x19: /* FMULX */
+ is_fp = 1;
+ break;
+ case 0x1d: /* SQRDMLAH */
+ case 0x1f: /* SQRDMLSH */
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_RDM)) {
unallocated_encoding(s);
return;
}
- /* fall through */
- case 0x9: /* FMUL, FMULX */
- if (!extract32(size, 1, 1)) {
+ break;
+ case 0x11: /* FCMLA #0 */
+ case 0x13: /* FCMLA #90 */
+ case 0x15: /* FCMLA #180 */
+ case 0x17: /* FCMLA #270 */
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)) {
unallocated_encoding(s);
return;
}
- is_fp = true;
+ is_fp = 2;
break;
default:
unallocated_encoding(s);
return;
}
- if (is_fp) {
- /* low bit of size indicates single/double */
- size = extract32(size, 0, 1) ? 3 : 2;
- if (size == 2) {
- index = h << 1 | l;
- } else {
- if (l || !is_q) {
+ switch (is_fp) {
+ case 1: /* normal fp */
+ /* convert insn encoded size to TCGMemOp size */
+ switch (size) {
+ case 0: /* half-precision */
+ size = MO_16;
+ is_fp16 = true;
+ break;
+ case MO_32: /* single precision */
+ case MO_64: /* double precision */
+ break;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+
+ case 2: /* complex fp */
+ /* Each indexable element is a complex pair. */
+ size <<= 1;
+ switch (size) {
+ case MO_32:
+ if (h && !is_q) {
unallocated_encoding(s);
return;
}
- index = h;
- }
- rm |= (m << 4);
- } else {
- switch (size) {
- case 1:
- index = h << 2 | l << 1 | m;
+ is_fp16 = true;
break;
- case 2:
- index = h << 1 | l;
- rm |= (m << 4);
+ case MO_64:
break;
default:
unallocated_encoding(s);
return;
}
+ break;
+
+ default: /* integer */
+ switch (size) {
+ case MO_8:
+ case MO_64:
+ unallocated_encoding(s);
+ return;
+ }
+ break;
+ }
+ if (is_fp16 && !arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ /* Given TCGMemOp size, adjust register and indexing. */
+ switch (size) {
+ case MO_16:
+ index = h << 2 | l << 1 | m;
+ break;
+ case MO_32:
+ index = h << 1 | l;
+ rm |= m << 4;
+ break;
+ case MO_64:
+ if (l || !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+ index = h;
+ rm |= m << 4;
+ break;
+ default:
+ g_assert_not_reached();
}
if (!fp_access_check(s)) {
}
if (is_fp) {
- fpst = get_fpstatus_ptr(false);
+ fpst = get_fpstatus_ptr(is_fp16);
} else {
fpst = NULL;
}
+ switch (16 * u + opcode) {
+ case 0x11: /* FCMLA #0 */
+ case 0x13: /* FCMLA #90 */
+ case 0x15: /* FCMLA #180 */
+ case 0x17: /* FCMLA #270 */
+ tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_reg_offset(s, rm, index, size), fpst,
+ is_q ? 16 : 8, vec_full_reg_size(s),
+ extract32(insn, 13, 2), /* rot */
+ size == MO_64
+ ? gen_helper_gvec_fcmlas_idx
+ : gen_helper_gvec_fcmlah_idx);
+ tcg_temp_free_ptr(fpst);
+ return;
+ }
+
if (size == 3) {
TCGv_i64 tcg_idx = tcg_temp_new_i64();
int pass;
read_vec_element(s, tcg_op, rn, pass, MO_64);
- switch (opcode) {
- case 0x5: /* FMLS */
+ switch (16 * u + opcode) {
+ case 0x05: /* FMLS */
/* As usual for ARM, separate negation for fused multiply-add */
gen_helper_vfp_negd(tcg_op, tcg_op);
/* fall through */
- case 0x1: /* FMLA */
+ case 0x01: /* FMLA */
read_vec_element(s, tcg_res, rd, pass, MO_64);
gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
break;
- case 0x9: /* FMUL, FMULX */
- if (u) {
- gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
- } else {
- gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
- }
+ case 0x09: /* FMUL */
+ gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
+ break;
+ case 0x19: /* FMULX */
+ gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
break;
default:
g_assert_not_reached();
read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
- switch (opcode) {
- case 0x0: /* MLA */
- case 0x4: /* MLS */
- case 0x8: /* MUL */
+ switch (16 * u + opcode) {
+ case 0x08: /* MUL */
+ case 0x10: /* MLA */
+ case 0x14: /* MLS */
{
static NeonGenTwoOpFn * const fns[2][2] = {
{ gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
genfn(tcg_res, tcg_op, tcg_res);
break;
}
- case 0x5: /* FMLS */
- /* As usual for ARM, separate negation for fused multiply-add */
- gen_helper_vfp_negs(tcg_op, tcg_op);
- /* fall through */
- case 0x1: /* FMLA */
- read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
- gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
+ case 0x05: /* FMLS */
+ case 0x01: /* FMLA */
+ read_vec_element_i32(s, tcg_res, rd, pass,
+ is_scalar ? size : MO_32);
+ switch (size) {
+ case 1:
+ if (opcode == 0x5) {
+ /* As usual for ARM, separate negation for fused
+ * multiply-add */
+ tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
+ }
+ if (is_scalar) {
+ gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
+ tcg_res, fpst);
+ } else {
+ gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx,
+ tcg_res, fpst);
+ }
+ break;
+ case 2:
+ if (opcode == 0x5) {
+ /* As usual for ARM, separate negation for
+ * fused multiply-add */
+ tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
+ }
+ gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
+ tcg_res, fpst);
+ break;
+ default:
+ g_assert_not_reached();
+ }
break;
- case 0x9: /* FMUL, FMULX */
- if (u) {
- gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
- } else {
+ case 0x09: /* FMUL */
+ switch (size) {
+ case 1:
+ if (is_scalar) {
+ gen_helper_advsimd_mulh(tcg_res, tcg_op,
+ tcg_idx, fpst);
+ } else {
+ gen_helper_advsimd_mul2h(tcg_res, tcg_op,
+ tcg_idx, fpst);
+ }
+ break;
+ case 2:
gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ break;
+ case 0x19: /* FMULX */
+ switch (size) {
+ case 1:
+ if (is_scalar) {
+ gen_helper_advsimd_mulxh(tcg_res, tcg_op,
+ tcg_idx, fpst);
+ } else {
+ gen_helper_advsimd_mulx2h(tcg_res, tcg_op,
+ tcg_idx, fpst);
+ }
+ break;
+ case 2:
+ gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
+ break;
+ default:
+ g_assert_not_reached();
}
break;
- case 0xc: /* SQDMULH */
+ case 0x0c: /* SQDMULH */
if (size == 1) {
gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
tcg_op, tcg_idx);
tcg_op, tcg_idx);
}
break;
- case 0xd: /* SQRDMULH */
+ case 0x0d: /* SQRDMULH */
if (size == 1) {
gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
tcg_op, tcg_idx);
tcg_op, tcg_idx);
}
break;
+ case 0x1d: /* SQRDMLAH */
+ read_vec_element_i32(s, tcg_res, rd, pass,
+ is_scalar ? size : MO_32);
+ if (size == 1) {
+ gen_helper_neon_qrdmlah_s16(tcg_res, cpu_env,
+ tcg_op, tcg_idx, tcg_res);
+ } else {
+ gen_helper_neon_qrdmlah_s32(tcg_res, cpu_env,
+ tcg_op, tcg_idx, tcg_res);
+ }
+ break;
+ case 0x1f: /* SQRDMLSH */
+ read_vec_element_i32(s, tcg_res, rd, pass,
+ is_scalar ? size : MO_32);
+ if (size == 1) {
+ gen_helper_neon_qrdmlsh_s16(tcg_res, cpu_env,
+ tcg_op, tcg_idx, tcg_res);
+ } else {
+ gen_helper_neon_qrdmlsh_s32(tcg_res, cpu_env,
+ tcg_op, tcg_idx, tcg_res);
+ }
+ break;
default:
g_assert_not_reached();
}
static const AArch64DecodeTable data_proc_simd[] = {
/* pattern , mask , fn */
{ 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
+ { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
{ 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
{ 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
{ 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
{ 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
{ 0x2e000000, 0xbf208400, disas_simd_ext },
{ 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
+ { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
{ 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
{ 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
{ 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
{ 0xce000000, 0xff808000, disas_crypto_four_reg },
{ 0xce800000, 0xffe00000, disas_crypto_xar },
{ 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
+ { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
+ { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
+ { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
{ 0x00000000, 0x00000000, NULL }
};
s->fp_access_checked = false;
switch (extract32(insn, 25, 4)) {
- case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
+ case 0x0: case 0x1: case 0x3: /* UNALLOCATED */
unallocated_encoding(s);
break;
+ case 0x2:
+ if (!arm_dc_feature(s, ARM_FEATURE_SVE) || !disas_sve(s, insn)) {
+ unallocated_encoding(s);
+ }
+ break;
case 0x8: case 0x9: /* Data processing - immediate */
disas_data_proc_imm(s, insn);
break;
free_tmp_a64(s);
}
-static int aarch64_tr_init_disas_context(DisasContextBase *dcbase,
- CPUState *cpu, int max_insns)
+static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
+ CPUState *cpu)
{
DisasContext *dc = container_of(dcbase, DisasContext, base);
CPUARMState *env = cpu->env_ptr;
if (dc->ss_active) {
bound = 1;
}
- max_insns = MIN(max_insns, bound);
+ dc->base.max_insns = MIN(dc->base.max_insns, bound);
init_tmp_a64_array(dc);
-
- return max_insns;
}
static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
case DISAS_UPDATE:
gen_a64_set_pc_im(dc->pc);
/* fall through */
+ case DISAS_EXIT:
+ tcg_gen_exit_tb(NULL, 0);
+ break;
case DISAS_JUMP:
tcg_gen_lookup_and_goto_ptr();
break;
- case DISAS_EXIT:
- tcg_gen_exit_tb(0);
- break;
case DISAS_NORETURN:
case DISAS_SWI:
break;
/* The helper doesn't necessarily throw an exception, but we
* must go back to the main loop to check for interrupts anyway.
*/
- tcg_gen_exit_tb(0);
+ tcg_gen_exit_tb(NULL, 0);
break;
}
}