#include "tcg-op-gvec.h"
#include "qemu/log.h"
#include "qemu/bitops.h"
+#include "qemu/qemu-print.h"
#include "arm_ldst.h"
#include "exec/semihost.h"
#define ENABLE_ARCH_5 arm_dc_feature(s, ARM_FEATURE_V5)
/* currently all emulated v5 cores are also v5TE, so don't bother */
#define ENABLE_ARCH_5TE arm_dc_feature(s, ARM_FEATURE_V5)
-#define ENABLE_ARCH_5J arm_dc_feature(s, ARM_FEATURE_JAZELLE)
+#define ENABLE_ARCH_5J dc_isar_feature(jazelle, s)
#define ENABLE_ARCH_6 arm_dc_feature(s, ARM_FEATURE_V6)
#define ENABLE_ARCH_6K arm_dc_feature(s, ARM_FEATURE_V6K)
#define ENABLE_ARCH_6T2 arm_dc_feature(s, ARM_FEATURE_THUMB2)
#include "exec/gen-icount.h"
-static const char *regnames[] =
+static const char * const regnames[] =
{ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
tcg_temp_free_i32(var);
}
+/*
+ * Variant of store_reg which applies v8M stack-limit checks before updating
+ * SP. If the check fails this will result in an exception being taken.
+ * We disable the stack checks for CONFIG_USER_ONLY because we have
+ * no idea what the stack limits should be in that case.
+ * If stack checking is not being done this just acts like store_reg().
+ */
+static void store_sp_checked(DisasContext *s, TCGv_i32 var)
+{
+#ifndef CONFIG_USER_ONLY
+ if (s->v8m_stackcheck) {
+ gen_helper_v8m_stackcheck(cpu_env, var);
+ }
+#endif
+ store_reg(s, 13, var);
+}
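+/*
+ * Illustrative note: insns whose destination is SP (e.g. the Thumb
+ * ADD/SUB-SP forms) are expected to store their result via
+ * store_sp_checked() rather than store_reg(s, 13, var).
+ */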
+
/* Value extensions. */
#define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
#define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
tcg_temp_free_i32(tcg_excp);
}
-static void gen_ss_advance(DisasContext *s)
-{
- /* If the singlestep state is Active-not-pending, advance to
- * Active-pending.
- */
- if (s->ss_active) {
- s->pstate_ss = 0;
- gen_helper_clear_pstate_ss(cpu_env);
- }
-}
-
static void gen_step_complete_exception(DisasContext *s)
{
/* We just completed a step of an insn. Move from Active-not-pending
return vfp_reg_offset(0, sreg);
}
+/* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
+ * where 0 is the least significant end of the register.
+ */
+static inline long
+neon_element_offset(int reg, int element, TCGMemOp size)
+{
+ int element_size = 1 << size;
+ int ofs = element * element_size;
+#ifdef HOST_WORDS_BIGENDIAN
+ /* Calculate the offset assuming fully little-endian,
+ * then XOR to account for the order of the 8-byte units.
+ */
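+ /* Worked example: the MO_16 element at index 3 sits at little-endian
+ * offset 6, and 6 ^ (8 - 2) == 0 puts it at the start of its 8-byte
+ * unit on this host; index 0 maps the other way, to offset 6.
+ */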
+ if (element_size < 8) {
+ ofs ^= 8 - element_size;
+ }
+#endif
+ return neon_reg_offset(reg, 0) + ofs;
+}
+
static TCGv_i32 neon_load_reg(int reg, int pass)
{
TCGv_i32 tmp = tcg_temp_new_i32();
return tmp;
}
+static void neon_load_element(TCGv_i32 var, int reg, int ele, TCGMemOp mop)
+{
+ long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
+
+ switch (mop) {
+ case MO_UB:
+ tcg_gen_ld8u_i32(var, cpu_env, offset);
+ break;
+ case MO_UW:
+ tcg_gen_ld16u_i32(var, cpu_env, offset);
+ break;
+ case MO_UL:
+ tcg_gen_ld_i32(var, cpu_env, offset);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
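+/* Note, also for the 64-bit variant below: the MO_SIZE masking strips
+ * any sign/endianness flags from the memop, so only the element size
+ * feeds neon_element_offset().
+ */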
+
+static void neon_load_element64(TCGv_i64 var, int reg, int ele, TCGMemOp mop)
+{
+ long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
+
+ switch (mop) {
+ case MO_UB:
+ tcg_gen_ld8u_i64(var, cpu_env, offset);
+ break;
+ case MO_UW:
+ tcg_gen_ld16u_i64(var, cpu_env, offset);
+ break;
+ case MO_UL:
+ tcg_gen_ld32u_i64(var, cpu_env, offset);
+ break;
+ case MO_Q:
+ tcg_gen_ld_i64(var, cpu_env, offset);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
static void neon_store_reg(int reg, int pass, TCGv_i32 var)
{
tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
tcg_temp_free_i32(var);
}
+static void neon_store_element(int reg, int ele, TCGMemOp size, TCGv_i32 var)
+{
+ long offset = neon_element_offset(reg, ele, size);
+
+ switch (size) {
+ case MO_8:
+ tcg_gen_st8_i32(var, cpu_env, offset);
+ break;
+ case MO_16:
+ tcg_gen_st16_i32(var, cpu_env, offset);
+ break;
+ case MO_32:
+ tcg_gen_st_i32(var, cpu_env, offset);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static void neon_store_element64(int reg, int ele, TCGMemOp size, TCGv_i64 var)
+{
+ long offset = neon_element_offset(reg, ele, size);
+
+ switch (size) {
+ case MO_8:
+ tcg_gen_st8_i64(var, cpu_env, offset);
+ break;
+ case MO_16:
+ tcg_gen_st16_i64(var, cpu_env, offset);
+ break;
+ case MO_32:
+ tcg_gen_st32_i64(var, cpu_env, offset);
+ break;
+ case MO_64:
+ tcg_gen_st_i64(var, cpu_env, offset);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
static inline void neon_load_reg64(TCGv_i64 var, int reg)
{
tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
tcg_temp_free_i32(tmp);
}
-static void gen_neon_dup_u8(TCGv_i32 var, int shift)
-{
- TCGv_i32 tmp = tcg_temp_new_i32();
- if (shift)
- tcg_gen_shri_i32(var, var, shift);
- tcg_gen_ext8u_i32(var, var);
- tcg_gen_shli_i32(tmp, var, 8);
- tcg_gen_or_i32(var, var, tmp);
- tcg_gen_shli_i32(tmp, var, 16);
- tcg_gen_or_i32(var, var, tmp);
- tcg_temp_free_i32(tmp);
-}
-
static void gen_neon_dup_low16(TCGv_i32 var)
{
TCGv_i32 tmp = tcg_temp_new_i32();
tcg_temp_free_i32(tmp);
}
-static TCGv_i32 gen_load_and_replicate(DisasContext *s, TCGv_i32 addr, int size)
-{
- /* Load a single Neon element and replicate into a 32 bit TCG reg */
- TCGv_i32 tmp = tcg_temp_new_i32();
- switch (size) {
- case 0:
- gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
- gen_neon_dup_u8(tmp, 0);
- break;
- case 1:
- gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
- gen_neon_dup_low16(tmp);
- break;
- case 2:
- gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
- break;
- default: /* Avoid compiler warnings. */
- abort();
- }
- return tmp;
-}
-
static int handle_vsel(uint32_t insn, uint32_t rd, uint32_t rn, uint32_t rm,
uint32_t dp)
{
FPROUNDING_NEGINF,
};
-static int disas_vfp_v8_insn(DisasContext *s, uint32_t insn)
+static int disas_vfp_misc_insn(DisasContext *s, uint32_t insn)
{
uint32_t rd, rn, rm, dp = extract32(insn, 8, 1);
- if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
- return 1;
- }
-
if (dp) {
VFP_DREG_D(rd, insn);
VFP_DREG_N(rn, insn);
rm = VFP_SREG_M(insn);
}
- if ((insn & 0x0f800e50) == 0x0e000a00) {
+ if ((insn & 0x0f800e50) == 0x0e000a00 && dc_isar_feature(aa32_vsel, s)) {
return handle_vsel(insn, rd, rn, rm, dp);
- } else if ((insn & 0x0fb00e10) == 0x0e800a00) {
+ } else if ((insn & 0x0fb00e10) == 0x0e800a00 &&
+ dc_isar_feature(aa32_vminmaxnm, s)) {
return handle_vminmaxnm(insn, rd, rn, rm, dp);
- } else if ((insn & 0x0fbc0ed0) == 0x0eb80a40) {
+ } else if ((insn & 0x0fbc0ed0) == 0x0eb80a40 &&
+ dc_isar_feature(aa32_vrint, s)) {
/* VRINTA, VRINTN, VRINTP, VRINTM */
int rounding = fp_decode_rm[extract32(insn, 16, 2)];
return handle_vrint(insn, rd, rm, dp, rounding);
- } else if ((insn & 0x0fbc0e50) == 0x0ebc0a40) {
+ } else if ((insn & 0x0fbc0e50) == 0x0ebc0a40 &&
+ dc_isar_feature(aa32_vcvt_dr, s)) {
/* VCVTA, VCVTN, VCVTP, VCVTM */
int rounding = fp_decode_rm[extract32(insn, 16, 2)];
return handle_vcvt(insn, rd, rm, dp, rounding);
* for attempts to execute invalid vfp/neon encodings with FP disabled.
*/
if (s->fp_excp_el) {
- gen_exception_insn(s, 4, EXCP_UDEF,
- syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
+ if (arm_dc_feature(s, ARM_FEATURE_M)) {
+ gen_exception_insn(s, 4, EXCP_NOCP, syn_uncategorized(),
+ s->fp_excp_el);
+ } else {
+ gen_exception_insn(s, 4, EXCP_UDEF,
+ syn_fp_access_trap(1, 0xe, false),
+ s->fp_excp_el);
+ }
return 0;
}
}
}
+ if (arm_dc_feature(s, ARM_FEATURE_M)) {
+ /* Handle M-profile lazy FP state mechanics */
+
+ /* Trigger lazy-state preservation if necessary */
+ if (s->v7m_lspact) {
+ /*
+ * Lazy state saving affects external memory and also the NVIC,
+ * so we must mark it as an IO operation for icount.
+ */
+ if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
+ gen_io_start();
+ }
+ gen_helper_v7m_preserve_fp_state(cpu_env);
+ if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
+ gen_io_end();
+ }
+ /*
+ * If the preserve_fp_state helper doesn't throw an exception
+ * then it will clear LSPACT; we don't need to repeat this for
+ * any further FP insns in this TB.
+ */
+ s->v7m_lspact = false;
+ }
+
+ /* Update ownership of FP context: set FPCCR.S to match current state */
+ if (s->v8m_fpccr_s_wrong) {
+ TCGv_i32 tmp;
+
+ tmp = load_cpu_field(v7m.fpccr[M_REG_S]);
+ if (s->v8m_secure) {
+ tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK);
+ } else {
+ tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK);
+ }
+ store_cpu_field(tmp, v7m.fpccr[M_REG_S]);
+ /* Don't need to do this for any further FP insns in this TB */
+ s->v8m_fpccr_s_wrong = false;
+ }
+
+ if (s->v7m_new_fp_ctxt_needed) {
+ /*
+ * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA
+ * and the FPSCR.
+ */
+ TCGv_i32 control, fpscr;
+ uint32_t bits = R_V7M_CONTROL_FPCA_MASK;
+
+ fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
+ gen_helper_vfp_set_fpscr(cpu_env, fpscr);
+ tcg_temp_free_i32(fpscr);
+ /*
+ * We don't need to arrange to end the TB, because the only
+ * parts of FPSCR which we cache in the TB flags are the VECLEN
+ * and VECSTRIDE, and those don't exist for M-profile.
+ */
+
+ if (s->v8m_secure) {
+ bits |= R_V7M_CONTROL_SFPA_MASK;
+ }
+ control = load_cpu_field(v7m.control[M_REG_S]);
+ tcg_gen_ori_i32(control, control, bits);
+ store_cpu_field(control, v7m.control[M_REG_S]);
+ /* Don't need to do this for any further FP insns in this TB */
+ s->v7m_new_fp_ctxt_needed = false;
+ }
+ }
+
if (extract32(insn, 28, 4) == 0xf) {
- /* Encodings with T=1 (Thumb) or unconditional (ARM):
- * only used in v8 and above.
+ /*
+ * Encodings with T=1 (Thumb) or unconditional (ARM):
+ * only used for the "miscellaneous VFP features" added in v8A
+ * and v7M (and gated on the MVFR2.FPMisc field).
*/
- return disas_vfp_v8_insn(s, insn);
+ return disas_vfp_misc_insn(s, insn);
}
dp = ((insn & 0xf00) == 0xb00);
tmp = load_reg(s, rd);
if (insn & (1 << 23)) {
/* VDUP */
- if (size == 0) {
- gen_neon_dup_u8(tmp, 0);
- } else if (size == 1) {
- gen_neon_dup_low16(tmp);
- }
- for (n = 0; n <= pass * 2; n++) {
- tmp2 = tcg_temp_new_i32();
- tcg_gen_mov_i32(tmp2, tmp);
- neon_store_reg(rn, n, tmp2);
- }
- neon_store_reg(rn, n, tmp);
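+ /* Here "pass" carries the Q bit of the encoding: duplicate into a
+ * whole Q register (16 bytes) or a D register (8 bytes).
+ */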
+ int vec_size = pass ? 16 : 8;
+ tcg_gen_gvec_dup_i32(size, neon_reg_offset(rn, 0),
+ vec_size, vec_size, tmp);
+ tcg_temp_free_i32(tmp);
} else {
/* VMOV */
switch (size) {
}
}
} else { /* !dp */
+ bool is_sysreg;
+
if ((insn & 0x6f) != 0x00)
return 1;
rn = VFP_SREG_N(insn);
+
+ is_sysreg = extract32(insn, 21, 1);
+
+ if (arm_dc_feature(s, ARM_FEATURE_M)) {
+ /*
+ * The only M-profile VFP vmrs/vmsr sysreg is FPSCR.
+ * Accesses to R15 are UNPREDICTABLE; we choose to undef.
+ */
+ if (is_sysreg && (rd == 15 || (rn >> 1) != ARM_VFP_FPSCR)) {
+ return 1;
+ }
+ }
+
if (insn & ARM_CP_RW_BIT) {
/* vfp->arm */
- if (insn & (1 << 21)) {
+ if (is_sysreg) {
/* system register */
rn >>= 1;
}
} else {
/* arm->vfp */
- if (insn & (1 << 21)) {
+ if (is_sysreg) {
rn >>= 1;
/* system register */
switch (rn) {
}
} else {
/* data processing */
+ bool rd_is_dp = dp;
+ bool rm_is_dp = dp;
+ bool no_output = false;
+
/* The opcode is in bits 23, 21, 20 and 6. */
op = ((insn >> 20) & 8) | ((insn >> 19) & 6) | ((insn >> 6) & 1);
- if (dp) {
- if (op == 15) {
- /* rn is opcode */
- rn = ((insn >> 15) & 0x1e) | ((insn >> 7) & 1);
- } else {
- /* rn is register number */
- VFP_DREG_N(rn, insn);
- }
+ rn = VFP_SREG_N(insn);
- if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18) ||
- ((rn & 0x1e) == 0x6))) {
- /* Integer or single/half precision destination. */
- rd = VFP_SREG_D(insn);
- } else {
- VFP_DREG_D(rd, insn);
- }
- if (op == 15 &&
- (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14) ||
- ((rn & 0x1e) == 0x4))) {
- /* VCVT from int or half precision is always from S reg
- * regardless of dp bit. VCVT with immediate frac_bits
- * has same format as SREG_M.
+ if (op == 15) {
+ /* rn is opcode, encoded as per VFP_SREG_N. */
+ switch (rn) {
+ case 0x00: /* vmov */
+ case 0x01: /* vabs */
+ case 0x02: /* vneg */
+ case 0x03: /* vsqrt */
+ break;
+
+ case 0x04: /* vcvtb.f64.f16, vcvtb.f32.f16 */
+ case 0x05: /* vcvtt.f64.f16, vcvtt.f32.f16 */
+ /*
+ * VCVTB, VCVTT: only present with the halfprec extension
+ * UNPREDICTABLE if bit 8 is set prior to ARMv8
+ * (we choose to UNDEF)
*/
- rm = VFP_SREG_M(insn);
- } else {
- VFP_DREG_M(rm, insn);
+ if (dp) {
+ if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
+ return 1;
+ }
+ } else {
+ if (!dc_isar_feature(aa32_fp16_spconv, s)) {
+ return 1;
+ }
+ }
+ rm_is_dp = false;
+ break;
+ case 0x06: /* vcvtb.f16.f32, vcvtb.f16.f64 */
+ case 0x07: /* vcvtt.f16.f32, vcvtt.f16.f64 */
+ if (dp) {
+ if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
+ return 1;
+ }
+ } else {
+ if (!dc_isar_feature(aa32_fp16_spconv, s)) {
+ return 1;
+ }
+ }
+ rd_is_dp = false;
+ break;
+
+ case 0x08: case 0x0a: /* vcmp, vcmpz */
+ case 0x09: case 0x0b: /* vcmpe, vcmpez */
+ no_output = true;
+ break;
+
+ case 0x0c: /* vrintr */
+ case 0x0d: /* vrintz */
+ case 0x0e: /* vrintx */
+ break;
+
+ case 0x0f: /* vcvt double<->single */
+ rd_is_dp = !dp;
+ break;
+
+ case 0x10: /* vcvt.fxx.u32 */
+ case 0x11: /* vcvt.fxx.s32 */
+ rm_is_dp = false;
+ break;
+ case 0x18: /* vcvtr.u32.fxx */
+ case 0x19: /* vcvtz.u32.fxx */
+ case 0x1a: /* vcvtr.s32.fxx */
+ case 0x1b: /* vcvtz.s32.fxx */
+ rd_is_dp = false;
+ break;
+
+ case 0x14: /* vcvt fp <-> fixed */
+ case 0x15:
+ case 0x16:
+ case 0x17:
+ case 0x1c:
+ case 0x1d:
+ case 0x1e:
+ case 0x1f:
+ if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
+ return 1;
+ }
+ /* Immediate frac_bits has same format as SREG_M. */
+ rm_is_dp = false;
+ break;
+
+ case 0x13: /* vjcvt */
+ if (!dp || !dc_isar_feature(aa32_jscvt, s)) {
+ return 1;
+ }
+ rd_is_dp = false;
+ break;
+
+ default:
+ return 1;
}
+ } else if (dp) {
+ /* rn is register number */
+ VFP_DREG_N(rn, insn);
+ }
+
+ if (rd_is_dp) {
+ VFP_DREG_D(rd, insn);
+ } else {
+ rd = VFP_SREG_D(insn);
+ }
+ if (rm_is_dp) {
+ VFP_DREG_M(rm, insn);
} else {
- rn = VFP_SREG_N(insn);
- if (op == 15 && rn == 15) {
- /* Double precision destination. */
- VFP_DREG_D(rd, insn);
- } else {
- rd = VFP_SREG_D(insn);
- }
- /* NB that we implicitly rely on the encoding for the frac_bits
- * in VCVT of fixed to float being the same as that of an SREG_M
- */
rm = VFP_SREG_M(insn);
}
veclen = s->vec_len;
- if (op == 15 && rn > 3)
+ if (op == 15 && rn > 3) {
veclen = 0;
+ }
/* Shut up compiler warnings. */
delta_m = 0;
/* Load the initial operands. */
if (op == 15) {
switch (rn) {
- case 16:
- case 17:
- /* Integer source */
- gen_mov_F0_vreg(0, rm);
- break;
- case 8:
- case 9:
- /* Compare */
+ case 0x08: case 0x09: /* Compare */
gen_mov_F0_vreg(dp, rd);
gen_mov_F1_vreg(dp, rm);
break;
- case 10:
- case 11:
- /* Compare with zero */
+ case 0x0a: case 0x0b: /* Compare with zero */
gen_mov_F0_vreg(dp, rd);
gen_vfp_F1_ld0(dp);
break;
- case 20:
- case 21:
- case 22:
- case 23:
- case 28:
- case 29:
- case 30:
- case 31:
+ case 0x14: /* vcvt fp <-> fixed */
+ case 0x15:
+ case 0x16:
+ case 0x17:
+ case 0x1c:
+ case 0x1d:
+ case 0x1e:
+ case 0x1f:
/* Source and destination the same. */
gen_mov_F0_vreg(dp, rd);
break;
- case 4:
- case 5:
- case 6:
- case 7:
- /* VCVTB, VCVTT: only present with the halfprec extension
- * UNPREDICTABLE if bit 8 is set prior to ARMv8
- * (we choose to UNDEF)
- */
- if ((dp && !arm_dc_feature(s, ARM_FEATURE_V8)) ||
- !arm_dc_feature(s, ARM_FEATURE_VFP_FP16)) {
- return 1;
- }
- if (!extract32(rn, 1, 1)) {
- /* Half precision source. */
- gen_mov_F0_vreg(0, rm);
- break;
- }
- /* Otherwise fall through */
default:
/* One source operand. */
- gen_mov_F0_vreg(dp, rm);
+ gen_mov_F0_vreg(rm_is_dp, rm);
break;
}
} else {
break;
}
case 15: /* single<->double conversion */
- if (dp)
+ if (dp) {
gen_helper_vfp_fcvtsd(cpu_F0s, cpu_F0d, cpu_env);
- else
+ } else {
gen_helper_vfp_fcvtds(cpu_F0d, cpu_F0s, cpu_env);
+ }
break;
case 16: /* fuito */
gen_vfp_uito(dp, 0);
case 17: /* fsito */
gen_vfp_sito(dp, 0);
break;
+ case 19: /* vjcvt */
+ gen_helper_vjcvt(cpu_F0s, cpu_F0d, cpu_env);
+ break;
case 20: /* fshto */
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
- return 1;
- }
gen_vfp_shto(dp, 16 - rm, 0);
break;
case 21: /* fslto */
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
- return 1;
- }
gen_vfp_slto(dp, 32 - rm, 0);
break;
case 22: /* fuhto */
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
- return 1;
- }
gen_vfp_uhto(dp, 16 - rm, 0);
break;
case 23: /* fulto */
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
- return 1;
- }
gen_vfp_ulto(dp, 32 - rm, 0);
break;
case 24: /* ftoui */
gen_vfp_tosiz(dp, 0);
break;
case 28: /* ftosh */
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
- return 1;
- }
gen_vfp_tosh(dp, 16 - rm, 0);
break;
case 29: /* ftosl */
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
- return 1;
- }
gen_vfp_tosl(dp, 32 - rm, 0);
break;
case 30: /* ftouh */
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
- return 1;
- }
gen_vfp_touh(dp, 16 - rm, 0);
break;
case 31: /* ftoul */
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
- return 1;
- }
gen_vfp_toul(dp, 32 - rm, 0);
break;
default: /* undefined */
- return 1;
+ g_assert_not_reached();
}
break;
default: /* undefined */
return 1;
}
- /* Write back the result. */
- if (op == 15 && (rn >= 8 && rn <= 11)) {
- /* Comparison, do nothing. */
- } else if (op == 15 && dp && ((rn & 0x1c) == 0x18 ||
- (rn & 0x1e) == 0x6)) {
- /* VCVT double to int: always integer result.
- * VCVT double to half precision is always a single
- * precision result.
- */
- gen_mov_vreg_F0(0, rd);
- } else if (op == 15 && rn == 15) {
- /* conversion */
- gen_mov_vreg_F0(!dp, rd);
- } else {
- gen_mov_vreg_F0(dp, rd);
+ /* Write back the result, if any. */
+ if (!no_output) {
+ gen_mov_vreg_F0(rd_is_dp, rd);
}
/* break out of the loop if we have finished */
- if (veclen == 0)
+ if (veclen == 0) {
break;
+ }
if (op == 15 && delta_m == 0) {
/* single source one-many */
if (insn & (1 << 24)) /* pre-decrement */
tcg_gen_addi_i32(addr, addr, -((insn & 0xff) << 2));
+ if (s->v8m_stackcheck && rn == 13 && w) {
+ /*
+ * Here 'addr' is the lowest address we will store to,
+ * and is either the old SP (if post-increment) or
+ * the new SP (if pre-decrement). For post-increment
+ * where the old value is below the limit and the new
+ * value is above, it is UNKNOWN whether the limit check
+ * triggers; we choose to trigger.
+ */
+ gen_helper_v8m_stackcheck(cpu_env, addr);
+ }
+
if (dp)
offset = 8;
else
}
/* 32-bit pairwise ops end up the same as the elementwise versions. */
-#define gen_helper_neon_pmax_s32 gen_helper_neon_max_s32
-#define gen_helper_neon_pmax_u32 gen_helper_neon_max_u32
-#define gen_helper_neon_pmin_s32 gen_helper_neon_min_s32
-#define gen_helper_neon_pmin_u32 gen_helper_neon_min_u32
+#define gen_helper_neon_pmax_s32 tcg_gen_smax_i32
+#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
+#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
+#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
#define GEN_NEON_INTEGER_OP_ENV(name) do { \
switch ((size << 1) | u) { \
int nregs;
int interleave;
int spacing;
-} neon_ls_element_type[11] = {
- {4, 4, 1},
- {4, 4, 2},
+} const neon_ls_element_type[11] = {
+ {1, 4, 1},
+ {1, 4, 2},
{4, 1, 1},
- {4, 2, 1},
- {3, 3, 1},
- {3, 3, 2},
+ {2, 2, 2},
+ {1, 3, 1},
+ {1, 3, 2},
{3, 1, 1},
{1, 1, 1},
- {2, 2, 1},
- {2, 2, 2},
+ {1, 2, 1},
+ {1, 2, 2},
{2, 1, 1}
};
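+/* Reading the table: {nregs, interleave, spacing} gives the number of
+ * register groups an op touches, the elements per structure, and the
+ * D-register stride within a structure. E.g. the {1, 4, 2} entry is
+ * VLD4/VST4 with doubled stride: one group of four elements spread
+ * across D[d], D[d+2], D[d+4], D[d+6].
+ */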
int stride;
int size;
int reg;
- int pass;
int load;
- int shift;
int n;
+ int vec_size;
+ int mmu_idx;
+ TCGMemOp endian;
TCGv_i32 addr;
TCGv_i32 tmp;
TCGv_i32 tmp2;
*/
if (s->fp_excp_el) {
gen_exception_insn(s, 4, EXCP_UDEF,
- syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
+ syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
return 0;
}
rn = (insn >> 16) & 0xf;
rm = insn & 0xf;
load = (insn & (1 << 21)) != 0;
+ endian = s->be_data;
+ mmu_idx = get_mem_index(s);
if ((insn & (1 << 23)) == 0) {
/* Load store all elements. */
op = (insn >> 8) & 0xf;
nregs = neon_ls_element_type[op].nregs;
interleave = neon_ls_element_type[op].interleave;
spacing = neon_ls_element_type[op].spacing;
- if (size == 3 && (interleave | spacing) != 1)
+ if (size == 3 && (interleave | spacing) != 1) {
return 1;
+ }
+ /* For our purposes, bytes are always little-endian. */
+ if (size == 0) {
+ endian = MO_LE;
+ }
+ /* Consecutive little-endian elements from a single register
+ * can be promoted to a larger little-endian operation.
+ */
+ if (interleave == 1 && endian == MO_LE) {
+ size = 3;
+ }
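+ /* E.g. a VLD1 of 16-bit elements on a little-endian guest now
+ * executes as one 64-bit load per D register instead of four
+ * 16-bit loads.
+ */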
+ tmp64 = tcg_temp_new_i64();
addr = tcg_temp_new_i32();
+ tmp2 = tcg_const_i32(1 << size);
load_reg_var(s, addr, rn);
- stride = (1 << size) * interleave;
for (reg = 0; reg < nregs; reg++) {
- if (interleave > 2 || (interleave == 2 && nregs == 2)) {
- load_reg_var(s, addr, rn);
- tcg_gen_addi_i32(addr, addr, (1 << size) * reg);
- } else if (interleave == 2 && nregs == 4 && reg == 2) {
- load_reg_var(s, addr, rn);
- tcg_gen_addi_i32(addr, addr, 1 << size);
- }
- if (size == 3) {
- tmp64 = tcg_temp_new_i64();
- if (load) {
- gen_aa32_ld64(s, tmp64, addr, get_mem_index(s));
- neon_store_reg64(tmp64, rd);
- } else {
- neon_load_reg64(tmp64, rd);
- gen_aa32_st64(s, tmp64, addr, get_mem_index(s));
- }
- tcg_temp_free_i64(tmp64);
- tcg_gen_addi_i32(addr, addr, stride);
- } else {
- for (pass = 0; pass < 2; pass++) {
- if (size == 2) {
- if (load) {
- tmp = tcg_temp_new_i32();
- gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
- neon_store_reg(rd, pass, tmp);
- } else {
- tmp = neon_load_reg(rd, pass);
- gen_aa32_st32(s, tmp, addr, get_mem_index(s));
- tcg_temp_free_i32(tmp);
- }
- tcg_gen_addi_i32(addr, addr, stride);
- } else if (size == 1) {
- if (load) {
- tmp = tcg_temp_new_i32();
- gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
- tcg_gen_addi_i32(addr, addr, stride);
- tmp2 = tcg_temp_new_i32();
- gen_aa32_ld16u(s, tmp2, addr, get_mem_index(s));
- tcg_gen_addi_i32(addr, addr, stride);
- tcg_gen_shli_i32(tmp2, tmp2, 16);
- tcg_gen_or_i32(tmp, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- neon_store_reg(rd, pass, tmp);
- } else {
- tmp = neon_load_reg(rd, pass);
- tmp2 = tcg_temp_new_i32();
- tcg_gen_shri_i32(tmp2, tmp, 16);
- gen_aa32_st16(s, tmp, addr, get_mem_index(s));
- tcg_temp_free_i32(tmp);
- tcg_gen_addi_i32(addr, addr, stride);
- gen_aa32_st16(s, tmp2, addr, get_mem_index(s));
- tcg_temp_free_i32(tmp2);
- tcg_gen_addi_i32(addr, addr, stride);
- }
- } else /* size == 0 */ {
- if (load) {
- tmp2 = NULL;
- for (n = 0; n < 4; n++) {
- tmp = tcg_temp_new_i32();
- gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
- tcg_gen_addi_i32(addr, addr, stride);
- if (n == 0) {
- tmp2 = tmp;
- } else {
- tcg_gen_shli_i32(tmp, tmp, n * 8);
- tcg_gen_or_i32(tmp2, tmp2, tmp);
- tcg_temp_free_i32(tmp);
- }
- }
- neon_store_reg(rd, pass, tmp2);
- } else {
- tmp2 = neon_load_reg(rd, pass);
- for (n = 0; n < 4; n++) {
- tmp = tcg_temp_new_i32();
- if (n == 0) {
- tcg_gen_mov_i32(tmp, tmp2);
- } else {
- tcg_gen_shri_i32(tmp, tmp2, n * 8);
- }
- gen_aa32_st8(s, tmp, addr, get_mem_index(s));
- tcg_temp_free_i32(tmp);
- tcg_gen_addi_i32(addr, addr, stride);
- }
- tcg_temp_free_i32(tmp2);
- }
- }
+ for (n = 0; n < 8 >> size; n++) {
+ int xs;
+ for (xs = 0; xs < interleave; xs++) {
+ int tt = rd + reg + spacing * xs;
+
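+ /* tt walks the interleave pattern: for the {1, 2, 2} entry
+ * (VLD2/VST2 with register stride 2), xs = 0, 1 yields
+ * D[d] and D[d + 2].
+ */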
+ if (load) {
+ gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size);
+ neon_store_element64(tt, n, size, tmp64);
+ } else {
+ neon_load_element64(tmp64, tt, n, size);
+ gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size);
+ }
+ tcg_gen_add_i32(addr, addr, tmp2);
}
}
- rd += spacing;
}
tcg_temp_free_i32(addr);
- stride = nregs * 8;
+ tcg_temp_free_i32(tmp2);
+ tcg_temp_free_i64(tmp64);
+ stride = nregs * interleave * 8;
} else {
size = (insn >> 10) & 3;
if (size == 3) {
}
addr = tcg_temp_new_i32();
load_reg_var(s, addr, rn);
- if (nregs == 1) {
- /* VLD1 to all lanes: bit 5 indicates how many Dregs to write */
- tmp = gen_load_and_replicate(s, addr, size);
- tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0));
- tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1));
- if (insn & (1 << 5)) {
- tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 0));
- tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 1));
- }
- tcg_temp_free_i32(tmp);
- } else {
- /* VLD2/3/4 to all lanes: bit 5 indicates register stride */
- stride = (insn & (1 << 5)) ? 2 : 1;
- for (reg = 0; reg < nregs; reg++) {
- tmp = gen_load_and_replicate(s, addr, size);
- tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0));
- tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1));
- tcg_temp_free_i32(tmp);
- tcg_gen_addi_i32(addr, addr, 1 << size);
- rd += stride;
+
+ /* VLD1 to all lanes: bit 5 indicates how many Dregs to write.
+ * VLD2/3/4 to all lanes: bit 5 indicates register stride.
+ */
+ stride = (insn & (1 << 5)) ? 2 : 1;
+ vec_size = nregs == 1 ? stride * 8 : 8;
+
+ tmp = tcg_temp_new_i32();
+ for (reg = 0; reg < nregs; reg++) {
+ gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
+ s->be_data | size);
+ if ((rd & 1) && vec_size == 16) {
+ /* We cannot write 16 bytes at once because the
+ * destination is unaligned.
+ */
+ tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0),
+ 8, 8, tmp);
+ tcg_gen_gvec_mov(0, neon_reg_offset(rd + 1, 0),
+ neon_reg_offset(rd, 0), 8, 8);
+ } else {
+ tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0),
+ vec_size, vec_size, tmp);
}
+ tcg_gen_addi_i32(addr, addr, 1 << size);
+ rd += stride;
}
+ tcg_temp_free_i32(tmp);
tcg_temp_free_i32(addr);
stride = (1 << size) * nregs;
} else {
/* Single element. */
int idx = (insn >> 4) & 0xf;
- pass = (insn >> 7) & 1;
+ int reg_idx;
switch (size) {
case 0:
- shift = ((insn >> 5) & 3) * 8;
+ reg_idx = (insn >> 5) & 7;
stride = 1;
break;
case 1:
- shift = ((insn >> 6) & 1) * 16;
+ reg_idx = (insn >> 6) & 3;
stride = (insn & (1 << 5)) ? 2 : 1;
break;
case 2:
- shift = 0;
+ reg_idx = (insn >> 7) & 1;
stride = (insn & (1 << 6)) ? 2 : 1;
break;
default:
*/
return 1;
}
+ tmp = tcg_temp_new_i32();
addr = tcg_temp_new_i32();
load_reg_var(s, addr, rn);
for (reg = 0; reg < nregs; reg++) {
if (load) {
- tmp = tcg_temp_new_i32();
- switch (size) {
- case 0:
- gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
- break;
- case 1:
- gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
- break;
- case 2:
- gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
- break;
- default: /* Avoid compiler warnings. */
- abort();
- }
- if (size != 2) {
- tmp2 = neon_load_reg(rd, pass);
- tcg_gen_deposit_i32(tmp, tmp2, tmp,
- shift, size ? 16 : 8);
- tcg_temp_free_i32(tmp2);
- }
- neon_store_reg(rd, pass, tmp);
+ gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
+ s->be_data | size);
+ neon_store_element(rd, reg_idx, size, tmp);
} else { /* Store */
- tmp = neon_load_reg(rd, pass);
- if (shift)
- tcg_gen_shri_i32(tmp, tmp, shift);
- switch (size) {
- case 0:
- gen_aa32_st8(s, tmp, addr, get_mem_index(s));
- break;
- case 1:
- gen_aa32_st16(s, tmp, addr, get_mem_index(s));
- break;
- case 2:
- gen_aa32_st32(s, tmp, addr, get_mem_index(s));
- break;
- }
- tcg_temp_free_i32(tmp);
+ neon_load_element(tmp, rd, reg_idx, size);
+ gen_aa32_st_i32(s, tmp, addr, get_mem_index(s),
+ s->be_data | size);
}
rd += stride;
tcg_gen_addi_i32(addr, addr, 1 << size);
}
tcg_temp_free_i32(addr);
+ tcg_temp_free_i32(tmp);
stride = nregs * (1 << size);
}
}
return 0;
}
-/* Bitwise select. dest = c ? t : f. Clobbers T and F. */
-static void gen_neon_bsl(TCGv_i32 dest, TCGv_i32 t, TCGv_i32 f, TCGv_i32 c)
-{
- tcg_gen_and_i32(t, t, c);
- tcg_gen_andc_i32(f, f, c);
- tcg_gen_or_i32(dest, t, f);
-}
-
static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
{
switch (size) {
#define NEON_3R_VABA 15
#define NEON_3R_VADD_VSUB 16
#define NEON_3R_VTST_VCEQ 17
-#define NEON_3R_VML 18 /* VMLA, VMLAL, VMLS, VMLSL */
+#define NEON_3R_VML 18 /* VMLA, VMLS */
#define NEON_3R_VMUL 19
#define NEON_3R_VPMAX 20
#define NEON_3R_VPMIN 21
static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn,
int q, int rd, int rn, int rm)
{
- if (arm_dc_feature(s, ARM_FEATURE_V8_RDM)) {
+ if (dc_isar_feature(aa32_rdm, s)) {
int opr_sz = (1 + q) * 8;
tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
vfp_reg_offset(1, rn),
return 1;
}
+/*
+ * Expanders for the VBitOps group: VBIF, VBIT, VBSL.
+ */
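+/*
+ * All three use the three-op identity x ^ ((y ^ x) & m), which picks
+ * y where the mask m is set and x where it is clear; VBSL, VBIT and
+ * VBIF differ only in which operand supplies the mask.
+ */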
+static void gen_bsl_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
+{
+ tcg_gen_xor_i64(rn, rn, rm);
+ tcg_gen_and_i64(rn, rn, rd);
+ tcg_gen_xor_i64(rd, rm, rn);
+}
+
+static void gen_bit_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
+{
+ tcg_gen_xor_i64(rn, rn, rd);
+ tcg_gen_and_i64(rn, rn, rm);
+ tcg_gen_xor_i64(rd, rd, rn);
+}
+
+static void gen_bif_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
+{
+ tcg_gen_xor_i64(rn, rn, rd);
+ tcg_gen_andc_i64(rn, rn, rm);
+ tcg_gen_xor_i64(rd, rd, rn);
+}
+
+static void gen_bsl_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
+{
+ tcg_gen_xor_vec(vece, rn, rn, rm);
+ tcg_gen_and_vec(vece, rn, rn, rd);
+ tcg_gen_xor_vec(vece, rd, rm, rn);
+}
+
+static void gen_bit_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
+{
+ tcg_gen_xor_vec(vece, rn, rn, rd);
+ tcg_gen_and_vec(vece, rn, rn, rm);
+ tcg_gen_xor_vec(vece, rd, rd, rn);
+}
+
+static void gen_bif_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
+{
+ tcg_gen_xor_vec(vece, rn, rn, rd);
+ tcg_gen_andc_vec(vece, rn, rn, rm);
+ tcg_gen_xor_vec(vece, rd, rd, rn);
+}
+
+const GVecGen3 bsl_op = {
+ .fni8 = gen_bsl_i64,
+ .fniv = gen_bsl_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true
+};
+
+const GVecGen3 bit_op = {
+ .fni8 = gen_bit_i64,
+ .fniv = gen_bit_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true
+};
+
+const GVecGen3 bif_op = {
+ .fni8 = gen_bif_i64,
+ .fniv = gen_bif_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true
+};
+
+static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_vec_sar8i_i64(a, a, shift);
+ tcg_gen_vec_add8_i64(d, d, a);
+}
+
+static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_vec_sar16i_i64(a, a, shift);
+ tcg_gen_vec_add16_i64(d, d, a);
+}
+
+static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
+{
+ tcg_gen_sari_i32(a, a, shift);
+ tcg_gen_add_i32(d, d, a);
+}
+
+static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_sari_i64(a, a, shift);
+ tcg_gen_add_i64(d, d, a);
+}
+
+static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ tcg_gen_sari_vec(vece, a, a, sh);
+ tcg_gen_add_vec(vece, d, d, a);
+}
+
+const GVecGen2i ssra_op[4] = {
+ { .fni8 = gen_ssra8_i64,
+ .fniv = gen_ssra_vec,
+ .load_dest = true,
+ .opc = INDEX_op_sari_vec,
+ .vece = MO_8 },
+ { .fni8 = gen_ssra16_i64,
+ .fniv = gen_ssra_vec,
+ .load_dest = true,
+ .opc = INDEX_op_sari_vec,
+ .vece = MO_16 },
+ { .fni4 = gen_ssra32_i32,
+ .fniv = gen_ssra_vec,
+ .load_dest = true,
+ .opc = INDEX_op_sari_vec,
+ .vece = MO_32 },
+ { .fni8 = gen_ssra64_i64,
+ .fniv = gen_ssra_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opc = INDEX_op_sari_vec,
+ .vece = MO_64 },
+};
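+/* In these tables .opc names the vector opcode an expansion requires;
+ * when the TCG backend lacks it, the gvec layer falls back to the
+ * scalar fni8/fni4 (or out-of-line fno) helpers beside it.
+ */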
+
+static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_vec_shr8i_i64(a, a, shift);
+ tcg_gen_vec_add8_i64(d, d, a);
+}
+
+static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_vec_shr16i_i64(a, a, shift);
+ tcg_gen_vec_add16_i64(d, d, a);
+}
+
+static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
+{
+ tcg_gen_shri_i32(a, a, shift);
+ tcg_gen_add_i32(d, d, a);
+}
+
+static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_shri_i64(a, a, shift);
+ tcg_gen_add_i64(d, d, a);
+}
+
+static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ tcg_gen_shri_vec(vece, a, a, sh);
+ tcg_gen_add_vec(vece, d, d, a);
+}
+
+const GVecGen2i usra_op[4] = {
+ { .fni8 = gen_usra8_i64,
+ .fniv = gen_usra_vec,
+ .load_dest = true,
+ .opc = INDEX_op_shri_vec,
+ .vece = MO_8, },
+ { .fni8 = gen_usra16_i64,
+ .fniv = gen_usra_vec,
+ .load_dest = true,
+ .opc = INDEX_op_shri_vec,
+ .vece = MO_16, },
+ { .fni4 = gen_usra32_i32,
+ .fniv = gen_usra_vec,
+ .load_dest = true,
+ .opc = INDEX_op_shri_vec,
+ .vece = MO_32, },
+ { .fni8 = gen_usra64_i64,
+ .fniv = gen_usra_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opc = INDEX_op_shri_vec,
+ .vece = MO_64, },
+};
+
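+/*
+ * Shift-right-and-insert replaces only the bits the shifted value can
+ * land on: e.g. for 8-bit lanes with shift == 3 the mask is
+ * dup_const(MO_8, 0x1f), so the top three bits of every byte of d are
+ * preserved.
+ */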
+static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ uint64_t mask = dup_const(MO_8, 0xff >> shift);
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(t, a, shift);
+ tcg_gen_andi_i64(t, t, mask);
+ tcg_gen_andi_i64(d, d, ~mask);
+ tcg_gen_or_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ uint64_t mask = dup_const(MO_16, 0xffff >> shift);
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shri_i64(t, a, shift);
+ tcg_gen_andi_i64(t, t, mask);
+ tcg_gen_andi_i64(d, d, ~mask);
+ tcg_gen_or_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
+{
+ tcg_gen_shri_i32(a, a, shift);
+ tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
+}
+
+static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_shri_i64(a, a, shift);
+ tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
+}
+
+static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ if (sh == 0) {
+ tcg_gen_mov_vec(d, a);
+ } else {
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec m = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
+ tcg_gen_shri_vec(vece, t, a, sh);
+ tcg_gen_and_vec(vece, d, d, m);
+ tcg_gen_or_vec(vece, d, d, t);
+
+ tcg_temp_free_vec(t);
+ tcg_temp_free_vec(m);
+ }
+}
+
+const GVecGen2i sri_op[4] = {
+ { .fni8 = gen_shr8_ins_i64,
+ .fniv = gen_shr_ins_vec,
+ .load_dest = true,
+ .opc = INDEX_op_shri_vec,
+ .vece = MO_8 },
+ { .fni8 = gen_shr16_ins_i64,
+ .fniv = gen_shr_ins_vec,
+ .load_dest = true,
+ .opc = INDEX_op_shri_vec,
+ .vece = MO_16 },
+ { .fni4 = gen_shr32_ins_i32,
+ .fniv = gen_shr_ins_vec,
+ .load_dest = true,
+ .opc = INDEX_op_shri_vec,
+ .vece = MO_32 },
+ { .fni8 = gen_shr64_ins_i64,
+ .fniv = gen_shr_ins_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opc = INDEX_op_shri_vec,
+ .vece = MO_64 },
+};
+
+static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ uint64_t mask = dup_const(MO_8, 0xff << shift);
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shli_i64(t, a, shift);
+ tcg_gen_andi_i64(t, t, mask);
+ tcg_gen_andi_i64(d, d, ~mask);
+ tcg_gen_or_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ uint64_t mask = dup_const(MO_16, 0xffff << shift);
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_shli_i64(t, a, shift);
+ tcg_gen_andi_i64(t, t, mask);
+ tcg_gen_andi_i64(d, d, ~mask);
+ tcg_gen_or_i64(d, d, t);
+ tcg_temp_free_i64(t);
+}
+
+static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
+{
+ tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
+}
+
+static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+ tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
+}
+
+static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+ if (sh == 0) {
+ tcg_gen_mov_vec(d, a);
+ } else {
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ TCGv_vec m = tcg_temp_new_vec_matching(d);
+
+ tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
+ tcg_gen_shli_vec(vece, t, a, sh);
+ tcg_gen_and_vec(vece, d, d, m);
+ tcg_gen_or_vec(vece, d, d, t);
+
+ tcg_temp_free_vec(t);
+ tcg_temp_free_vec(m);
+ }
+}
+
+const GVecGen2i sli_op[4] = {
+ { .fni8 = gen_shl8_ins_i64,
+ .fniv = gen_shl_ins_vec,
+ .load_dest = true,
+ .opc = INDEX_op_shli_vec,
+ .vece = MO_8 },
+ { .fni8 = gen_shl16_ins_i64,
+ .fniv = gen_shl_ins_vec,
+ .load_dest = true,
+ .opc = INDEX_op_shli_vec,
+ .vece = MO_16 },
+ { .fni4 = gen_shl32_ins_i32,
+ .fniv = gen_shl_ins_vec,
+ .load_dest = true,
+ .opc = INDEX_op_shli_vec,
+ .vece = MO_32 },
+ { .fni8 = gen_shl64_ins_i64,
+ .fniv = gen_shl_ins_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .opc = INDEX_op_shli_vec,
+ .vece = MO_64 },
+};
+
+static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ gen_helper_neon_mul_u8(a, a, b);
+ gen_helper_neon_add_u8(d, d, a);
+}
+
+static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ gen_helper_neon_mul_u8(a, a, b);
+ gen_helper_neon_sub_u8(d, d, a);
+}
+
+static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ gen_helper_neon_mul_u16(a, a, b);
+ gen_helper_neon_add_u16(d, d, a);
+}
+
+static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ gen_helper_neon_mul_u16(a, a, b);
+ gen_helper_neon_sub_u16(d, d, a);
+}
+
+static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_mul_i32(a, a, b);
+ tcg_gen_add_i32(d, d, a);
+}
+
+static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_mul_i32(a, a, b);
+ tcg_gen_sub_i32(d, d, a);
+}
+
+static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_mul_i64(a, a, b);
+ tcg_gen_add_i64(d, d, a);
+}
+
+static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_mul_i64(a, a, b);
+ tcg_gen_sub_i64(d, d, a);
+}
+
+static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ tcg_gen_mul_vec(vece, a, a, b);
+ tcg_gen_add_vec(vece, d, d, a);
+}
+
+static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ tcg_gen_mul_vec(vece, a, a, b);
+ tcg_gen_sub_vec(vece, d, d, a);
+}
+
+/* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
+ * these tables are shared with AArch64 which does support them.
+ */
+const GVecGen3 mla_op[4] = {
+ { .fni4 = gen_mla8_i32,
+ .fniv = gen_mla_vec,
+ .opc = INDEX_op_mul_vec,
+ .load_dest = true,
+ .vece = MO_8 },
+ { .fni4 = gen_mla16_i32,
+ .fniv = gen_mla_vec,
+ .opc = INDEX_op_mul_vec,
+ .load_dest = true,
+ .vece = MO_16 },
+ { .fni4 = gen_mla32_i32,
+ .fniv = gen_mla_vec,
+ .opc = INDEX_op_mul_vec,
+ .load_dest = true,
+ .vece = MO_32 },
+ { .fni8 = gen_mla64_i64,
+ .fniv = gen_mla_vec,
+ .opc = INDEX_op_mul_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .vece = MO_64 },
+};
+
+const GVecGen3 mls_op[4] = {
+ { .fni4 = gen_mls8_i32,
+ .fniv = gen_mls_vec,
+ .opc = INDEX_op_mul_vec,
+ .load_dest = true,
+ .vece = MO_8 },
+ { .fni4 = gen_mls16_i32,
+ .fniv = gen_mls_vec,
+ .opc = INDEX_op_mul_vec,
+ .load_dest = true,
+ .vece = MO_16 },
+ { .fni4 = gen_mls32_i32,
+ .fniv = gen_mls_vec,
+ .opc = INDEX_op_mul_vec,
+ .load_dest = true,
+ .vece = MO_32 },
+ { .fni8 = gen_mls64_i64,
+ .fniv = gen_mls_vec,
+ .opc = INDEX_op_mul_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .load_dest = true,
+ .vece = MO_64 },
+};
+
+/* CMTST : test is "if (X & Y != 0)". */
+static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+ tcg_gen_and_i32(d, a, b);
+ tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
+ tcg_gen_neg_i32(d, d);
+}
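+/* setcond yields 0 or 1 per lane; the negation turns that into the
+ * 0 / all-ones result that NEON compares produce. The 64-bit variant
+ * below uses the same trick.
+ */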
+
+void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+ tcg_gen_and_i64(d, a, b);
+ tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
+ tcg_gen_neg_i64(d, d);
+}
+
+static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+ tcg_gen_and_vec(vece, d, a, b);
+ tcg_gen_dupi_vec(vece, a, 0);
+ tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
+}
+
+const GVecGen3 cmtst_op[4] = {
+ { .fni4 = gen_helper_neon_tst_u8,
+ .fniv = gen_cmtst_vec,
+ .vece = MO_8 },
+ { .fni4 = gen_helper_neon_tst_u16,
+ .fniv = gen_cmtst_vec,
+ .vece = MO_16 },
+ { .fni4 = gen_cmtst_i32,
+ .fniv = gen_cmtst_vec,
+ .vece = MO_32 },
+ { .fni8 = gen_cmtst_i64,
+ .fniv = gen_cmtst_vec,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ .vece = MO_64 },
+};
+
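+/*
+ * The saturating expanders compute both the wrapping and the
+ * saturating result; lanes where the two differ have saturated, and
+ * that per-lane compare is ORed into the QC (cumulative saturation)
+ * flag vector.
+ */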
+static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
+ TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec x = tcg_temp_new_vec_matching(t);
+ tcg_gen_add_vec(vece, x, a, b);
+ tcg_gen_usadd_vec(vece, t, a, b);
+ tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
+ tcg_gen_or_vec(vece, sat, sat, x);
+ tcg_temp_free_vec(x);
+}
+
+const GVecGen4 uqadd_op[4] = {
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_b,
+ .opc = INDEX_op_usadd_vec,
+ .write_aofs = true,
+ .vece = MO_8 },
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_h,
+ .opc = INDEX_op_usadd_vec,
+ .write_aofs = true,
+ .vece = MO_16 },
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_s,
+ .opc = INDEX_op_usadd_vec,
+ .write_aofs = true,
+ .vece = MO_32 },
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_d,
+ .opc = INDEX_op_usadd_vec,
+ .write_aofs = true,
+ .vece = MO_64 },
+};
+
+static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
+ TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec x = tcg_temp_new_vec_matching(t);
+ tcg_gen_add_vec(vece, x, a, b);
+ tcg_gen_ssadd_vec(vece, t, a, b);
+ tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
+ tcg_gen_or_vec(vece, sat, sat, x);
+ tcg_temp_free_vec(x);
+}
+
+const GVecGen4 sqadd_op[4] = {
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_b,
+ .opc = INDEX_op_ssadd_vec,
+ .write_aofs = true,
+ .vece = MO_8 },
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_h,
+ .opc = INDEX_op_ssadd_vec,
+ .write_aofs = true,
+ .vece = MO_16 },
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_s,
+ .opc = INDEX_op_ssadd_vec,
+ .write_aofs = true,
+ .vece = MO_32 },
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_d,
+ .opc = INDEX_op_ssadd_vec,
+ .write_aofs = true,
+ .vece = MO_64 },
+};
+
+static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
+ TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec x = tcg_temp_new_vec_matching(t);
+ tcg_gen_sub_vec(vece, x, a, b);
+ tcg_gen_ussub_vec(vece, t, a, b);
+ tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
+ tcg_gen_or_vec(vece, sat, sat, x);
+ tcg_temp_free_vec(x);
+}
+
+const GVecGen4 uqsub_op[4] = {
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_b,
+ .opc = INDEX_op_ussub_vec,
+ .write_aofs = true,
+ .vece = MO_8 },
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_h,
+ .opc = INDEX_op_ussub_vec,
+ .write_aofs = true,
+ .vece = MO_16 },
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_s,
+ .opc = INDEX_op_ussub_vec,
+ .write_aofs = true,
+ .vece = MO_32 },
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_d,
+ .opc = INDEX_op_ussub_vec,
+ .write_aofs = true,
+ .vece = MO_64 },
+};
+
+static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
+ TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec x = tcg_temp_new_vec_matching(t);
+ tcg_gen_sub_vec(vece, x, a, b);
+ tcg_gen_sssub_vec(vece, t, a, b);
+ tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
+ tcg_gen_or_vec(vece, sat, sat, x);
+ tcg_temp_free_vec(x);
+}
+
+const GVecGen4 sqsub_op[4] = {
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_b,
+ .opc = INDEX_op_sssub_vec,
+ .write_aofs = true,
+ .vece = MO_8 },
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_h,
+ .opc = INDEX_op_sssub_vec,
+ .write_aofs = true,
+ .vece = MO_16 },
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_s,
+ .opc = INDEX_op_sssub_vec,
+ .write_aofs = true,
+ .vece = MO_32 },
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_d,
+ .opc = INDEX_op_sssub_vec,
+ .write_aofs = true,
+ .vece = MO_64 },
+};
+
/* Translate a NEON data processing instruction. Return nonzero if the
instruction is invalid.
We process data in a mixture of 32-bit and 64-bit chunks.
{
int op;
int q;
- int rd, rn, rm;
+ int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs;
int size;
int shift;
int pass;
int count;
int pairwise;
int u;
- uint32_t imm, mask;
+ int vec_size;
+ uint32_t imm;
TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
TCGv_ptr ptr1, ptr2, ptr3;
TCGv_i64 tmp64;
*/
if (s->fp_excp_el) {
gen_exception_insn(s, 4, EXCP_UDEF,
- syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
+ syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
return 0;
}
VFP_DREG_N(rn, insn);
VFP_DREG_M(rm, insn);
size = (insn >> 20) & 3;
+ vec_size = q ? 16 : 8;
+ rd_ofs = neon_reg_offset(rd, 0);
+ rn_ofs = neon_reg_offset(rn, 0);
+ rm_ofs = neon_reg_offset(rm, 0);
+
if ((insn & (1 << 23)) == 0) {
/* Three register same length. */
op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
return 1;
}
if (!u) { /* SHA-1 */
- if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)) {
+ if (!dc_isar_feature(aa32_sha1, s)) {
return 1;
}
ptr1 = vfp_reg_ptr(true, rd);
gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp4);
tcg_temp_free_i32(tmp4);
} else { /* SHA-256 */
- if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA256) || size == 3) {
+ if (!dc_isar_feature(aa32_sha2, s) || size == 3) {
return 1;
}
ptr1 = vfp_reg_ptr(true, rd);
q, rd, rn, rm);
}
return 1;
+
+ case NEON_3R_LOGIC: /* Logic ops. */
+ switch ((u << 2) | size) {
+ case 0: /* VAND */
+ tcg_gen_gvec_and(0, rd_ofs, rn_ofs, rm_ofs,
+ vec_size, vec_size);
+ break;
+ case 1: /* VBIC */
+ tcg_gen_gvec_andc(0, rd_ofs, rn_ofs, rm_ofs,
+ vec_size, vec_size);
+ break;
+ case 2: /* VORR */
+ tcg_gen_gvec_or(0, rd_ofs, rn_ofs, rm_ofs,
+ vec_size, vec_size);
+ break;
+ case 3: /* VORN */
+ tcg_gen_gvec_orc(0, rd_ofs, rn_ofs, rm_ofs,
+ vec_size, vec_size);
+ break;
+ case 4: /* VEOR */
+ tcg_gen_gvec_xor(0, rd_ofs, rn_ofs, rm_ofs,
+ vec_size, vec_size);
+ break;
+ case 5: /* VBSL */
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs,
+ vec_size, vec_size, &bsl_op);
+ break;
+ case 6: /* VBIT */
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs,
+ vec_size, vec_size, &bit_op);
+ break;
+ case 7: /* VBIF */
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs,
+ vec_size, vec_size, &bif_op);
+ break;
+ }
+ return 0;
+
+ case NEON_3R_VADD_VSUB:
+ if (u) {
+ tcg_gen_gvec_sub(size, rd_ofs, rn_ofs, rm_ofs,
+ vec_size, vec_size);
+ } else {
+ tcg_gen_gvec_add(size, rd_ofs, rn_ofs, rm_ofs,
+ vec_size, vec_size);
+ }
+ return 0;
+
+ case NEON_3R_VQADD:
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
+ rn_ofs, rm_ofs, vec_size, vec_size,
+ (u ? uqadd_op : sqadd_op) + size);
+ break;
+
+ case NEON_3R_VQSUB:
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
+ rn_ofs, rm_ofs, vec_size, vec_size,
+ (u ? uqsub_op : sqsub_op) + size);
+ break;
+
+ case NEON_3R_VMUL: /* VMUL */
+ if (u) {
+ /* Polynomial case allows only P8 and is handled below. */
+ if (size != 0) {
+ return 1;
+ }
+ } else {
+ tcg_gen_gvec_mul(size, rd_ofs, rn_ofs, rm_ofs,
+ vec_size, vec_size);
+ return 0;
+ }
+ break;
+
+ case NEON_3R_VML: /* VMLA, VMLS */
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
+ u ? &mls_op[size] : &mla_op[size]);
+ return 0;
+
+ case NEON_3R_VTST_VCEQ:
+ if (u) { /* VCEQ */
+ tcg_gen_gvec_cmp(TCG_COND_EQ, size, rd_ofs, rn_ofs, rm_ofs,
+ vec_size, vec_size);
+ } else { /* VTST */
+ tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs,
+ vec_size, vec_size, &cmtst_op[size]);
+ }
+ return 0;
+
+ case NEON_3R_VCGT:
+ tcg_gen_gvec_cmp(u ? TCG_COND_GTU : TCG_COND_GT, size,
+ rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
+ return 0;
+
+ case NEON_3R_VCGE:
+ tcg_gen_gvec_cmp(u ? TCG_COND_GEU : TCG_COND_GE, size,
+ rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
+ return 0;
+
+ case NEON_3R_VMAX:
+ if (u) {
+ tcg_gen_gvec_umax(size, rd_ofs, rn_ofs, rm_ofs,
+ vec_size, vec_size);
+ } else {
+ tcg_gen_gvec_smax(size, rd_ofs, rn_ofs, rm_ofs,
+ vec_size, vec_size);
+ }
+ return 0;
+ case NEON_3R_VMIN:
+ if (u) {
+ tcg_gen_gvec_umin(size, rd_ofs, rn_ofs, rm_ofs,
+ vec_size, vec_size);
+ } else {
+ tcg_gen_gvec_smin(size, rd_ofs, rn_ofs, rm_ofs,
+ vec_size, vec_size);
+ }
+ return 0;
}
- if (size == 3 && op != NEON_3R_LOGIC) {
+
+ if (size == 3) {
/* 64-bit element instructions. */
for (pass = 0; pass < (q ? 2 : 1); pass++) {
neon_load_reg64(cpu_V0, rn + pass);
neon_load_reg64(cpu_V1, rm + pass);
switch (op) {
- case NEON_3R_VQADD:
- if (u) {
- gen_helper_neon_qadd_u64(cpu_V0, cpu_env,
- cpu_V0, cpu_V1);
- } else {
- gen_helper_neon_qadd_s64(cpu_V0, cpu_env,
- cpu_V0, cpu_V1);
- }
- break;
- case NEON_3R_VQSUB:
- if (u) {
- gen_helper_neon_qsub_u64(cpu_V0, cpu_env,
- cpu_V0, cpu_V1);
- } else {
- gen_helper_neon_qsub_s64(cpu_V0, cpu_env,
- cpu_V0, cpu_V1);
- }
- break;
case NEON_3R_VSHL:
if (u) {
gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
cpu_V1, cpu_V0);
}
break;
- case NEON_3R_VADD_VSUB:
- if (u) {
- tcg_gen_sub_i64(CPU_V001);
- } else {
- tcg_gen_add_i64(CPU_V001);
- }
- break;
default:
abort();
}
return 1;
}
break;
- case NEON_3R_VMUL:
- if (u && (size != 0)) {
- /* UNDEF on invalid size for polynomial subcase */
- return 1;
- }
- break;
case NEON_3R_VFM_VQRDMLSH:
if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) {
return 1;
case NEON_3R_VHADD:
GEN_NEON_INTEGER_OP(hadd);
break;
- case NEON_3R_VQADD:
- GEN_NEON_INTEGER_OP_ENV(qadd);
- break;
case NEON_3R_VRHADD:
GEN_NEON_INTEGER_OP(rhadd);
break;
- case NEON_3R_LOGIC: /* Logic ops. */
- switch ((u << 2) | size) {
- case 0: /* VAND */
- tcg_gen_and_i32(tmp, tmp, tmp2);
- break;
- case 1: /* BIC */
- tcg_gen_andc_i32(tmp, tmp, tmp2);
- break;
- case 2: /* VORR */
- tcg_gen_or_i32(tmp, tmp, tmp2);
- break;
- case 3: /* VORN */
- tcg_gen_orc_i32(tmp, tmp, tmp2);
- break;
- case 4: /* VEOR */
- tcg_gen_xor_i32(tmp, tmp, tmp2);
- break;
- case 5: /* VBSL */
- tmp3 = neon_load_reg(rd, pass);
- gen_neon_bsl(tmp, tmp, tmp2, tmp3);
- tcg_temp_free_i32(tmp3);
- break;
- case 6: /* VBIT */
- tmp3 = neon_load_reg(rd, pass);
- gen_neon_bsl(tmp, tmp, tmp3, tmp2);
- tcg_temp_free_i32(tmp3);
- break;
- case 7: /* VBIF */
- tmp3 = neon_load_reg(rd, pass);
- gen_neon_bsl(tmp, tmp3, tmp, tmp2);
- tcg_temp_free_i32(tmp3);
- break;
- }
- break;
case NEON_3R_VHSUB:
GEN_NEON_INTEGER_OP(hsub);
break;
- case NEON_3R_VQSUB:
- GEN_NEON_INTEGER_OP_ENV(qsub);
- break;
- case NEON_3R_VCGT:
- GEN_NEON_INTEGER_OP(cgt);
- break;
- case NEON_3R_VCGE:
- GEN_NEON_INTEGER_OP(cge);
- break;
case NEON_3R_VSHL:
GEN_NEON_INTEGER_OP(shl);
break;
case NEON_3R_VQRSHL:
GEN_NEON_INTEGER_OP_ENV(qrshl);
break;
- case NEON_3R_VMAX:
- GEN_NEON_INTEGER_OP(max);
- break;
- case NEON_3R_VMIN:
- GEN_NEON_INTEGER_OP(min);
- break;
case NEON_3R_VABD:
GEN_NEON_INTEGER_OP(abd);
break;
- case NEON_3R_VABA:
- GEN_NEON_INTEGER_OP(abd);
- tcg_temp_free_i32(tmp2);
- tmp2 = neon_load_reg(rd, pass);
- gen_neon_add(size, tmp, tmp2);
- break;
- case NEON_3R_VADD_VSUB:
- if (!u) { /* VADD */
- gen_neon_add(size, tmp, tmp2);
- } else { /* VSUB */
- switch (size) {
- case 0: gen_helper_neon_sub_u8(tmp, tmp, tmp2); break;
- case 1: gen_helper_neon_sub_u16(tmp, tmp, tmp2); break;
- case 2: tcg_gen_sub_i32(tmp, tmp, tmp2); break;
- default: abort();
- }
- }
- break;
- case NEON_3R_VTST_VCEQ:
- if (!u) { /* VTST */
- switch (size) {
- case 0: gen_helper_neon_tst_u8(tmp, tmp, tmp2); break;
- case 1: gen_helper_neon_tst_u16(tmp, tmp, tmp2); break;
- case 2: gen_helper_neon_tst_u32(tmp, tmp, tmp2); break;
- default: abort();
- }
- } else { /* VCEQ */
- switch (size) {
- case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
- case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
- case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
- default: abort();
- }
- }
- break;
- case NEON_3R_VML: /* VMLA, VMLAL, VMLS,VMLSL */
- switch (size) {
- case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
- case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
- case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
- default: abort();
- }
+ case NEON_3R_VABA:
+ GEN_NEON_INTEGER_OP(abd);
tcg_temp_free_i32(tmp2);
tmp2 = neon_load_reg(rd, pass);
- if (u) { /* VMLS */
- gen_neon_rsb(size, tmp, tmp2);
- } else { /* VMLA */
- gen_neon_add(size, tmp, tmp2);
- }
+ gen_neon_add(size, tmp, tmp2);
break;
case NEON_3R_VMUL:
- if (u) { /* polynomial */
- gen_helper_neon_mul_p8(tmp, tmp, tmp2);
- } else { /* Integer */
- switch (size) {
- case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
- case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
- case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
- default: abort();
- }
- }
+ /* VMUL.P8; other cases already eliminated. */
+ gen_helper_neon_mul_p8(tmp, tmp, tmp2);
break;
case NEON_3R_VPMAX:
GEN_NEON_INTEGER_OP(pmax);
size--;
}
shift = (insn >> 16) & ((1 << (3 + size)) - 1);
- /* To avoid excessive duplication of ops we implement shift
- by immediate using the variable shift operations. */
if (op < 8) {
/* Shift by immediate:
VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */
}
/* Right shifts are encoded as N - shift, where N is the
element size in bits. */
- if (op <= 4)
+ if (op <= 4) {
shift = shift - (1 << (size + 3));
+ }
+
+ switch (op) {
+ case 0: /* VSHR */
+ /* Right shift comes here negative. */
+ shift = -shift;
+ /* Shifts larger than the element size are architecturally
+ * valid. Unsigned results in all zeros; signed results
+ * in all sign bits.
+ */
+ if (!u) {
+ tcg_gen_gvec_sari(size, rd_ofs, rm_ofs,
+ MIN(shift, (8 << size) - 1),
+ vec_size, vec_size);
+ } else if (shift >= 8 << size) {
+ tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0);
+ } else {
+ tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift,
+ vec_size, vec_size);
+ }
+ return 0;
+
+ case 1: /* VSRA */
+ /* Right shift comes here negative. */
+ shift = -shift;
+ /* Shifts larger than the element size are architecturally
+ * valid. Unsigned results in all zeros; signed results
+ * in all sign bits.
+ */
+ if (!u) {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
+ MIN(shift, (8 << size) - 1),
+ &ssra_op[size]);
+ } else if (shift >= 8 << size) {
+ /* rd += 0 */
+ } else {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
+ shift, &usra_op[size]);
+ }
+ return 0;
+
+ case 4: /* VSRI */
+ if (!u) {
+ return 1;
+ }
+ /* Right shift comes here negative. */
+ shift = -shift;
+ /* Shift out of range leaves destination unchanged. */
+ if (shift < 8 << size) {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
+ shift, &sri_op[size]);
+ }
+ return 0;
+
+ case 5: /* VSHL, VSLI */
+ if (u) { /* VSLI */
+ /* Shift out of range leaves destination unchanged. */
+ if (shift < 8 << size) {
+ tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size,
+ vec_size, shift, &sli_op[size]);
+ }
+ } else { /* VSHL */
+ /* Shifts larger than the element size are
+ * architecturally valid and result in zero.
+ */
+ if (shift >= 8 << size) {
+ tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0);
+ } else {
+ tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift,
+ vec_size, vec_size);
+ }
+ }
+ return 0;
+ }
+
if (size == 3) {
count = q + 1;
} else {
count = q ? 4: 2;
}
- switch (size) {
- case 0:
- imm = (uint8_t) shift;
- imm |= imm << 8;
- imm |= imm << 16;
- break;
- case 1:
- imm = (uint16_t) shift;
- imm |= imm << 16;
- break;
- case 2:
- case 3:
- imm = shift;
- break;
- default:
- abort();
- }
+
+ /* To avoid excessive duplication of ops we implement shift
+ * by immediate using the variable shift operations.
+ */
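+ /* E.g. size == MO_8 with shift == 2 yields
+ * imm == 0x0202020202020202ull.
+ */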
+ imm = dup_const(size, shift);
for (pass = 0; pass < count; pass++) {
if (size == 3) {
neon_load_reg64(cpu_V0, rm + pass);
tcg_gen_movi_i64(cpu_V1, imm);
switch (op) {
- case 0: /* VSHR */
- case 1: /* VSRA */
- if (u)
- gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
- else
- gen_helper_neon_shl_s64(cpu_V0, cpu_V0, cpu_V1);
- break;
case 2: /* VRSHR */
case 3: /* VRSRA */
if (u)
else
gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1);
break;
- case 4: /* VSRI */
- case 5: /* VSHL, VSLI */
- gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
- break;
case 6: /* VQSHLU */
gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
cpu_V0, cpu_V1);
cpu_V0, cpu_V1);
}
break;
+ default:
+ g_assert_not_reached();
}
- if (op == 1 || op == 3) {
+ if (op == 3) {
/* Accumulate. */
neon_load_reg64(cpu_V1, rd + pass);
tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
- } else if (op == 4 || (op == 5 && u)) {
- /* Insert */
- neon_load_reg64(cpu_V1, rd + pass);
- uint64_t mask;
- if (shift < -63 || shift > 63) {
- mask = 0;
- } else {
- if (op == 4) {
- mask = 0xffffffffffffffffull >> -shift;
- } else {
- mask = 0xffffffffffffffffull << shift;
- }
- }
- tcg_gen_andi_i64(cpu_V1, cpu_V1, ~mask);
- tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
}
neon_store_reg64(cpu_V0, rd + pass);
} else { /* size < 3 */
tmp2 = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp2, imm);
switch (op) {
- case 0: /* VSHR */
- case 1: /* VSRA */
- GEN_NEON_INTEGER_OP(shl);
- break;
case 2: /* VRSHR */
case 3: /* VRSRA */
GEN_NEON_INTEGER_OP(rshl);
break;
- case 4: /* VSRI */
- case 5: /* VSHL, VSLI */
- switch (size) {
- case 0: gen_helper_neon_shl_u8(tmp, tmp, tmp2); break;
- case 1: gen_helper_neon_shl_u16(tmp, tmp, tmp2); break;
- case 2: gen_helper_neon_shl_u32(tmp, tmp, tmp2); break;
- default: abort();
- }
- break;
case 6: /* VQSHLU */
switch (size) {
case 0:
case 7: /* VQSHL */
GEN_NEON_INTEGER_OP_ENV(qshl);
break;
+ default:
+ g_assert_not_reached();
}
tcg_temp_free_i32(tmp2);
- if (op == 1 || op == 3) {
+ if (op == 3) {
/* Accumulate. */
tmp2 = neon_load_reg(rd, pass);
gen_neon_add(size, tmp, tmp2);
tcg_temp_free_i32(tmp2);
- } else if (op == 4 || (op == 5 && u)) {
- /* Insert */
- switch (size) {
- case 0:
- if (op == 4)
- mask = 0xff >> -shift;
- else
- mask = (uint8_t)(0xff << shift);
- mask |= mask << 8;
- mask |= mask << 16;
- break;
- case 1:
- if (op == 4)
- mask = 0xffff >> -shift;
- else
- mask = (uint16_t)(0xffff << shift);
- mask |= mask << 16;
- break;
- case 2:
- if (shift < -31 || shift > 31) {
- mask = 0;
- } else {
- if (op == 4)
- mask = 0xffffffffu >> -shift;
- else
- mask = 0xffffffffu << shift;
- }
- break;
- default:
- abort();
- }
- tmp2 = neon_load_reg(rd, pass);
- tcg_gen_andi_i32(tmp, tmp, mask);
- tcg_gen_andi_i32(tmp2, tmp2, ~mask);
- tcg_gen_or_i32(tmp, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
}
neon_store_reg(rd, pass, tmp);
}
return 1;
}
} else { /* (insn & 0x00380080) == 0 */
- int invert;
+ int invert, reg_ofs, vec_size;
+
if (q && (rd & 1)) {
return 1;
}
break;
case 14:
imm |= (imm << 8) | (imm << 16) | (imm << 24);
- if (invert)
+ if (invert) {
imm = ~imm;
+ }
break;
case 15:
if (invert) {
return 1;
}
imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
| ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
break;
}
- if (invert)
+ if (invert) {
imm = ~imm;
+ }
- for (pass = 0; pass < (q ? 4 : 2); pass++) {
- if (op & 1 && op < 12) {
- tmp = neon_load_reg(rd, pass);
- if (invert) {
- /* The immediate value has already been inverted, so
- BIC becomes AND. */
- tcg_gen_andi_i32(tmp, tmp, imm);
- } else {
- tcg_gen_ori_i32(tmp, tmp, imm);
- }
+ reg_ofs = neon_reg_offset(rd, 0);
+ vec_size = q ? 16 : 8;
+
+ if (op & 1 && op < 12) {
+ if (invert) {
+ /* The immediate value has already been inverted,
+ * so BIC becomes AND.
+ */
+ tcg_gen_gvec_andi(MO_32, reg_ofs, reg_ofs, imm,
+ vec_size, vec_size);
} else {
- /* VMOV, VMVN. */
- tmp = tcg_temp_new_i32();
- if (op == 14 && invert) {
+ tcg_gen_gvec_ori(MO_32, reg_ofs, reg_ofs, imm,
+ vec_size, vec_size);
+ }
+ } else {
+ /* VMOV, VMVN. */
+ if (op == 14 && invert) {
+ TCGv_i64 t64 = tcg_temp_new_i64();
+
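+ /* Each set bit of the 8-bit immediate selects a whole 0xff byte
+ * of the 64-bit result (the cmode 14, op 1 encoding).
+ */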
+ for (pass = 0; pass <= q; ++pass) {
+ uint64_t val = 0;
int n;
- uint32_t val;
- val = 0;
- for (n = 0; n < 4; n++) {
- if (imm & (1 << (n + (pass & 1) * 4)))
- val |= 0xff << (n * 8);
+
+ for (n = 0; n < 8; n++) {
+ if (imm & (1 << (n + pass * 8))) {
+ val |= 0xffull << (n * 8);
+ }
}
- tcg_gen_movi_i32(tmp, val);
- } else {
- tcg_gen_movi_i32(tmp, imm);
+ tcg_gen_movi_i64(t64, val);
+ neon_store_reg64(t64, rd + pass);
}
+ tcg_temp_free_i64(t64);
+ } else {
+ tcg_gen_gvec_dup32i(reg_ofs, vec_size, vec_size, imm);
}
- neon_store_reg(rd, pass, tmp);
}
}
} else { /* ((insn & 0x00800010) == 0x00800000) */
if (op == 14 && size == 2) {
TCGv_i64 tcg_rn, tcg_rm, tcg_rd;
- if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
+ if (!dc_isar_feature(aa32_pmull, s)) {
return 1;
}
tcg_rn = tcg_temp_new_i64();
{
NeonGenThreeOpEnvFn *fn;
- if (!arm_dc_feature(s, ARM_FEATURE_V8_RDM)) {
+ if (!dc_isar_feature(aa32_rdm, s)) {
return 1;
}
if (u && ((rd | rn) & 1)) {
TCGv_ptr fpst;
TCGv_i32 ahp;
- if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) ||
+ if (!dc_isar_feature(aa32_fp16_spconv, s) ||
q || (rm & 1)) {
return 1;
}
{
TCGv_ptr fpst;
TCGv_i32 ahp;
- if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) ||
+ if (!dc_isar_feature(aa32_fp16_spconv, s) ||
q || (rd & 1)) {
return 1;
}
break;
}
case NEON_2RM_AESE: case NEON_2RM_AESMC:
- if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
- || ((rm | rd) & 1)) {
+ if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) {
return 1;
}
ptr1 = vfp_reg_ptr(true, rd);
tcg_temp_free_i32(tmp3);
break;
case NEON_2RM_SHA1H:
- if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)
- || ((rm | rd) & 1)) {
+ if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) {
return 1;
}
ptr1 = vfp_reg_ptr(true, rd);
}
/* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
if (q) {
- if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA256)) {
+ if (!dc_isar_feature(aa32_sha2, s)) {
return 1;
}
- } else if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)) {
+ } else if (!dc_isar_feature(aa32_sha1, s)) {
return 1;
}
ptr1 = vfp_reg_ptr(true, rd);
tcg_temp_free_ptr(ptr1);
tcg_temp_free_ptr(ptr2);
break;
+
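+ /* These two-reg-misc ops map directly onto gvec operations, so
+ * handle them here rather than in the elementwise loop below;
+ * the element size is irrelevant to the bitwise VMVN, hence the 0.
+ */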
+ case NEON_2RM_VMVN:
+ tcg_gen_gvec_not(0, rd_ofs, rm_ofs, vec_size, vec_size);
+ break;
+ case NEON_2RM_VNEG:
+ tcg_gen_gvec_neg(size, rd_ofs, rm_ofs, vec_size, vec_size);
+ break;
+
default:
elementwise:
for (pass = 0; pass < (q ? 4 : 2); pass++) {
case NEON_2RM_VCNT:
gen_helper_neon_cnt_u8(tmp, tmp);
break;
- case NEON_2RM_VMVN:
- tcg_gen_not_i32(tmp, tmp);
- break;
case NEON_2RM_VQABS:
switch (size) {
case 0:
default: abort();
}
break;
- case NEON_2RM_VNEG:
- tmp2 = tcg_const_i32(0);
- gen_neon_rsb(size, tmp, tmp2);
- tcg_temp_free_i32(tmp2);
- break;
case NEON_2RM_VCGT0_F:
{
TCGv_ptr fpstatus = get_fpstatus_ptr(1);
tcg_temp_free_i32(tmp);
} else if ((insn & 0x380) == 0) {
/* VDUP */
+ int element;
+ TCGMemOp size;
+
if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
return 1;
}
- if (insn & (1 << 19)) {
- tmp = neon_load_reg(rm, 1);
- } else {
- tmp = neon_load_reg(rm, 0);
- }
if (insn & (1 << 16)) {
- gen_neon_dup_u8(tmp, ((insn >> 17) & 3) * 8);
+ size = MO_8;
+ element = (insn >> 17) & 7;
} else if (insn & (1 << 17)) {
- if ((insn >> 18) & 1)
- gen_neon_dup_high16(tmp);
- else
- gen_neon_dup_low16(tmp);
- }
- for (pass = 0; pass < (q ? 4 : 2); pass++) {
- tmp2 = tcg_temp_new_i32();
- tcg_gen_mov_i32(tmp2, tmp);
- neon_store_reg(rd, pass, tmp2);
+ size = MO_16;
+ element = (insn >> 18) & 3;
+ } else {
+ size = MO_32;
+ element = (insn >> 19) & 1;
}
- tcg_temp_free_i32(tmp);
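+ /* tcg_gen_gvec_dup_mem() loads the selected element once from
+ * the source offset and broadcasts it into every lane of Vd.
+ */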
+ tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, 0),
+ neon_element_offset(rm, element, size),
+ q ? 16 : 8, q ? 16 : 8);
} else {
return 1;
}
gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL;
int rd, rn, rm, opr_sz;
int data = 0;
- bool q;
-
- q = extract32(insn, 6, 1);
- VFP_DREG_D(rd, insn);
- VFP_DREG_N(rn, insn);
- VFP_DREG_M(rm, insn);
- if ((rd | rn | rm) & q) {
- return 1;
- }
+ int off_rn, off_rm;
+ bool is_long = false, q = extract32(insn, 6, 1);
+ bool ptr_is_env = false;
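+ /* ptr_is_env means the helper takes cpu_env rather than a
+ * float_status pointer as its pointer argument.
+ */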
if ((insn & 0xfe200f10) == 0xfc200800) {
/* VCMLA -- 1111 110R R.1S .... .... 1000 ...0 .... */
int size = extract32(insn, 20, 1);
data = extract32(insn, 23, 2); /* rot */
- if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)
- || (!size && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))) {
+ if (!dc_isar_feature(aa32_vcma, s)
+ || (!size && !dc_isar_feature(aa32_fp16_arith, s))) {
return 1;
}
fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah;
/* VCADD -- 1111 110R 1.0S .... .... 1000 ...0 .... */
int size = extract32(insn, 20, 1);
data = extract32(insn, 24, 1); /* rot */
- if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)
- || (!size && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))) {
+ if (!dc_isar_feature(aa32_vcma, s)
+ || (!size && !dc_isar_feature(aa32_fp16_arith, s))) {
return 1;
}
fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh;
} else if ((insn & 0xfeb00f00) == 0xfc200d00) {
/* V[US]DOT -- 1111 1100 0.10 .... .... 1101 .Q.U .... */
bool u = extract32(insn, 4, 1);
- if (!arm_dc_feature(s, ARM_FEATURE_V8_DOTPROD)) {
+ if (!dc_isar_feature(aa32_dp, s)) {
return 1;
}
fn_gvec = u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b;
+ } else if ((insn & 0xff300f10) == 0xfc200810) {
+ /* VFM[AS]L -- 1111 1100 S.10 .... .... 1000 .Q.1 .... */
+ int is_s = extract32(insn, 23, 1);
+ if (!dc_isar_feature(aa32_fhm, s)) {
+ return 1;
+ }
+ is_long = true;
+ data = is_s; /* is_2 == 0 */
+ fn_gvec_ptr = gen_helper_gvec_fmlal_a32;
+ ptr_is_env = true;
} else {
return 1;
}
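+ /* For the widening (is_long) insns with Q clear, Vn and Vm name
+ * single-precision registers; all other cases use D registers.
+ * The bitwise '& q & !is_long' below rejects odd register numbers
+ * only for quad-width, non-widening operations.
+ */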
+ VFP_DREG_D(rd, insn);
+ if (rd & q) {
+ return 1;
+ }
+ if (q || !is_long) {
+ VFP_DREG_N(rn, insn);
+ VFP_DREG_M(rm, insn);
+ if ((rn | rm) & q & !is_long) {
+ return 1;
+ }
+ off_rn = vfp_reg_offset(1, rn);
+ off_rm = vfp_reg_offset(1, rm);
+ } else {
+ rn = VFP_SREG_N(insn);
+ rm = VFP_SREG_M(insn);
+ off_rn = vfp_reg_offset(0, rn);
+ off_rm = vfp_reg_offset(0, rm);
+ }
+
if (s->fp_excp_el) {
gen_exception_insn(s, 4, EXCP_UDEF,
- syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
+ syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
return 0;
}
if (!s->vfp_enabled) {
opr_sz = (1 + q) * 8;
if (fn_gvec_ptr) {
- TCGv_ptr fpst = get_fpstatus_ptr(1);
- tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
- vfp_reg_offset(1, rn),
- vfp_reg_offset(1, rm), fpst,
+ TCGv_ptr ptr;
+ if (ptr_is_env) {
+ ptr = cpu_env;
+ } else {
+ ptr = get_fpstatus_ptr(1);
+ }
+ tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), off_rn, off_rm, ptr,
opr_sz, opr_sz, data, fn_gvec_ptr);
- tcg_temp_free_ptr(fpst);
+ if (!ptr_is_env) {
+ tcg_temp_free_ptr(ptr);
+ }
} else {
- tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd),
- vfp_reg_offset(1, rn),
- vfp_reg_offset(1, rm),
+ tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd), off_rn, off_rm,
opr_sz, opr_sz, data, fn_gvec);
}
return 0;
gen_helper_gvec_3 *fn_gvec = NULL;
gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL;
int rd, rn, rm, opr_sz, data;
- bool q;
-
- q = extract32(insn, 6, 1);
- VFP_DREG_D(rd, insn);
- VFP_DREG_N(rn, insn);
- if ((rd | rn) & q) {
- return 1;
- }
+ int off_rn, off_rm;
+ bool is_long = false, q = extract32(insn, 6, 1);
+ bool ptr_is_env = false;
if ((insn & 0xff000f10) == 0xfe000800) {
/* VCMLA (indexed) -- 1111 1110 S.RR .... .... 1000 ...0 .... */
int size = extract32(insn, 23, 1);
int index;
- if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)) {
+ if (!dc_isar_feature(aa32_vcma, s)) {
return 1;
}
if (size == 0) {
- if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+ if (!dc_isar_feature(aa32_fp16_arith, s)) {
return 1;
}
/* For fp16, rm is just Vm, and index is M. */
} else if ((insn & 0xffb00f00) == 0xfe200d00) {
/* V[US]DOT -- 1111 1110 0.10 .... .... 1101 .Q.U .... */
int u = extract32(insn, 4, 1);
- if (!arm_dc_feature(s, ARM_FEATURE_V8_DOTPROD)) {
+
+ if (!dc_isar_feature(aa32_dp, s)) {
return 1;
}
fn_gvec = u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b;
/* rm is just Vm, and index is M. */
data = extract32(insn, 5, 1); /* index */
rm = extract32(insn, 0, 4);
+ } else if ((insn & 0xffa00f10) == 0xfe000810) {
+ /* VFM[AS]L -- 1111 1110 0.0S .... .... 1000 .Q.1 .... */
+ int is_s = extract32(insn, 20, 1);
+ int vm20 = extract32(insn, 0, 3);
+ int vm3 = extract32(insn, 3, 1);
+ int m = extract32(insn, 5, 1);
+ int index;
+
+ if (!dc_isar_feature(aa32_fhm, s)) {
+ return 1;
+ }
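+ /* With Q set, Vm<2:0> names a D register and M:Vm<3> selects one
+ * of its four fp16 elements; with Q clear, Vm<2:0>:M names an S
+ * register and Vm<3> selects one of its two elements.
+ */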
+ if (q) {
+ rm = vm20;
+ index = m * 2 + vm3;
+ } else {
+ rm = vm20 * 2 + m;
+ index = vm3;
+ }
+ is_long = true;
+ data = (index << 2) | is_s; /* is_2 == 0 */
+ fn_gvec_ptr = gen_helper_gvec_fmlal_idx_a32;
+ ptr_is_env = true;
} else {
return 1;
}
+ VFP_DREG_D(rd, insn);
+ if (rd & q) {
+ return 1;
+ }
+ if (q || !is_long) {
+ VFP_DREG_N(rn, insn);
+ if (rn & q & !is_long) {
+ return 1;
+ }
+ off_rn = vfp_reg_offset(1, rn);
+ off_rm = vfp_reg_offset(1, rm);
+ } else {
+ rn = VFP_SREG_N(insn);
+ off_rn = vfp_reg_offset(0, rn);
+ off_rm = vfp_reg_offset(0, rm);
+ }
if (s->fp_excp_el) {
gen_exception_insn(s, 4, EXCP_UDEF,
- syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
+ syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
return 0;
}
if (!s->vfp_enabled) {
opr_sz = (1 + q) * 8;
if (fn_gvec_ptr) {
- TCGv_ptr fpst = get_fpstatus_ptr(1);
- tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
- vfp_reg_offset(1, rn),
- vfp_reg_offset(1, rm), fpst,
+ TCGv_ptr ptr;
+ if (ptr_is_env) {
+ ptr = cpu_env;
+ } else {
+ ptr = get_fpstatus_ptr(1);
+ }
+ tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), off_rn, off_rm, ptr,
opr_sz, opr_sz, data, fn_gvec_ptr);
- tcg_temp_free_ptr(fpst);
+ if (!ptr_is_env) {
+ tcg_temp_free_ptr(ptr);
+ }
} else {
- tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd),
- vfp_reg_offset(1, rn),
- vfp_reg_offset(1, rm),
+ tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd), off_rn, off_rm,
opr_sz, opr_sz, data, fn_gvec);
}
return 0;
*/
gen_goto_tb(s, 0, s->pc & ~1);
return;
+ case 7: /* sb */
+ if ((insn & 0xf) || !dc_isar_feature(aa32_sb, s)) {
+ goto illegal_op;
+ }
+ /*
+ * TODO: There is no speculation barrier opcode
+ * for TCG; MB and end the TB instead.
+ */
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
+ gen_goto_tb(s, 0, s->pc & ~1);
+ return;
default:
goto illegal_op;
}
* op1 == 3 is UNPREDICTABLE but handle as UNDEFINED.
* Bits 8, 10 and 11 should be zero.
*/
- if (!arm_dc_feature(s, ARM_FEATURE_CRC) || op1 == 0x3 ||
- (c & 0xd) != 0) {
+ if (!dc_isar_feature(aa32_crc32, s) || op1 == 0x3 || (c & 0xd) != 0) {
goto illegal_op;
}
rd = (insn >> 12) & 0xf;
if (insn & (1 << 23)) {
/* load/store exclusive */
+ bool is_ld = extract32(insn, 20, 1);
+ bool is_lasr = !extract32(insn, 8, 1);
int op2 = (insn >> 8) & 3;
op1 = (insn >> 21) & 0x3;
addr = tcg_temp_local_new_i32();
load_reg_var(s, addr, rn);
- /* Since the emulation does not have barriers,
- the acquire/release semantics need no special
- handling */
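+ /* A store-release orders all earlier accesses before the store,
+ * so its barrier must be emitted first.
+ */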
+ if (is_lasr && !is_ld) {
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+ }
+
if (op2 == 0) {
- if (insn & (1 << 20)) {
+ if (is_ld) {
tmp = tcg_temp_new_i32();
switch (op1) {
case 0: /* lda */
}
tcg_temp_free_i32(tmp);
}
- } else if (insn & (1 << 20)) {
+ } else if (is_ld) {
switch (op1) {
case 0: /* ldrex */
gen_load_exclusive(s, rd, 15, addr, 2);
}
}
tcg_temp_free_i32(addr);
+
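+ /* Conversely, a load-acquire orders the load before all later
+ * accesses, so its barrier follows the load.
+ */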
+ if (is_lasr && is_ld) {
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
+ }
} else if ((insn & 0x00300f00) == 0) {
/* 0bcccc_0001_0x00_xxxx_xxxx_0000_1001_xxxx
* - SWP, SWPB
case 1:
case 3:
/* SDIV, UDIV */
- if (!arm_dc_feature(s, ARM_FEATURE_ARM_DIV)) {
+ if (!dc_isar_feature(arm_div, s)) {
goto illegal_op;
}
if (((insn >> 5) & 7) || (rd != 15)) {
} else if (i == rn) {
loaded_var = tmp;
loaded_base = 1;
- } else if (rn == 15 && exc_return) {
+ } else if (i == 15 && exc_return) {
store_pc_exc_ret(s, tmp);
} else {
store_reg_from_load(s, i, tmp);
* 0b1111_1001_x11x_xxxx_xxxx_xxxx_xxxx_xxxx
* - load/store dual (pre-indexed)
*/
+ bool wback = extract32(insn, 21, 1);
+
if (rn == 15) {
if (insn & (1 << 21)) {
/* UNPREDICTABLE */
addr = load_reg(s, rn);
}
offset = (insn & 0xff) * 4;
- if ((insn & (1 << 23)) == 0)
+ if ((insn & (1 << 23)) == 0) {
offset = -offset;
+ }
+
+ if (s->v8m_stackcheck && rn == 13 && wback) {
+ /*
+ * Here 'addr' is the current SP; if offset is +ve we're
+ * moving SP up, else down. It is UNKNOWN whether the limit
+ * check triggers when SP starts below the limit and ends
+ * up above it; check whichever of the current and final
+ * SP is lower, so QEMU will trigger in that situation.
+ */
+ if ((int32_t)offset < 0) {
+ TCGv_i32 newsp = tcg_temp_new_i32();
+
+ tcg_gen_addi_i32(newsp, addr, offset);
+ gen_helper_v8m_stackcheck(cpu_env, newsp);
+ tcg_temp_free_i32(newsp);
+ } else {
+ gen_helper_v8m_stackcheck(cpu_env, addr);
+ }
+ }
+
if (insn & (1 << 24)) {
tcg_gen_addi_i32(addr, addr, offset);
offset = 0;
gen_aa32_st32(s, tmp, addr, get_mem_index(s));
tcg_temp_free_i32(tmp);
}
- if (insn & (1 << 21)) {
+ if (wback) {
/* Base writeback. */
tcg_gen_addi_i32(addr, addr, offset - 4);
store_reg(s, rn, addr);
tcg_gen_addi_i32(tmp, tmp, s->pc);
store_reg(s, 15, tmp);
} else {
+ bool is_lasr = false;
+ bool is_ld = extract32(insn, 20, 1);
int op2 = (insn >> 6) & 0x3;
op = (insn >> 4) & 0x3;
switch (op2) {
case 3:
/* Load-acquire/store-release exclusive */
ARCH(8);
+ is_lasr = true;
break;
}
+
+ if (is_lasr && !is_ld) {
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+ }
+
addr = tcg_temp_local_new_i32();
load_reg_var(s, addr, rn);
if (!(op2 & 1)) {
- if (insn & (1 << 20)) {
+ if (is_ld) {
tmp = tcg_temp_new_i32();
switch (op) {
case 0: /* ldab */
}
tcg_temp_free_i32(tmp);
}
- } else if (insn & (1 << 20)) {
+ } else if (is_ld) {
gen_load_exclusive(s, rs, rd, addr, op);
} else {
gen_store_exclusive(s, rm, rs, rd, addr, op);
}
tcg_temp_free_i32(addr);
+
+ if (is_lasr && is_ld) {
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
+ }
}
} else {
/* Load/store multiple, RFE, SRS. */
} else {
int i, loaded_base = 0;
TCGv_i32 loaded_var;
+ bool wback = extract32(insn, 21, 1);
/* Load/store multiple. */
addr = load_reg(s, rn);
offset = 0;
if (insn & (1 << i))
offset += 4;
}
+
if (insn & (1 << 24)) {
tcg_gen_addi_i32(addr, addr, -offset);
}
+ if (s->v8m_stackcheck && rn == 13 && wback) {
+ /*
+ * If the writeback is incrementing SP rather than
+ * decrementing it, and the initial SP is below the
+ * stack limit but the final written-back SP would
+ * be above, then we must not perform any memory
+ * accesses, but it is IMPDEF whether we generate
+ * an exception. We choose to do so in this case.
+ * At this point 'addr' is the lowest address: either the
+ * original SP (if incrementing) or our final SP (if
+ * decrementing), and that is what we check.
+ */
+ gen_helper_v8m_stackcheck(cpu_env, addr);
+ }
+
loaded_var = NULL;
for (i = 0; i < 16; i++) {
if ((insn & (1 << i)) == 0)
if (loaded_base) {
store_reg(s, rn, loaded_var);
}
- if (insn & (1 << 21)) {
+ if (wback) {
/* Base register writeback. */
if (insn & (1 << 24)) {
tcg_gen_addi_i32(addr, addr, -offset);
if (gen_thumb2_data_op(s, op, conds, 0, tmp, tmp2))
goto illegal_op;
tcg_temp_free_i32(tmp2);
- if (rd != 15) {
+ if (rd == 13 &&
+ ((op == 2 && rn == 15) ||
+ (op == 8 && rn == 13) ||
+ (op == 13 && rn == 13))) {
+ /* MOV SP, ... or ADD SP, SP, ... or SUB SP, SP, ... */
+ store_sp_checked(s, tmp);
+ } else if (rd != 15) {
store_reg(s, rd, tmp);
} else {
tcg_temp_free_i32(tmp);
tmp2 = load_reg(s, rm);
if ((insn & 0x70) != 0)
goto illegal_op;
+ /*
+ * 0b1111_1010_0xxx_xxxx_1111_xxxx_0000_xxxx:
+ * - MOV, MOVS (register-shifted register), flagsetting
+ */
op = (insn >> 21) & 3;
logic_cc = (insn & (1 << 20)) != 0;
gen_arm_shift_reg(tmp, op, tmp2, logic_cc);
case 0x28:
case 0x29:
case 0x2a:
- if (!arm_dc_feature(s, ARM_FEATURE_CRC)) {
+ if (!dc_isar_feature(aa32_crc32, s)) {
goto illegal_op;
}
break;
tmp2 = load_reg(s, rm);
if ((op & 0x50) == 0x10) {
/* sdiv, udiv */
- if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DIV)) {
+ if (!dc_isar_feature(thumb_div, s)) {
goto illegal_op;
}
if (op & 0x20)
case 6: case 7: case 14: case 15:
/* Coprocessor. */
if (arm_dc_feature(s, ARM_FEATURE_M)) {
- /* We don't currently implement M profile FP support,
- * so this entire space should give a NOCP fault, with
- * the exception of the v8M VLLDM and VLSTM insns, which
- * must be NOPs in Secure state and UNDEF in Nonsecure state.
+ /* 0b111x_11xx_xxxx_xxxx_xxxx_xxxx_xxxx_xxxx */
+ if (extract32(insn, 24, 2) == 3) {
+ goto illegal_op; /* op0 = 0b11 : unallocated */
+ }
+
+ /*
+ * Decode VLLDM and VLSTM first: these are nonstandard because:
+ * * if there is no FPU then these insns must NOP in
+ * Secure state and UNDEF in Nonsecure state
+ * * if there is an FPU then these insns do not have
+ * the usual behaviour that disas_vfp_insn() provides of
+ * being controlled by CPACR/NSACR enable bits or the
+ * lazy-stacking logic.
*/
if (arm_dc_feature(s, ARM_FEATURE_V8) &&
(insn & 0xffa00f00) == 0xec200a00) {
/* Just NOP since FP support is not implemented */
break;
}
+ if (arm_dc_feature(s, ARM_FEATURE_VFP) &&
+ ((insn >> 8) & 0xe) == 10) {
+ /* FP, and the CPU supports it */
+ if (disas_vfp_insn(s, insn)) {
+ goto illegal_op;
+ }
+ break;
+ }
+
/* All other insns: NOCP */
gen_exception_insn(s, 4, EXCP_NOCP, syn_uncategorized(),
default_exception_el(s));
*/
gen_goto_tb(s, 0, s->pc & ~1);
break;
+ case 7: /* sb */
+ if ((insn & 0xf) || !dc_isar_feature(aa32_sb, s)) {
+ goto illegal_op;
+ }
+ /*
+ * TODO: There is no speculation barrier opcode
+ * for TCG; MB and end the TB instead.
+ */
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
+ gen_goto_tb(s, 0, s->pc & ~1);
+ break;
default:
goto illegal_op;
}
gen_jmp(s, s->pc + offset);
}
} else {
- /* Data processing immediate. */
+ /*
+ * 0b1111_0xxx_xxxx_0xxx_xxxx_xxxx
+ * - Data-processing (modified immediate, plain binary immediate)
+ */
if (insn & (1 << 25)) {
+ /*
+ * 0b1111_0x1x_xxxx_0xxx_xxxx_xxxx
+ * - Data-processing (plain binary immediate)
+ */
if (insn & (1 << 24)) {
if (insn & (1 << 20))
goto illegal_op;
tmp = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp, imm);
}
+ store_reg(s, rd, tmp);
} else {
/* Add/sub 12-bit immediate. */
if (rn == 15) {
offset += imm;
tmp = tcg_temp_new_i32();
tcg_gen_movi_i32(tmp, offset);
+ store_reg(s, rd, tmp);
} else {
tmp = load_reg(s, rn);
if (insn & (1 << 23))
tcg_gen_subi_i32(tmp, tmp, imm);
else
tcg_gen_addi_i32(tmp, tmp, imm);
+ if (rn == 13 && rd == 13) {
+ /* ADD SP, SP, imm or SUB SP, SP, imm */
+ store_sp_checked(s, tmp);
+ } else {
+ store_reg(s, rd, tmp);
+ }
}
}
- store_reg(s, rd, tmp);
}
} else {
+ /*
+ * 0b1111_0x0x_xxxx_0xxx_xxxx_xxxx
+ * - Data-processing (modified immediate)
+ */
int shifter_out = 0;
/* modified 12-bit immediate. */
shift = ((insn & 0x04000000) >> 23) | ((insn & 0x7000) >> 12);
goto illegal_op;
tcg_temp_free_i32(tmp2);
rd = (insn >> 8) & 0xf;
- if (rd != 15) {
+ if (rd == 13 && rn == 13
+ && (op == 8 || op == 13)) {
+ /* ADD(S) SP, SP, imm or SUB(S) SP, SP, imm */
+ store_sp_checked(s, tmp);
+ } else if (rd != 15) {
store_reg(s, rd, tmp);
} else {
tcg_temp_free_i32(tmp);
imm = -imm;
/* Fall through. */
case 0xf: /* Pre-increment. */
- tcg_gen_addi_i32(addr, addr, imm);
writeback = 1;
break;
default:
issinfo = writeback ? ISSInvalid : rs;
+ if (s->v8m_stackcheck && rn == 13 && writeback) {
+ /*
+ * Stackcheck. Here we know 'addr' is the current SP;
+ * if imm is +ve we're moving SP up, else down. It is
+ * UNKNOWN whether the limit check triggers when SP starts
+ * below the limit and ends up above it; we choose to do so.
+ */
+ if ((int32_t)imm < 0) {
+ TCGv_i32 newsp = tcg_temp_new_i32();
+
+ tcg_gen_addi_i32(newsp, addr, imm);
+ gen_helper_v8m_stackcheck(cpu_env, newsp);
+ tcg_temp_free_i32(newsp);
+ } else {
+ gen_helper_v8m_stackcheck(cpu_env, addr);
+ }
+ }
+
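+ /* Pre-indexed: apply the offset before the access; post-indexed
+ * writeback happens after the load/store completes.
+ */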
+ if (writeback && !postinc) {
+ tcg_gen_addi_i32(addr, addr, imm);
+ }
+
if (insn & (1 << 20)) {
/* Load. */
tmp = tcg_temp_new_i32();
rd = insn & 7;
op = (insn >> 11) & 3;
if (op == 3) {
- /* add/subtract */
+ /*
+ * 0b0001_1xxx_xxxx_xxxx
+ * - Add, subtract (three low registers)
+ * - Add, subtract (two low registers and immediate)
+ */
rn = (insn >> 3) & 7;
tmp = load_reg(s, rn);
if (insn & (1 << 10)) {
}
break;
case 2: case 3:
- /* arithmetic large immediate */
+ /*
+ * 0b001x_xxxx_xxxx_xxxx
+ * - Add, subtract, compare, move (one low register and immediate)
+ */
op = (insn >> 11) & 3;
rd = (insn >> 8) & 0x7;
if (op == 0) { /* mov */
tmp2 = load_reg(s, rm);
tcg_gen_add_i32(tmp, tmp, tmp2);
tcg_temp_free_i32(tmp2);
- store_reg(s, rd, tmp);
+ if (rd == 13) {
+ /* ADD SP, SP, reg */
+ store_sp_checked(s, tmp);
+ } else {
+ store_reg(s, rd, tmp);
+ }
break;
case 1: /* cmp */
tmp = load_reg(s, rd);
break;
case 2: /* mov/cpy */
tmp = load_reg(s, rm);
- store_reg(s, rd, tmp);
+ if (rd == 13) {
+ /* MOV SP, reg */
+ store_sp_checked(s, tmp);
+ } else {
+ store_reg(s, rd, tmp);
+ }
break;
case 3:
{
break;
}
- /* data processing register */
+ /*
+ * 0b0100_00xx_xxxx_xxxx
+ * - Data-processing (two low registers)
+ */
rd = insn & 7;
rm = (insn >> 3) & 7;
op = (insn >> 6) & 0xf;
break;
case 10:
- /* add to high reg */
+ /*
+ * 0b1010_xxxx_xxxx_xxxx
+ * - Add PC/SP (immediate)
+ */
rd = (insn >> 8) & 7;
if (insn & (1 << 11)) {
/* SP */
op = (insn >> 8) & 0xf;
switch (op) {
case 0:
- /* adjust stack pointer */
+ /*
+ * 0b1011_0000_xxxx_xxxx
+ * - ADD (SP plus immediate)
+ * - SUB (SP minus immediate)
+ */
tmp = load_reg(s, 13);
val = (insn & 0x7f) * 4;
if (insn & (1 << 7))
val = -(int32_t)val;
tcg_gen_addi_i32(tmp, tmp, val);
- store_reg(s, 13, tmp);
+ store_sp_checked(s, tmp);
break;
case 2: /* sign/zero extend. */
store_reg(s, rd, tmp);
break;
case 4: case 5: case 0xc: case 0xd:
- /* push/pop */
+ /*
+ * 0b1011_x10x_xxxx_xxxx
+ * - push/pop
+ */
addr = load_reg(s, 13);
if (insn & (1 << 8))
offset = 4;
if ((insn & (1 << 11)) == 0) {
tcg_gen_addi_i32(addr, addr, -offset);
}
+
+ if (s->v8m_stackcheck) {
+ /*
+ * Here 'addr' is the lower of "old SP" and "new SP";
+ * if this is a pop that starts below the limit and ends
+ * above it, it is UNKNOWN whether the limit check triggers;
+ * we choose to trigger.
+ */
+ gen_helper_v8m_stackcheck(cpu_env, addr);
+ }
+
for (i = 0; i < 8; i++) {
if (insn & (1 << i)) {
if (insn & (1 << 11)) {
DisasContext *dc = container_of(dcbase, DisasContext, base);
CPUARMState *env = cs->env_ptr;
ARMCPU *cpu = arm_env_get_cpu(env);
+ uint32_t tb_flags = dc->base.tb->flags;
+ uint32_t condexec, core_mmu_idx;
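+ /* Cache the ISAR ID registers used by the dc_isar_feature() tests. */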
+ dc->isar = &cpu->isar;
dc->pc = dc->base.pc_first;
dc->condjmp = 0;
*/
dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
!arm_el_is_aa64(env, 3);
- dc->thumb = ARM_TBFLAG_THUMB(dc->base.tb->flags);
- dc->sctlr_b = ARM_TBFLAG_SCTLR_B(dc->base.tb->flags);
- dc->be_data = ARM_TBFLAG_BE_DATA(dc->base.tb->flags) ? MO_BE : MO_LE;
- dc->condexec_mask = (ARM_TBFLAG_CONDEXEC(dc->base.tb->flags) & 0xf) << 1;
- dc->condexec_cond = ARM_TBFLAG_CONDEXEC(dc->base.tb->flags) >> 4;
- dc->mmu_idx = core_to_arm_mmu_idx(env, ARM_TBFLAG_MMUIDX(dc->base.tb->flags));
+ dc->thumb = FIELD_EX32(tb_flags, TBFLAG_A32, THUMB);
+ dc->sctlr_b = FIELD_EX32(tb_flags, TBFLAG_A32, SCTLR_B);
+ dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
+ condexec = FIELD_EX32(tb_flags, TBFLAG_A32, CONDEXEC);
+ dc->condexec_mask = (condexec & 0xf) << 1;
+ dc->condexec_cond = condexec >> 4;
+ core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX);
+ dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
#if !defined(CONFIG_USER_ONLY)
dc->user = (dc->current_el == 0);
#endif
- dc->ns = ARM_TBFLAG_NS(dc->base.tb->flags);
- dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(dc->base.tb->flags);
- dc->vfp_enabled = ARM_TBFLAG_VFPEN(dc->base.tb->flags);
- dc->vec_len = ARM_TBFLAG_VECLEN(dc->base.tb->flags);
- dc->vec_stride = ARM_TBFLAG_VECSTRIDE(dc->base.tb->flags);
- dc->c15_cpar = ARM_TBFLAG_XSCALE_CPAR(dc->base.tb->flags);
- dc->v7m_handler_mode = ARM_TBFLAG_HANDLER(dc->base.tb->flags);
+ dc->ns = FIELD_EX32(tb_flags, TBFLAG_A32, NS);
+ dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL);
+ dc->vfp_enabled = FIELD_EX32(tb_flags, TBFLAG_A32, VFPEN);
+ dc->vec_len = FIELD_EX32(tb_flags, TBFLAG_A32, VECLEN);
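+ /* The VECSTRIDE and XSCALE_CPAR TB flag fields can share space
+ * because no XScale CPU has VFP, so only one is ever valid.
+ */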
+ if (arm_feature(env, ARM_FEATURE_XSCALE)) {
+ dc->c15_cpar = FIELD_EX32(tb_flags, TBFLAG_A32, XSCALE_CPAR);
+ dc->vec_stride = 0;
+ } else {
+ dc->vec_stride = FIELD_EX32(tb_flags, TBFLAG_A32, VECSTRIDE);
+ dc->c15_cpar = 0;
+ }
+ dc->v7m_handler_mode = FIELD_EX32(tb_flags, TBFLAG_A32, HANDLER);
dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
regime_is_secure(env, dc->mmu_idx);
+ dc->v8m_stackcheck = FIELD_EX32(tb_flags, TBFLAG_A32, STACKCHECK);
+ dc->v8m_fpccr_s_wrong = FIELD_EX32(tb_flags, TBFLAG_A32, FPCCR_S_WRONG);
+ dc->v7m_new_fp_ctxt_needed =
+ FIELD_EX32(tb_flags, TBFLAG_A32, NEW_FP_CTXT_NEEDED);
+ dc->v7m_lspact = FIELD_EX32(tb_flags, TBFLAG_A32, LSPACT);
dc->cp_regs = cpu->cp_regs;
dc->features = env->features;
* emit code to generate a software step exception
* end the TB
*/
- dc->ss_active = ARM_TBFLAG_SS_ACTIVE(dc->base.tb->flags);
- dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(dc->base.tb->flags);
+ dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE);
+ dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS);
dc->is_ldex = false;
dc->ss_same_el = false; /* Can't be true since EL_d must be AArch64 */
tcg_gen_movi_i32(tmp, 0);
store_cpu_field(tmp, condexec_bits);
}
- tcg_clear_temp_count();
}
static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
};
/* generate intermediate code for basic block 'tb'. */
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb)
+void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
{
DisasContext dc;
const TranslatorOps *ops = &arm_translator_ops;
- if (ARM_TBFLAG_THUMB(tb->flags)) {
+ if (FIELD_EX32(tb->flags, TBFLAG_A32, THUMB)) {
ops = &thumb_translator_ops;
}
#ifdef TARGET_AARCH64
- if (ARM_TBFLAG_AARCH64_STATE(tb->flags)) {
+ if (FIELD_EX32(tb->flags, TBFLAG_ANY, AARCH64_STATE)) {
ops = &aarch64_translator_ops;
}
#endif
- translator_loop(ops, &dc.base, cpu, tb);
+ translator_loop(ops, &dc.base, cpu, tb, max_insns);
}
-static const char *cpu_mode_names[16] = {
- "usr", "fiq", "irq", "svc", "???", "???", "mon", "abt",
- "???", "???", "hyp", "und", "???", "???", "???", "sys"
-};
-
-void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
- int flags)
+void arm_cpu_dump_state(CPUState *cs, FILE *f, int flags)
{
ARMCPU *cpu = ARM_CPU(cs);
CPUARMState *env = &cpu->env;
int i;
if (is_a64(env)) {
- aarch64_cpu_dump_state(cs, f, cpu_fprintf, flags);
+ aarch64_cpu_dump_state(cs, f, flags);
return;
}
for (i = 0; i < 16; i++) {
- cpu_fprintf(f, "R%02d=%08x", i, env->regs[i]);
+ qemu_fprintf(f, "R%02d=%08x", i, env->regs[i]);
if ((i % 4) == 3)
- cpu_fprintf(f, "\n");
+ qemu_fprintf(f, "\n");
else
- cpu_fprintf(f, " ");
+ qemu_fprintf(f, " ");
}
if (arm_feature(env, ARM_FEATURE_M)) {
}
}
- cpu_fprintf(f, "XPSR=%08x %c%c%c%c %c %s%s\n",
- xpsr,
- xpsr & XPSR_N ? 'N' : '-',
- xpsr & XPSR_Z ? 'Z' : '-',
- xpsr & XPSR_C ? 'C' : '-',
- xpsr & XPSR_V ? 'V' : '-',
- xpsr & XPSR_T ? 'T' : 'A',
- ns_status,
- mode);
+ qemu_fprintf(f, "XPSR=%08x %c%c%c%c %c %s%s\n",
+ xpsr,
+ xpsr & XPSR_N ? 'N' : '-',
+ xpsr & XPSR_Z ? 'Z' : '-',
+ xpsr & XPSR_C ? 'C' : '-',
+ xpsr & XPSR_V ? 'V' : '-',
+ xpsr & XPSR_T ? 'T' : 'A',
+ ns_status,
+ mode);
} else {
uint32_t psr = cpsr_read(env);
const char *ns_status = "";
ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
}
- cpu_fprintf(f, "PSR=%08x %c%c%c%c %c %s%s%d\n",
- psr,
- psr & CPSR_N ? 'N' : '-',
- psr & CPSR_Z ? 'Z' : '-',
- psr & CPSR_C ? 'C' : '-',
- psr & CPSR_V ? 'V' : '-',
- psr & CPSR_T ? 'T' : 'A',
- ns_status,
- cpu_mode_names[psr & 0xf], (psr & 0x10) ? 32 : 26);
+ qemu_fprintf(f, "PSR=%08x %c%c%c%c %c %s%s%d\n",
+ psr,
+ psr & CPSR_N ? 'N' : '-',
+ psr & CPSR_Z ? 'Z' : '-',
+ psr & CPSR_C ? 'C' : '-',
+ psr & CPSR_V ? 'V' : '-',
+ psr & CPSR_T ? 'T' : 'A',
+ ns_status,
+ aarch32_mode_name(psr), (psr & 0x10) ? 32 : 26);
}
if (flags & CPU_DUMP_FPU) {
}
for (i = 0; i < numvfpregs; i++) {
uint64_t v = *aa32_vfp_dreg(env, i);
- cpu_fprintf(f, "s%02d=%08x s%02d=%08x d%02d=%016" PRIx64 "\n",
- i * 2, (uint32_t)v,
- i * 2 + 1, (uint32_t)(v >> 32),
- i, v);
+ qemu_fprintf(f, "s%02d=%08x s%02d=%08x d%02d=%016" PRIx64 "\n",
+ i * 2, (uint32_t)v,
+ i * 2 + 1, (uint32_t)(v >> 32),
+ i, v);
}
- cpu_fprintf(f, "FPSCR: %08x\n", (int)env->vfp.xregs[ARM_VFP_FPSCR]);
+ qemu_fprintf(f, "FPSCR: %08x\n", vfp_get_fpscr(env));
}
}