X-Git-Url: https://repo.jachan.dev/qemu.git/blobdiff_plain/e7a1d6c52a3ac6e76e5653c830b2545e0a4043d3..ccd380876b79c3b46f85720c1be8e2cd40509460:/target-arm/translate-a64.c diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c index 8752e7e16c..668916575e 100644 --- a/target-arm/translate-a64.c +++ b/target-arm/translate-a64.c @@ -26,6 +26,7 @@ #include "tcg-op.h" #include "qemu/log.h" #include "translate.h" +#include "internals.h" #include "qemu/host-utils.h" #include "exec/gen-icount.h" @@ -73,14 +74,17 @@ typedef struct AArch64DecodeTable { } AArch64DecodeTable; /* Function prototype for gen_ functions for calling Neon helpers */ +typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32); typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32); typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32); typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64); +typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64); typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64); typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64); typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32); typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); +typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64); /* initialize TCG globals. */ void a64_translate_init(void) @@ -210,7 +214,7 @@ static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest) if (use_goto_tb(s, n, dest)) { tcg_gen_goto_tb(n); gen_a64_set_pc_im(dest); - tcg_gen_exit_tb((tcg_target_long)tb + n); + tcg_gen_exit_tb((intptr_t)tb + n); s->is_jmp = DISAS_TB_JUMP; } else { gen_a64_set_pc_im(dest); @@ -1080,9 +1084,11 @@ static void handle_hint(DisasContext *s, uint32_t insn, switch (selector) { case 0: /* NOP */ return; + case 3: /* WFI */ + s->is_jmp = DISAS_WFI; + return; case 1: /* YIELD */ case 2: /* WFE */ - case 3: /* WFI */ case 4: /* SEV */ case 5: /* SEVL */ /* we treat all as NOP at least for now */ @@ -1126,7 +1132,30 @@ static void handle_sync(DisasContext *s, uint32_t insn, static void handle_msr_i(DisasContext *s, uint32_t insn, unsigned int op1, unsigned int op2, unsigned int crm) { - unsupported_encoding(s, insn); + int op = op1 << 3 | op2; + switch (op) { + case 0x05: /* SPSel */ + if (s->current_pl == 0) { + unallocated_encoding(s); + return; + } + /* fall through */ + case 0x1e: /* DAIFSet */ + case 0x1f: /* DAIFClear */ + { + TCGv_i32 tcg_imm = tcg_const_i32(crm); + TCGv_i32 tcg_op = tcg_const_i32(op); + gen_a64_set_pc_im(s->pc - 4); + gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm); + tcg_temp_free_i32(tcg_imm); + tcg_temp_free_i32(tcg_op); + s->is_jmp = DISAS_UPDATE; + break; + } + default: + unallocated_encoding(s); + return; + } } static void gen_get_nzcv(TCGv_i64 tcg_rt) @@ -1231,6 +1260,13 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, gen_set_nzcv(tcg_rt); } return; + case ARM_CP_CURRENTEL: + /* Reads as current EL value from pstate, which is + * guaranteed to be constant by the tb flags. + */ + tcg_rt = cpu_reg(s, rt); + tcg_gen_movi_i64(tcg_rt, s->current_pl << 2); + return; default: break; } @@ -3064,12 +3100,11 @@ static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn) /* non-flag setting ops may use SP */ if (!setflags) { - tcg_rn = read_cpu_reg_sp(s, rn, sf); tcg_rd = cpu_reg_sp(s, rd); } else { - tcg_rn = read_cpu_reg(s, rn, sf); tcg_rd = cpu_reg(s, rd); } + tcg_rn = read_cpu_reg_sp(s, rn, sf); tcg_rm = read_cpu_reg(s, rm, sf); ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3); @@ -5796,6 +5831,21 @@ static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src, } } +/* SRI: shift right with insert */ +static void handle_shri_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src, + int size, int shift) +{ + int esize = 8 << size; + + /* shift count same as element size is valid but does nothing; + * special case to avoid potential shift by 64. + */ + if (shift != esize) { + tcg_gen_shri_i64(tcg_src, tcg_src, shift); + tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, 0, esize - shift); + } +} + /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */ static void handle_scalar_simd_shri(DisasContext *s, bool is_u, int immh, int immb, @@ -5806,6 +5856,7 @@ static void handle_scalar_simd_shri(DisasContext *s, int shift = 2 * (8 << size) - immhb; bool accumulate = false; bool round = false; + bool insert = false; TCGv_i64 tcg_rn; TCGv_i64 tcg_rd; TCGv_i64 tcg_round; @@ -5825,6 +5876,9 @@ static void handle_scalar_simd_shri(DisasContext *s, case 0x06: /* SRSRA / URSRA (accum + rounding) */ accumulate = round = true; break; + case 0x08: /* SRI */ + insert = true; + break; } if (round) { @@ -5835,10 +5889,14 @@ static void handle_scalar_simd_shri(DisasContext *s, } tcg_rn = read_fp_dreg(s, rn); - tcg_rd = accumulate ? read_fp_dreg(s, rd) : tcg_temp_new_i64(); + tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64(); - handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, - accumulate, is_u, size, shift); + if (insert) { + handle_shri_with_ins(tcg_rd, tcg_rn, size, shift); + } else { + handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, + accumulate, is_u, size, shift); + } write_fp_dreg(s, rd, tcg_rd); @@ -5876,160 +5934,570 @@ static void handle_scalar_simd_shli(DisasContext *s, bool insert, tcg_temp_free_i64(tcg_rd); } -/* C3.6.9 AdvSIMD scalar shift by immediate - * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 - * +-----+---+-------------+------+------+--------+---+------+------+ - * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd | - * +-----+---+-------------+------+------+--------+---+------+------+ - * - * This is the scalar version so it works on a fixed sized registers - */ -static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn) +/* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with + * (signed/unsigned) narrowing */ +static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q, + bool is_u_shift, bool is_u_narrow, + int immh, int immb, int opcode, + int rn, int rd) { - int rd = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int opcode = extract32(insn, 11, 5); - int immb = extract32(insn, 16, 3); - int immh = extract32(insn, 19, 4); - bool is_u = extract32(insn, 29, 1); + int immhb = immh << 3 | immb; + int size = 32 - clz32(immh) - 1; + int esize = 8 << size; + int shift = (2 * esize) - immhb; + int elements = is_scalar ? 1 : (64 / esize); + bool round = extract32(opcode, 0, 1); + TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN); + TCGv_i64 tcg_rn, tcg_rd, tcg_round; + TCGv_i32 tcg_rd_narrowed; + TCGv_i64 tcg_final; + + static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = { + { gen_helper_neon_narrow_sat_s8, + gen_helper_neon_unarrow_sat8 }, + { gen_helper_neon_narrow_sat_s16, + gen_helper_neon_unarrow_sat16 }, + { gen_helper_neon_narrow_sat_s32, + gen_helper_neon_unarrow_sat32 }, + { NULL, NULL }, + }; + static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = { + gen_helper_neon_narrow_sat_u8, + gen_helper_neon_narrow_sat_u16, + gen_helper_neon_narrow_sat_u32, + NULL + }; + NeonGenNarrowEnvFn *narrowfn; - switch (opcode) { - case 0x00: /* SSHR / USHR */ - case 0x02: /* SSRA / USRA */ - case 0x04: /* SRSHR / URSHR */ - case 0x06: /* SRSRA / URSRA */ - handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd); - break; - case 0x0a: /* SHL / SLI */ - handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd); - break; - default: - unsupported_encoding(s, insn); - break; - } -} + int i; -/* C3.6.10 AdvSIMD scalar three different - * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 - * +-----+---+-----------+------+---+------+--------+-----+------+------+ - * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd | - * +-----+---+-----------+------+---+------+--------+-----+------+------+ - */ -static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) -{ - bool is_u = extract32(insn, 29, 1); - int size = extract32(insn, 22, 2); - int opcode = extract32(insn, 12, 4); - int rm = extract32(insn, 16, 5); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); + assert(size < 4); - if (is_u) { + if (extract32(immh, 3, 1)) { unallocated_encoding(s); return; } - switch (opcode) { - case 0x9: /* SQDMLAL, SQDMLAL2 */ - case 0xb: /* SQDMLSL, SQDMLSL2 */ - case 0xd: /* SQDMULL, SQDMULL2 */ - if (size == 0 || size == 3) { - unallocated_encoding(s); - return; - } - break; - default: - unallocated_encoding(s); - return; + if (is_u_shift) { + narrowfn = unsigned_narrow_fns[size]; + } else { + narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0]; } - if (size == 2) { - TCGv_i64 tcg_op1 = tcg_temp_new_i64(); - TCGv_i64 tcg_op2 = tcg_temp_new_i64(); - TCGv_i64 tcg_res = tcg_temp_new_i64(); + tcg_rn = tcg_temp_new_i64(); + tcg_rd = tcg_temp_new_i64(); + tcg_rd_narrowed = tcg_temp_new_i32(); + tcg_final = tcg_const_i64(0); - read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN); - read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN); + if (round) { + uint64_t round_const = 1ULL << (shift - 1); + tcg_round = tcg_const_i64(round_const); + } else { + TCGV_UNUSED_I64(tcg_round); + } - tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2); - gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res); + for (i = 0; i < elements; i++) { + read_vec_element(s, tcg_rn, rn, i, ldop); + handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, + false, is_u_shift, size+1, shift); + narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd); + tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed); + tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize); + } - switch (opcode) { - case 0xd: /* SQDMULL, SQDMULL2 */ - break; - case 0xb: /* SQDMLSL, SQDMLSL2 */ - tcg_gen_neg_i64(tcg_res, tcg_res); + if (!is_q) { + clear_vec_high(s, rd); + write_vec_element(s, tcg_final, rd, 0, MO_64); + } else { + write_vec_element(s, tcg_final, rd, 1, MO_64); + } + + if (round) { + tcg_temp_free_i64(tcg_round); + } + tcg_temp_free_i64(tcg_rn); + tcg_temp_free_i64(tcg_rd); + tcg_temp_free_i32(tcg_rd_narrowed); + tcg_temp_free_i64(tcg_final); + return; +} + +/* SQSHLU, UQSHL, SQSHL: saturating left shifts */ +static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q, + bool src_unsigned, bool dst_unsigned, + int immh, int immb, int rn, int rd) +{ + int immhb = immh << 3 | immb; + int size = 32 - clz32(immh) - 1; + int shift = immhb - (8 << size); + int pass; + + assert(immh != 0); + assert(!(scalar && is_q)); + + if (!scalar) { + if (!is_q && extract32(immh, 3, 1)) { + unallocated_encoding(s); + return; + } + + /* Since we use the variable-shift helpers we must + * replicate the shift count into each element of + * the tcg_shift value. + */ + switch (size) { + case 0: + shift |= shift << 8; /* fall through */ - case 0x9: /* SQDMLAL, SQDMLAL2 */ - read_vec_element(s, tcg_op1, rd, 0, MO_64); - gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, - tcg_res, tcg_op1); + case 1: + shift |= shift << 16; + break; + case 2: + case 3: break; default: g_assert_not_reached(); } + } - write_fp_dreg(s, rd, tcg_res); - - tcg_temp_free_i64(tcg_op1); - tcg_temp_free_i64(tcg_op2); - tcg_temp_free_i64(tcg_res); - } else { - TCGv_i32 tcg_op1 = tcg_temp_new_i32(); - TCGv_i32 tcg_op2 = tcg_temp_new_i32(); - TCGv_i64 tcg_res = tcg_temp_new_i64(); + if (size == 3) { + TCGv_i64 tcg_shift = tcg_const_i64(shift); + static NeonGenTwo64OpEnvFn * const fns[2][2] = { + { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 }, + { NULL, gen_helper_neon_qshl_u64 }, + }; + NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned]; + int maxpass = is_q ? 2 : 1; - read_vec_element_i32(s, tcg_op1, rn, 0, MO_16); - read_vec_element_i32(s, tcg_op2, rm, 0, MO_16); + for (pass = 0; pass < maxpass; pass++) { + TCGv_i64 tcg_op = tcg_temp_new_i64(); - gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2); - gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res); + read_vec_element(s, tcg_op, rn, pass, MO_64); + genfn(tcg_op, cpu_env, tcg_op, tcg_shift); + write_vec_element(s, tcg_op, rd, pass, MO_64); - switch (opcode) { - case 0xd: /* SQDMULL, SQDMULL2 */ - break; - case 0xb: /* SQDMLSL, SQDMLSL2 */ - gen_helper_neon_negl_u32(tcg_res, tcg_res); - /* fall through */ - case 0x9: /* SQDMLAL, SQDMLAL2 */ - { - TCGv_i64 tcg_op3 = tcg_temp_new_i64(); - read_vec_element(s, tcg_op3, rd, 0, MO_32); - gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, - tcg_res, tcg_op3); - tcg_temp_free_i64(tcg_op3); - break; + tcg_temp_free_i64(tcg_op); } - default: - g_assert_not_reached(); + tcg_temp_free_i64(tcg_shift); + + if (!is_q) { + clear_vec_high(s, rd); } + } else { + TCGv_i32 tcg_shift = tcg_const_i32(shift); + static NeonGenTwoOpEnvFn * const fns[2][2][3] = { + { + { gen_helper_neon_qshl_s8, + gen_helper_neon_qshl_s16, + gen_helper_neon_qshl_s32 }, + { gen_helper_neon_qshlu_s8, + gen_helper_neon_qshlu_s16, + gen_helper_neon_qshlu_s32 } + }, { + { NULL, NULL, NULL }, + { gen_helper_neon_qshl_u8, + gen_helper_neon_qshl_u16, + gen_helper_neon_qshl_u32 } + } + }; + NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size]; + TCGMemOp memop = scalar ? size : MO_32; + int maxpass = scalar ? 1 : is_q ? 4 : 2; - tcg_gen_ext32u_i64(tcg_res, tcg_res); - write_fp_dreg(s, rd, tcg_res); + for (pass = 0; pass < maxpass; pass++) { + TCGv_i32 tcg_op = tcg_temp_new_i32(); - tcg_temp_free_i32(tcg_op1); - tcg_temp_free_i32(tcg_op2); - tcg_temp_free_i64(tcg_res); + read_vec_element_i32(s, tcg_op, rn, pass, memop); + genfn(tcg_op, cpu_env, tcg_op, tcg_shift); + if (scalar) { + switch (size) { + case 0: + tcg_gen_ext8u_i32(tcg_op, tcg_op); + break; + case 1: + tcg_gen_ext16u_i32(tcg_op, tcg_op); + break; + case 2: + break; + default: + g_assert_not_reached(); + } + write_fp_sreg(s, rd, tcg_op); + } else { + write_vec_element_i32(s, tcg_op, rd, pass, MO_32); + } + + tcg_temp_free_i32(tcg_op); + } + tcg_temp_free_i32(tcg_shift); + + if (!is_q && !scalar) { + clear_vec_high(s, rd); + } } } -static void handle_3same_64(DisasContext *s, int opcode, bool u, - TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm) +/* Common vector code for handling integer to FP conversion */ +static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn, + int elements, int is_signed, + int fracbits, int size) { - /* Handle 64x64->64 opcodes which are shared between the scalar - * and vector 3-same groups. We cover every opcode where size == 3 - * is valid in either the three-reg-same (integer, not pairwise) - * or scalar-three-reg-same groups. (Some opcodes are not yet - * implemented.) - */ - TCGCond cond; + bool is_double = size == 3 ? true : false; + TCGv_ptr tcg_fpst = get_fpstatus_ptr(); + TCGv_i32 tcg_shift = tcg_const_i32(fracbits); + TCGv_i64 tcg_int = tcg_temp_new_i64(); + TCGMemOp mop = size | (is_signed ? MO_SIGN : 0); + int pass; - switch (opcode) { - case 0x1: /* SQADD */ - if (u) { - gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm); + for (pass = 0; pass < elements; pass++) { + read_vec_element(s, tcg_int, rn, pass, mop); + + if (is_double) { + TCGv_i64 tcg_double = tcg_temp_new_i64(); + if (is_signed) { + gen_helper_vfp_sqtod(tcg_double, tcg_int, + tcg_shift, tcg_fpst); + } else { + gen_helper_vfp_uqtod(tcg_double, tcg_int, + tcg_shift, tcg_fpst); + } + if (elements == 1) { + write_fp_dreg(s, rd, tcg_double); + } else { + write_vec_element(s, tcg_double, rd, pass, MO_64); + } + tcg_temp_free_i64(tcg_double); } else { - gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm); + TCGv_i32 tcg_single = tcg_temp_new_i32(); + if (is_signed) { + gen_helper_vfp_sqtos(tcg_single, tcg_int, + tcg_shift, tcg_fpst); + } else { + gen_helper_vfp_uqtos(tcg_single, tcg_int, + tcg_shift, tcg_fpst); + } + if (elements == 1) { + write_fp_sreg(s, rd, tcg_single); + } else { + write_vec_element_i32(s, tcg_single, rd, pass, MO_32); + } + tcg_temp_free_i32(tcg_single); + } + } + + if (!is_double && elements == 2) { + clear_vec_high(s, rd); + } + + tcg_temp_free_i64(tcg_int); + tcg_temp_free_ptr(tcg_fpst); + tcg_temp_free_i32(tcg_shift); +} + +/* UCVTF/SCVTF - Integer to FP conversion */ +static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar, + bool is_q, bool is_u, + int immh, int immb, int opcode, + int rn, int rd) +{ + bool is_double = extract32(immh, 3, 1); + int size = is_double ? MO_64 : MO_32; + int elements; + int immhb = immh << 3 | immb; + int fracbits = (is_double ? 128 : 64) - immhb; + + if (!extract32(immh, 2, 2)) { + unallocated_encoding(s); + return; + } + + if (is_scalar) { + elements = 1; + } else { + elements = is_double ? 2 : is_q ? 4 : 2; + if (is_double && !is_q) { + unallocated_encoding(s); + return; + } + } + /* immh == 0 would be a failure of the decode logic */ + g_assert(immh); + + handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size); +} + +/* FCVTZS, FVCVTZU - FP to fixedpoint conversion */ +static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, + bool is_q, bool is_u, + int immh, int immb, int rn, int rd) +{ + bool is_double = extract32(immh, 3, 1); + int immhb = immh << 3 | immb; + int fracbits = (is_double ? 128 : 64) - immhb; + int pass; + TCGv_ptr tcg_fpstatus; + TCGv_i32 tcg_rmode, tcg_shift; + + if (!extract32(immh, 2, 2)) { + unallocated_encoding(s); + return; + } + + if (!is_scalar && !is_q && is_double) { + unallocated_encoding(s); + return; + } + + assert(!(is_scalar && is_q)); + + tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO)); + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + tcg_fpstatus = get_fpstatus_ptr(); + tcg_shift = tcg_const_i32(fracbits); + + if (is_double) { + int maxpass = is_scalar ? 1 : is_q ? 2 : 1; + + for (pass = 0; pass < maxpass; pass++) { + TCGv_i64 tcg_op = tcg_temp_new_i64(); + + read_vec_element(s, tcg_op, rn, pass, MO_64); + if (is_u) { + gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); + } else { + gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); + } + write_vec_element(s, tcg_op, rd, pass, MO_64); + tcg_temp_free_i64(tcg_op); + } + if (!is_q) { + clear_vec_high(s, rd); + } + } else { + int maxpass = is_scalar ? 1 : is_q ? 4 : 2; + for (pass = 0; pass < maxpass; pass++) { + TCGv_i32 tcg_op = tcg_temp_new_i32(); + + read_vec_element_i32(s, tcg_op, rn, pass, MO_32); + if (is_u) { + gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); + } else { + gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); + } + if (is_scalar) { + write_fp_sreg(s, rd, tcg_op); + } else { + write_vec_element_i32(s, tcg_op, rd, pass, MO_32); + } + tcg_temp_free_i32(tcg_op); + } + if (!is_q && !is_scalar) { + clear_vec_high(s, rd); + } + } + + tcg_temp_free_ptr(tcg_fpstatus); + tcg_temp_free_i32(tcg_shift); + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + tcg_temp_free_i32(tcg_rmode); +} + +/* C3.6.9 AdvSIMD scalar shift by immediate + * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 + * +-----+---+-------------+------+------+--------+---+------+------+ + * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd | + * +-----+---+-------------+------+------+--------+---+------+------+ + * + * This is the scalar version so it works on a fixed sized registers + */ +static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn) +{ + int rd = extract32(insn, 0, 5); + int rn = extract32(insn, 5, 5); + int opcode = extract32(insn, 11, 5); + int immb = extract32(insn, 16, 3); + int immh = extract32(insn, 19, 4); + bool is_u = extract32(insn, 29, 1); + + if (immh == 0) { + unallocated_encoding(s); + return; + } + + switch (opcode) { + case 0x08: /* SRI */ + if (!is_u) { + unallocated_encoding(s); + return; + } + /* fall through */ + case 0x00: /* SSHR / USHR */ + case 0x02: /* SSRA / USRA */ + case 0x04: /* SRSHR / URSHR */ + case 0x06: /* SRSRA / URSRA */ + handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd); + break; + case 0x0a: /* SHL / SLI */ + handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd); + break; + case 0x1c: /* SCVTF, UCVTF */ + handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb, + opcode, rn, rd); + break; + case 0x10: /* SQSHRUN, SQSHRUN2 */ + case 0x11: /* SQRSHRUN, SQRSHRUN2 */ + if (!is_u) { + unallocated_encoding(s); + return; + } + handle_vec_simd_sqshrn(s, true, false, false, true, + immh, immb, opcode, rn, rd); + break; + case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */ + case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */ + handle_vec_simd_sqshrn(s, true, false, is_u, is_u, + immh, immb, opcode, rn, rd); + break; + case 0xc: /* SQSHLU */ + if (!is_u) { + unallocated_encoding(s); + return; + } + handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd); + break; + case 0xe: /* SQSHL, UQSHL */ + handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd); + break; + case 0x1f: /* FCVTZS, FCVTZU */ + handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd); + break; + default: + unallocated_encoding(s); + break; + } +} + +/* C3.6.10 AdvSIMD scalar three different + * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 + * +-----+---+-----------+------+---+------+--------+-----+------+------+ + * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd | + * +-----+---+-----------+------+---+------+--------+-----+------+------+ + */ +static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) +{ + bool is_u = extract32(insn, 29, 1); + int size = extract32(insn, 22, 2); + int opcode = extract32(insn, 12, 4); + int rm = extract32(insn, 16, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + + if (is_u) { + unallocated_encoding(s); + return; + } + + switch (opcode) { + case 0x9: /* SQDMLAL, SQDMLAL2 */ + case 0xb: /* SQDMLSL, SQDMLSL2 */ + case 0xd: /* SQDMULL, SQDMULL2 */ + if (size == 0 || size == 3) { + unallocated_encoding(s); + return; + } + break; + default: + unallocated_encoding(s); + return; + } + + if (size == 2) { + TCGv_i64 tcg_op1 = tcg_temp_new_i64(); + TCGv_i64 tcg_op2 = tcg_temp_new_i64(); + TCGv_i64 tcg_res = tcg_temp_new_i64(); + + read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN); + read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN); + + tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2); + gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res); + + switch (opcode) { + case 0xd: /* SQDMULL, SQDMULL2 */ + break; + case 0xb: /* SQDMLSL, SQDMLSL2 */ + tcg_gen_neg_i64(tcg_res, tcg_res); + /* fall through */ + case 0x9: /* SQDMLAL, SQDMLAL2 */ + read_vec_element(s, tcg_op1, rd, 0, MO_64); + gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, + tcg_res, tcg_op1); + break; + default: + g_assert_not_reached(); + } + + write_fp_dreg(s, rd, tcg_res); + + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + tcg_temp_free_i64(tcg_res); + } else { + TCGv_i32 tcg_op1 = tcg_temp_new_i32(); + TCGv_i32 tcg_op2 = tcg_temp_new_i32(); + TCGv_i64 tcg_res = tcg_temp_new_i64(); + + read_vec_element_i32(s, tcg_op1, rn, 0, MO_16); + read_vec_element_i32(s, tcg_op2, rm, 0, MO_16); + + gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2); + gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res); + + switch (opcode) { + case 0xd: /* SQDMULL, SQDMULL2 */ + break; + case 0xb: /* SQDMLSL, SQDMLSL2 */ + gen_helper_neon_negl_u32(tcg_res, tcg_res); + /* fall through */ + case 0x9: /* SQDMLAL, SQDMLAL2 */ + { + TCGv_i64 tcg_op3 = tcg_temp_new_i64(); + read_vec_element(s, tcg_op3, rd, 0, MO_32); + gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, + tcg_res, tcg_op3); + tcg_temp_free_i64(tcg_op3); + break; + } + default: + g_assert_not_reached(); + } + + tcg_gen_ext32u_i64(tcg_res, tcg_res); + write_fp_dreg(s, rd, tcg_res); + + tcg_temp_free_i32(tcg_op1); + tcg_temp_free_i32(tcg_op2); + tcg_temp_free_i64(tcg_res); + } +} + +static void handle_3same_64(DisasContext *s, int opcode, bool u, + TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm) +{ + /* Handle 64x64->64 opcodes which are shared between the scalar + * and vector 3-same groups. We cover every opcode where size == 3 + * is valid in either the three-reg-same (integer, not pairwise) + * or scalar-three-reg-same groups. (Some opcodes are not yet + * implemented.) + */ + TCGCond cond; + + switch (opcode) { + case 0x1: /* SQADD */ + if (u) { + gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm); + } else { + gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm); } break; case 0x5: /* SQSUB */ @@ -6451,21 +6919,38 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn) } static void handle_2misc_64(DisasContext *s, int opcode, bool u, - TCGv_i64 tcg_rd, TCGv_i64 tcg_rn) + TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, + TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus) { /* Handle 64->64 opcodes which are shared between the scalar and * vector 2-reg-misc groups. We cover every integer opcode where size == 3 * is valid in either group and also the double-precision fp ops. + * The caller only need provide tcg_rmode and tcg_fpstatus if the op + * requires them. */ TCGCond cond; switch (opcode) { + case 0x4: /* CLS, CLZ */ + if (u) { + gen_helper_clz64(tcg_rd, tcg_rn); + } else { + gen_helper_cls64(tcg_rd, tcg_rn); + } + break; case 0x5: /* NOT */ /* This opcode is shared with CNT and RBIT but we have earlier * enforced that size == 3 if and only if this is the NOT insn. */ tcg_gen_not_i64(tcg_rd, tcg_rn); break; + case 0x7: /* SQABS, SQNEG */ + if (u) { + gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn); + } else { + gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn); + } + break; case 0xa: /* CMLT */ /* 64 bit integer comparison against zero, result is * test ? (2^64 - 1) : 0. We implement via setcond(!test) and @@ -6499,6 +6984,42 @@ static void handle_2misc_64(DisasContext *s, int opcode, bool u, case 0x6f: /* FNEG */ gen_helper_vfp_negd(tcg_rd, tcg_rn); break; + case 0x7f: /* FSQRT */ + gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env); + break; + case 0x1a: /* FCVTNS */ + case 0x1b: /* FCVTMS */ + case 0x1c: /* FCVTAS */ + case 0x3a: /* FCVTPS */ + case 0x3b: /* FCVTZS */ + { + TCGv_i32 tcg_shift = tcg_const_i32(0); + gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus); + tcg_temp_free_i32(tcg_shift); + break; + } + case 0x5a: /* FCVTNU */ + case 0x5b: /* FCVTMU */ + case 0x5c: /* FCVTAU */ + case 0x7a: /* FCVTPU */ + case 0x7b: /* FCVTZU */ + { + TCGv_i32 tcg_shift = tcg_const_i32(0); + gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus); + tcg_temp_free_i32(tcg_shift); + break; + } + case 0x18: /* FRINTN */ + case 0x19: /* FRINTM */ + case 0x38: /* FRINTP */ + case 0x39: /* FRINTZ */ + case 0x58: /* FRINTA */ + case 0x79: /* FRINTI */ + gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus); + break; + case 0x59: /* FRINTX */ + gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus); + break; default: g_assert_not_reached(); } @@ -6597,20 +7118,303 @@ static void handle_2misc_fcmp_zero(DisasContext *s, int opcode, genfn(tcg_res, tcg_op, tcg_zero, fpst); } if (is_scalar) { - write_fp_sreg(s, rd, tcg_res); - } else { - write_vec_element_i32(s, tcg_res, rd, pass, MO_32); + write_fp_sreg(s, rd, tcg_res); + } else { + write_vec_element_i32(s, tcg_res, rd, pass, MO_32); + } + } + tcg_temp_free_i32(tcg_res); + tcg_temp_free_i32(tcg_zero); + tcg_temp_free_i32(tcg_op); + if (!is_q && !is_scalar) { + clear_vec_high(s, rd); + } + } + + tcg_temp_free_ptr(fpst); +} + +static void handle_2misc_reciprocal(DisasContext *s, int opcode, + bool is_scalar, bool is_u, bool is_q, + int size, int rn, int rd) +{ + bool is_double = (size == 3); + TCGv_ptr fpst = get_fpstatus_ptr(); + + if (is_double) { + TCGv_i64 tcg_op = tcg_temp_new_i64(); + TCGv_i64 tcg_res = tcg_temp_new_i64(); + int pass; + + for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { + read_vec_element(s, tcg_op, rn, pass, MO_64); + switch (opcode) { + case 0x3d: /* FRECPE */ + gen_helper_recpe_f64(tcg_res, tcg_op, fpst); + break; + case 0x3f: /* FRECPX */ + gen_helper_frecpx_f64(tcg_res, tcg_op, fpst); + break; + case 0x7d: /* FRSQRTE */ + gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst); + break; + default: + g_assert_not_reached(); + } + write_vec_element(s, tcg_res, rd, pass, MO_64); + } + if (is_scalar) { + clear_vec_high(s, rd); + } + + tcg_temp_free_i64(tcg_res); + tcg_temp_free_i64(tcg_op); + } else { + TCGv_i32 tcg_op = tcg_temp_new_i32(); + TCGv_i32 tcg_res = tcg_temp_new_i32(); + int pass, maxpasses; + + if (is_scalar) { + maxpasses = 1; + } else { + maxpasses = is_q ? 4 : 2; + } + + for (pass = 0; pass < maxpasses; pass++) { + read_vec_element_i32(s, tcg_op, rn, pass, MO_32); + + switch (opcode) { + case 0x3c: /* URECPE */ + gen_helper_recpe_u32(tcg_res, tcg_op, fpst); + break; + case 0x3d: /* FRECPE */ + gen_helper_recpe_f32(tcg_res, tcg_op, fpst); + break; + case 0x3f: /* FRECPX */ + gen_helper_frecpx_f32(tcg_res, tcg_op, fpst); + break; + case 0x7d: /* FRSQRTE */ + gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst); + break; + default: + g_assert_not_reached(); + } + + if (is_scalar) { + write_fp_sreg(s, rd, tcg_res); + } else { + write_vec_element_i32(s, tcg_res, rd, pass, MO_32); + } + } + tcg_temp_free_i32(tcg_res); + tcg_temp_free_i32(tcg_op); + if (!is_q && !is_scalar) { + clear_vec_high(s, rd); + } + } + tcg_temp_free_ptr(fpst); +} + +static void handle_2misc_narrow(DisasContext *s, bool scalar, + int opcode, bool u, bool is_q, + int size, int rn, int rd) +{ + /* Handle 2-reg-misc ops which are narrowing (so each 2*size element + * in the source becomes a size element in the destination). + */ + int pass; + TCGv_i32 tcg_res[2]; + int destelt = is_q ? 2 : 0; + int passes = scalar ? 1 : 2; + + if (scalar) { + tcg_res[1] = tcg_const_i32(0); + } + + for (pass = 0; pass < passes; pass++) { + TCGv_i64 tcg_op = tcg_temp_new_i64(); + NeonGenNarrowFn *genfn = NULL; + NeonGenNarrowEnvFn *genenvfn = NULL; + + if (scalar) { + read_vec_element(s, tcg_op, rn, pass, size + 1); + } else { + read_vec_element(s, tcg_op, rn, pass, MO_64); + } + tcg_res[pass] = tcg_temp_new_i32(); + + switch (opcode) { + case 0x12: /* XTN, SQXTUN */ + { + static NeonGenNarrowFn * const xtnfns[3] = { + gen_helper_neon_narrow_u8, + gen_helper_neon_narrow_u16, + tcg_gen_trunc_i64_i32, + }; + static NeonGenNarrowEnvFn * const sqxtunfns[3] = { + gen_helper_neon_unarrow_sat8, + gen_helper_neon_unarrow_sat16, + gen_helper_neon_unarrow_sat32, + }; + if (u) { + genenvfn = sqxtunfns[size]; + } else { + genfn = xtnfns[size]; + } + break; + } + case 0x14: /* SQXTN, UQXTN */ + { + static NeonGenNarrowEnvFn * const fns[3][2] = { + { gen_helper_neon_narrow_sat_s8, + gen_helper_neon_narrow_sat_u8 }, + { gen_helper_neon_narrow_sat_s16, + gen_helper_neon_narrow_sat_u16 }, + { gen_helper_neon_narrow_sat_s32, + gen_helper_neon_narrow_sat_u32 }, + }; + genenvfn = fns[size][u]; + break; + } + case 0x16: /* FCVTN, FCVTN2 */ + /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */ + if (size == 2) { + gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env); + } else { + TCGv_i32 tcg_lo = tcg_temp_new_i32(); + TCGv_i32 tcg_hi = tcg_temp_new_i32(); + tcg_gen_trunc_i64_i32(tcg_lo, tcg_op); + gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, cpu_env); + tcg_gen_shri_i64(tcg_op, tcg_op, 32); + tcg_gen_trunc_i64_i32(tcg_hi, tcg_op); + gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, cpu_env); + tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16); + tcg_temp_free_i32(tcg_lo); + tcg_temp_free_i32(tcg_hi); + } + break; + case 0x56: /* FCVTXN, FCVTXN2 */ + /* 64 bit to 32 bit float conversion + * with von Neumann rounding (round to odd) + */ + assert(size == 2); + gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env); + break; + default: + g_assert_not_reached(); + } + + if (genfn) { + genfn(tcg_res[pass], tcg_op); + } else if (genenvfn) { + genenvfn(tcg_res[pass], cpu_env, tcg_op); + } + + tcg_temp_free_i64(tcg_op); + } + + for (pass = 0; pass < 2; pass++) { + write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32); + tcg_temp_free_i32(tcg_res[pass]); + } + if (!is_q) { + clear_vec_high(s, rd); + } +} + +/* Remaining saturating accumulating ops */ +static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u, + bool is_q, int size, int rn, int rd) +{ + bool is_double = (size == 3); + + if (is_double) { + TCGv_i64 tcg_rn = tcg_temp_new_i64(); + TCGv_i64 tcg_rd = tcg_temp_new_i64(); + int pass; + + for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { + read_vec_element(s, tcg_rn, rn, pass, MO_64); + read_vec_element(s, tcg_rd, rd, pass, MO_64); + + if (is_u) { /* USQADD */ + gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd); + } else { /* SUQADD */ + gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd); + } + write_vec_element(s, tcg_rd, rd, pass, MO_64); + } + if (is_scalar) { + clear_vec_high(s, rd); + } + + tcg_temp_free_i64(tcg_rd); + tcg_temp_free_i64(tcg_rn); + } else { + TCGv_i32 tcg_rn = tcg_temp_new_i32(); + TCGv_i32 tcg_rd = tcg_temp_new_i32(); + int pass, maxpasses; + + if (is_scalar) { + maxpasses = 1; + } else { + maxpasses = is_q ? 4 : 2; + } + + for (pass = 0; pass < maxpasses; pass++) { + if (is_scalar) { + read_vec_element_i32(s, tcg_rn, rn, pass, size); + read_vec_element_i32(s, tcg_rd, rd, pass, size); + } else { + read_vec_element_i32(s, tcg_rn, rn, pass, MO_32); + read_vec_element_i32(s, tcg_rd, rd, pass, MO_32); + } + + if (is_u) { /* USQADD */ + switch (size) { + case 0: + gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd); + break; + case 1: + gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd); + break; + case 2: + gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd); + break; + default: + g_assert_not_reached(); + } + } else { /* SUQADD */ + switch (size) { + case 0: + gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd); + break; + case 1: + gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd); + break; + case 2: + gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd); + break; + default: + g_assert_not_reached(); + } + } + + if (is_scalar) { + TCGv_i64 tcg_zero = tcg_const_i64(0); + write_vec_element(s, tcg_zero, rd, 0, MO_64); + tcg_temp_free_i64(tcg_zero); } + write_vec_element_i32(s, tcg_rd, rd, pass, MO_32); } - tcg_temp_free_i32(tcg_res); - tcg_temp_free_i32(tcg_zero); - tcg_temp_free_i32(tcg_op); - if (!is_q && !is_scalar) { + + if (!is_q) { clear_vec_high(s, rd); } - } - tcg_temp_free_ptr(fpst); + tcg_temp_free_i32(tcg_rd); + tcg_temp_free_i32(tcg_rn); + } } /* C3.6.12 AdvSIMD scalar two reg misc @@ -6626,8 +7430,17 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) int opcode = extract32(insn, 12, 5); int size = extract32(insn, 22, 2); bool u = extract32(insn, 29, 1); + bool is_fcvt = false; + int rmode; + TCGv_i32 tcg_rmode; + TCGv_ptr tcg_fpstatus; switch (opcode) { + case 0x3: /* USQADD / SUQADD*/ + handle_2misc_satacc(s, true, u, false, size, rn, rd); + return; + case 0x7: /* SQABS / SQNEG */ + break; case 0xa: /* CMLT */ if (u) { unallocated_encoding(s); @@ -6642,6 +7455,19 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) return; } break; + case 0x12: /* SQXTUN */ + if (u) { + unallocated_encoding(s); + return; + } + /* fall through */ + case 0x14: /* SQXTN, UQXTN */ + if (size == 3) { + unallocated_encoding(s); + return; + } + handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd); + return; case 0xc ... 0xf: case 0x16 ... 0x1d: case 0x1f: @@ -6658,23 +7484,41 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) case 0x6d: /* FCMLE (zero) */ handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd); return; + case 0x1d: /* SCVTF */ + case 0x5d: /* UCVTF */ + { + bool is_signed = (opcode == 0x1d); + handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size); + return; + } + case 0x3d: /* FRECPE */ + case 0x3f: /* FRECPX */ + case 0x7d: /* FRSQRTE */ + handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd); + return; case 0x1a: /* FCVTNS */ case 0x1b: /* FCVTMS */ - case 0x1c: /* FCVTAS */ - case 0x1d: /* SCVTF */ case 0x3a: /* FCVTPS */ case 0x3b: /* FCVTZS */ - case 0x3d: /* FRECPE */ - case 0x3f: /* FRECPX */ - case 0x56: /* FCVTXN, FCVTXN2 */ case 0x5a: /* FCVTNU */ case 0x5b: /* FCVTMU */ - case 0x5c: /* FCVTAU */ - case 0x5d: /* UCVTF */ case 0x7a: /* FCVTPU */ case 0x7b: /* FCVTZU */ - case 0x7d: /* FRSQRTE */ - unsupported_encoding(s, insn); + is_fcvt = true; + rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); + break; + case 0x1c: /* FCVTAS */ + case 0x5c: /* FCVTAU */ + /* TIEAWAY doesn't fit in the usual rounding mode encoding */ + is_fcvt = true; + rmode = FPROUNDING_TIEAWAY; + break; + case 0x56: /* FCVTXN, FCVTXN2 */ + if (size == 2) { + unallocated_encoding(s); + return; + } + handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd); return; default: unallocated_encoding(s); @@ -6682,26 +7526,81 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) } break; default: - /* Other categories of encoding in this class: - * + SUQADD/USQADD/SQABS/SQNEG : size 8, 16, 32 or 64 - * + SQXTN/SQXTN2/SQXTUN/SQXTUN2/UQXTN/UQXTN2: - * narrowing saturate ops: size 64/32/16 -> 32/16/8 - */ - unsupported_encoding(s, insn); + unallocated_encoding(s); return; } + if (is_fcvt) { + tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + tcg_fpstatus = get_fpstatus_ptr(); + } else { + TCGV_UNUSED_I32(tcg_rmode); + TCGV_UNUSED_PTR(tcg_fpstatus); + } + if (size == 3) { TCGv_i64 tcg_rn = read_fp_dreg(s, rn); TCGv_i64 tcg_rd = tcg_temp_new_i64(); - handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn); + handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus); write_fp_dreg(s, rd, tcg_rd); tcg_temp_free_i64(tcg_rd); tcg_temp_free_i64(tcg_rn); } else { - /* the 'size might not be 64' ops aren't implemented yet */ - g_assert_not_reached(); + TCGv_i32 tcg_rn = tcg_temp_new_i32(); + TCGv_i32 tcg_rd = tcg_temp_new_i32(); + + read_vec_element_i32(s, tcg_rn, rn, 0, size); + + switch (opcode) { + case 0x7: /* SQABS, SQNEG */ + { + NeonGenOneOpEnvFn *genfn; + static NeonGenOneOpEnvFn * const fns[3][2] = { + { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, + { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, + { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 }, + }; + genfn = fns[size][u]; + genfn(tcg_rd, cpu_env, tcg_rn); + break; + } + case 0x1a: /* FCVTNS */ + case 0x1b: /* FCVTMS */ + case 0x1c: /* FCVTAS */ + case 0x3a: /* FCVTPS */ + case 0x3b: /* FCVTZS */ + { + TCGv_i32 tcg_shift = tcg_const_i32(0); + gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus); + tcg_temp_free_i32(tcg_shift); + break; + } + case 0x5a: /* FCVTNU */ + case 0x5b: /* FCVTMU */ + case 0x5c: /* FCVTAU */ + case 0x7a: /* FCVTPU */ + case 0x7b: /* FCVTZU */ + { + TCGv_i32 tcg_shift = tcg_const_i32(0); + gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus); + tcg_temp_free_i32(tcg_shift); + break; + } + default: + g_assert_not_reached(); + } + + write_fp_sreg(s, rd, tcg_rd); + tcg_temp_free_i32(tcg_rd); + tcg_temp_free_i32(tcg_rn); + } + + if (is_fcvt) { + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + tcg_temp_free_i32(tcg_rmode); + tcg_temp_free_ptr(tcg_fpstatus); } } @@ -6714,6 +7613,7 @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, int shift = 2 * (8 << size) - immhb; bool accumulate = false; bool round = false; + bool insert = false; int dsize = is_q ? 128 : 64; int esize = 8 << size; int elements = dsize/esize; @@ -6743,6 +7643,9 @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, case 0x06: /* SRSRA / URSRA (accum + rounding) */ accumulate = round = true; break; + case 0x08: /* SRI */ + insert = true; + break; } if (round) { @@ -6754,12 +7657,16 @@ static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u, for (i = 0; i < elements; i++) { read_vec_element(s, tcg_rn, rn, i, memop); - if (accumulate) { + if (accumulate || insert) { read_vec_element(s, tcg_rd, rd, i, memop); } - handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, - accumulate, is_u, size, shift); + if (insert) { + handle_shri_with_ins(tcg_rd, tcg_rn, size, shift); + } else { + handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, + accumulate, is_u, size, shift); + } write_vec_element(s, tcg_rd, rd, i, size); } @@ -6846,6 +7753,62 @@ static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u, } } +/* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */ +static void handle_vec_simd_shrn(DisasContext *s, bool is_q, + int immh, int immb, int opcode, int rn, int rd) +{ + int immhb = immh << 3 | immb; + int size = 32 - clz32(immh) - 1; + int dsize = 64; + int esize = 8 << size; + int elements = dsize/esize; + int shift = (2 * esize) - immhb; + bool round = extract32(opcode, 0, 1); + TCGv_i64 tcg_rn, tcg_rd, tcg_final; + TCGv_i64 tcg_round; + int i; + + if (extract32(immh, 3, 1)) { + unallocated_encoding(s); + return; + } + + tcg_rn = tcg_temp_new_i64(); + tcg_rd = tcg_temp_new_i64(); + tcg_final = tcg_temp_new_i64(); + read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64); + + if (round) { + uint64_t round_const = 1ULL << (shift - 1); + tcg_round = tcg_const_i64(round_const); + } else { + TCGV_UNUSED_I64(tcg_round); + } + + for (i = 0; i < elements; i++) { + read_vec_element(s, tcg_rn, rn, i, size+1); + handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round, + false, true, size+1, shift); + + tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize); + } + + if (!is_q) { + clear_vec_high(s, rd); + write_vec_element(s, tcg_final, rd, 0, MO_64); + } else { + write_vec_element(s, tcg_final, rd, 1, MO_64); + } + + if (round) { + tcg_temp_free_i64(tcg_round); + } + tcg_temp_free_i64(tcg_rn); + tcg_temp_free_i64(tcg_rd); + tcg_temp_free_i64(tcg_final); + return; +} + /* C3.6.14 AdvSIMD shift by immediate * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0 @@ -6864,6 +7827,12 @@ static void disas_simd_shift_imm(DisasContext *s, uint32_t insn) bool is_q = extract32(insn, 30, 1); switch (opcode) { + case 0x08: /* SRI */ + if (!is_u) { + unallocated_encoding(s); + return; + } + /* fall through */ case 0x00: /* SSHR / USHR */ case 0x02: /* SSRA / USRA (accumulate) */ case 0x04: /* SRSHR / URSHR (rounding) */ @@ -6873,15 +7842,42 @@ static void disas_simd_shift_imm(DisasContext *s, uint32_t insn) case 0x0a: /* SHL / SLI */ handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd); break; + case 0x10: /* SHRN */ + case 0x11: /* RSHRN / SQRSHRUN */ + if (is_u) { + handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb, + opcode, rn, rd); + } else { + handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd); + } + break; + case 0x12: /* SQSHRN / UQSHRN */ + case 0x13: /* SQRSHRN / UQRSHRN */ + handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb, + opcode, rn, rd); + break; case 0x14: /* SSHLL / USHLL */ handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd); break; + case 0x1c: /* SCVTF / UCVTF */ + handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb, + opcode, rn, rd); + break; + case 0xc: /* SQSHLU */ + if (!is_u) { + unallocated_encoding(s); + return; + } + handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd); + break; + case 0xe: /* SQSHL, UQSHL */ + handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd); + break; + case 0x1f: /* FCVTZS/ FCVTZU */ + handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd); + return; default: - /* We don't currently implement any of the Narrow or saturating shifts; - * nor do we implement the fixed-point conversions in this - * encoding group (SCVTF, FCVTZS, UCVTF, FCVTZU). - */ - unsupported_encoding(s, insn); + unallocated_encoding(s); return; } } @@ -7092,6 +8088,10 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size, gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env, tcg_passres, tcg_passres); break; + case 14: /* PMULL */ + assert(size == 0); + gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2); + break; default: g_assert_not_reached(); } @@ -7211,6 +8211,30 @@ static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size, } } +static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm) +{ + /* PMULL of 64 x 64 -> 128 is an odd special case because it + * is the only three-reg-diff instruction which produces a + * 128-bit wide result from a single operation. However since + * it's possible to calculate the two halves more or less + * separately we just use two helper calls. + */ + TCGv_i64 tcg_op1 = tcg_temp_new_i64(); + TCGv_i64 tcg_op2 = tcg_temp_new_i64(); + TCGv_i64 tcg_res = tcg_temp_new_i64(); + + read_vec_element(s, tcg_op1, rn, is_q, MO_64); + read_vec_element(s, tcg_op2, rm, is_q, MO_64); + gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2); + write_vec_element(s, tcg_res, rd, 0, MO_64); + gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2); + write_vec_element(s, tcg_res, rd, 1, MO_64); + + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + tcg_temp_free_i64(tcg_res); +} + /* C3.6.15 AdvSIMD three different * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 * +---+---+---+-----------+------+---+------+--------+-----+------+------+ @@ -7261,8 +8285,15 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) unallocated_encoding(s); return; } - unsupported_encoding(s, insn); - break; + if (size == 3) { + if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)) { + unallocated_encoding(s); + return; + } + handle_pmull_64(s, is_q, rd, rn, rm); + return; + } + goto is_widening; case 9: /* SQDMLAL, SQDMLAL2 */ case 11: /* SQDMLSL, SQDMLSL2 */ case 13: /* SQDMULL, SQDMULL2 */ @@ -7283,6 +8314,7 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) unallocated_encoding(s); return; } + is_widening: handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm); break; default: @@ -7894,7 +8926,7 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) genfn = fns[size][is_sub]; read_vec_element_i32(s, tcg_op1, rd, pass, MO_32); - genfn(tcg_res, tcg_res, tcg_op1); + genfn(tcg_res, tcg_op1, tcg_res); } write_vec_element_i32(s, tcg_res, rd, pass, MO_32); @@ -7959,76 +8991,48 @@ static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn) } } -static void handle_2misc_narrow(DisasContext *s, int opcode, bool u, bool is_q, - int size, int rn, int rd) -{ - /* Handle 2-reg-misc ops which are narrowing (so each 2*size element - * in the source becomes a size element in the destination). - */ - int pass; - TCGv_i32 tcg_res[2]; - int destelt = is_q ? 2 : 0; - - for (pass = 0; pass < 2; pass++) { - TCGv_i64 tcg_op = tcg_temp_new_i64(); - NeonGenNarrowFn *genfn = NULL; - NeonGenNarrowEnvFn *genenvfn = NULL; - - read_vec_element(s, tcg_op, rn, pass, MO_64); - tcg_res[pass] = tcg_temp_new_i32(); - - switch (opcode) { - case 0x12: /* XTN, SQXTUN */ - { - static NeonGenNarrowFn * const xtnfns[3] = { - gen_helper_neon_narrow_u8, - gen_helper_neon_narrow_u16, - tcg_gen_trunc_i64_i32, - }; - static NeonGenNarrowEnvFn * const sqxtunfns[3] = { - gen_helper_neon_unarrow_sat8, - gen_helper_neon_unarrow_sat16, - gen_helper_neon_unarrow_sat32, - }; - if (u) { - genenvfn = sqxtunfns[size]; - } else { - genfn = xtnfns[size]; - } - break; - } - case 0x14: /* SQXTN, UQXTN */ - { - static NeonGenNarrowEnvFn * const fns[3][2] = { - { gen_helper_neon_narrow_sat_s8, - gen_helper_neon_narrow_sat_u8 }, - { gen_helper_neon_narrow_sat_s16, - gen_helper_neon_narrow_sat_u16 }, - { gen_helper_neon_narrow_sat_s32, - gen_helper_neon_narrow_sat_u32 }, - }; - genenvfn = fns[size][u]; - break; - } - default: - g_assert_not_reached(); - } - - if (genfn) { - genfn(tcg_res[pass], tcg_op); - } else { - genenvfn(tcg_res[pass], cpu_env, tcg_op); +static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q, + int size, int rn, int rd) +{ + /* Handle 2-reg-misc ops which are widening (so each size element + * in the source becomes a 2*size element in the destination. + * The only instruction like this is FCVTL. + */ + int pass; + + if (size == 3) { + /* 32 -> 64 bit fp conversion */ + TCGv_i64 tcg_res[2]; + int srcelt = is_q ? 2 : 0; + + for (pass = 0; pass < 2; pass++) { + TCGv_i32 tcg_op = tcg_temp_new_i32(); + tcg_res[pass] = tcg_temp_new_i64(); + + read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32); + gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env); + tcg_temp_free_i32(tcg_op); } + for (pass = 0; pass < 2; pass++) { + write_vec_element(s, tcg_res[pass], rd, pass, MO_64); + tcg_temp_free_i64(tcg_res[pass]); + } + } else { + /* 16 -> 32 bit fp conversion */ + int srcelt = is_q ? 4 : 0; + TCGv_i32 tcg_res[4]; - tcg_temp_free_i64(tcg_op); - } + for (pass = 0; pass < 4; pass++) { + tcg_res[pass] = tcg_temp_new_i32(); - for (pass = 0; pass < 2; pass++) { - write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32); - tcg_temp_free_i32(tcg_res[pass]); - } - if (!is_q) { - clear_vec_high(s, rd); + read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16); + gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass], + cpu_env); + } + for (pass = 0; pass < 4; pass++) { + write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32); + tcg_temp_free_i32(tcg_res[pass]); + } } } @@ -8101,6 +9105,108 @@ static void handle_rev(DisasContext *s, int opcode, bool u, } } +static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u, + bool is_q, int size, int rn, int rd) +{ + /* Implement the pairwise operations from 2-misc: + * SADDLP, UADDLP, SADALP, UADALP. + * These all add pairs of elements in the input to produce a + * double-width result element in the output (possibly accumulating). + */ + bool accum = (opcode == 0x6); + int maxpass = is_q ? 2 : 1; + int pass; + TCGv_i64 tcg_res[2]; + + if (size == 2) { + /* 32 + 32 -> 64 op */ + TCGMemOp memop = size + (u ? 0 : MO_SIGN); + + for (pass = 0; pass < maxpass; pass++) { + TCGv_i64 tcg_op1 = tcg_temp_new_i64(); + TCGv_i64 tcg_op2 = tcg_temp_new_i64(); + + tcg_res[pass] = tcg_temp_new_i64(); + + read_vec_element(s, tcg_op1, rn, pass * 2, memop); + read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop); + tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2); + if (accum) { + read_vec_element(s, tcg_op1, rd, pass, MO_64); + tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1); + } + + tcg_temp_free_i64(tcg_op1); + tcg_temp_free_i64(tcg_op2); + } + } else { + for (pass = 0; pass < maxpass; pass++) { + TCGv_i64 tcg_op = tcg_temp_new_i64(); + NeonGenOneOpFn *genfn; + static NeonGenOneOpFn * const fns[2][2] = { + { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 }, + { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 }, + }; + + genfn = fns[size][u]; + + tcg_res[pass] = tcg_temp_new_i64(); + + read_vec_element(s, tcg_op, rn, pass, MO_64); + genfn(tcg_res[pass], tcg_op); + + if (accum) { + read_vec_element(s, tcg_op, rd, pass, MO_64); + if (size == 0) { + gen_helper_neon_addl_u16(tcg_res[pass], + tcg_res[pass], tcg_op); + } else { + gen_helper_neon_addl_u32(tcg_res[pass], + tcg_res[pass], tcg_op); + } + } + tcg_temp_free_i64(tcg_op); + } + } + if (!is_q) { + tcg_res[1] = tcg_const_i64(0); + } + for (pass = 0; pass < 2; pass++) { + write_vec_element(s, tcg_res[pass], rd, pass, MO_64); + tcg_temp_free_i64(tcg_res[pass]); + } +} + +static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd) +{ + /* Implement SHLL and SHLL2 */ + int pass; + int part = is_q ? 2 : 0; + TCGv_i64 tcg_res[2]; + + for (pass = 0; pass < 2; pass++) { + static NeonGenWidenFn * const widenfns[3] = { + gen_helper_neon_widen_u8, + gen_helper_neon_widen_u16, + tcg_gen_extu_i32_i64, + }; + NeonGenWidenFn *widenfn = widenfns[size]; + TCGv_i32 tcg_op = tcg_temp_new_i32(); + + read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32); + tcg_res[pass] = tcg_temp_new_i64(); + widenfn(tcg_res[pass], tcg_op); + tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size); + + tcg_temp_free_i32(tcg_op); + } + + for (pass = 0; pass < 2; pass++) { + write_vec_element(s, tcg_res[pass], rd, pass, MO_64); + tcg_temp_free_i64(tcg_res[pass]); + } +} + /* C3.6.17 AdvSIMD two reg misc * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 * +---+---+---+-----------+------+-----------+--------+-----+------+------+ @@ -8115,6 +9221,11 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) bool is_q = extract32(insn, 30, 1); int rn = extract32(insn, 5, 5); int rd = extract32(insn, 0, 5); + bool need_fpstatus = false; + bool need_rmode = false; + int rmode = -1; + TCGv_i32 tcg_rmode; + TCGv_ptr tcg_fpstatus; switch (opcode) { case 0x0: /* REV64, REV32 */ @@ -8141,23 +9252,28 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) unallocated_encoding(s); return; } - handle_2misc_narrow(s, opcode, u, is_q, size, rn, rd); + handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd); return; - case 0x2: /* SADDLP, UADDLP */ case 0x4: /* CLS, CLZ */ + if (size == 3) { + unallocated_encoding(s); + return; + } + break; + case 0x2: /* SADDLP, UADDLP */ case 0x6: /* SADALP, UADALP */ if (size == 3) { unallocated_encoding(s); return; } - unsupported_encoding(s, insn); + handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd); return; case 0x13: /* SHLL, SHLL2 */ if (u == 0 || size == 3) { unallocated_encoding(s); return; } - unsupported_encoding(s, insn); + handle_shll(s, is_q, size, rn, rd); return; case 0xa: /* CMLT */ if (u == 1) { @@ -8174,13 +9290,18 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) } break; case 0x3: /* SUQADD, USQADD */ - case 0x7: /* SQABS, SQNEG */ if (size == 3 && !is_q) { unallocated_encoding(s); return; } - unsupported_encoding(s, insn); + handle_2misc_satacc(s, false, u, is_q, size, rn, rd); return; + case 0x7: /* SQABS, SQNEG */ + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + break; case 0xc ... 0xf: case 0x16 ... 0x1d: case 0x1f: @@ -8188,8 +9309,9 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) /* Floating point: U, size[1] and opcode indicate operation; * size[0] indicates single or double precision. */ + int is_double = extract32(size, 0, 1); opcode |= (extract32(size, 1, 1) << 5) | (u << 6); - size = extract32(size, 0, 1) ? 3 : 2; + size = is_double ? 3 : 2; switch (opcode) { case 0x2f: /* FABS */ case 0x6f: /* FNEG */ @@ -8198,6 +9320,18 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) return; } break; + case 0x1d: /* SCVTF */ + case 0x5d: /* UCVTF */ + { + bool is_signed = (opcode == 0x1d) ? true : false; + int elements = is_double ? 2 : is_q ? 4 : 2; + if (is_double && !is_q) { + unallocated_encoding(s); + return; + } + handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size); + return; + } case 0x2c: /* FCMGT (zero) */ case 0x2d: /* FCMEQ (zero) */ case 0x2e: /* FCMLT (zero) */ @@ -8209,35 +9343,98 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) } handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd); return; - case 0x16: /* FCVTN, FCVTN2 */ - case 0x17: /* FCVTL, FCVTL2 */ - case 0x18: /* FRINTN */ - case 0x19: /* FRINTM */ + case 0x7f: /* FSQRT */ + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + break; case 0x1a: /* FCVTNS */ case 0x1b: /* FCVTMS */ - case 0x1c: /* FCVTAS */ - case 0x1d: /* SCVTF */ - case 0x38: /* FRINTP */ - case 0x39: /* FRINTZ */ case 0x3a: /* FCVTPS */ case 0x3b: /* FCVTZS */ - case 0x3c: /* URECPE */ - case 0x3d: /* FRECPE */ - case 0x56: /* FCVTXN, FCVTXN2 */ - case 0x58: /* FRINTA */ - case 0x59: /* FRINTX */ case 0x5a: /* FCVTNU */ case 0x5b: /* FCVTMU */ - case 0x5c: /* FCVTAU */ - case 0x5d: /* UCVTF */ - case 0x79: /* FRINTI */ case 0x7a: /* FCVTPU */ case 0x7b: /* FCVTZU */ - case 0x7c: /* URSQRTE */ + need_fpstatus = true; + need_rmode = true; + rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + break; + case 0x5c: /* FCVTAU */ + case 0x1c: /* FCVTAS */ + need_fpstatus = true; + need_rmode = true; + rmode = FPROUNDING_TIEAWAY; + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + break; + case 0x3c: /* URECPE */ + if (size == 3) { + unallocated_encoding(s); + return; + } + /* fall through */ + case 0x3d: /* FRECPE */ case 0x7d: /* FRSQRTE */ - case 0x7f: /* FSQRT */ - unsupported_encoding(s, insn); + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd); + return; + case 0x56: /* FCVTXN, FCVTXN2 */ + if (size == 2) { + unallocated_encoding(s); + return; + } + /* fall through */ + case 0x16: /* FCVTN, FCVTN2 */ + /* handle_2misc_narrow does a 2*size -> size operation, but these + * instructions encode the source size rather than dest size. + */ + handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd); return; + case 0x17: /* FCVTL, FCVTL2 */ + handle_2misc_widening(s, opcode, is_q, size, rn, rd); + return; + case 0x18: /* FRINTN */ + case 0x19: /* FRINTM */ + case 0x38: /* FRINTP */ + case 0x39: /* FRINTZ */ + need_rmode = true; + rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1); + /* fall through */ + case 0x59: /* FRINTX */ + case 0x79: /* FRINTI */ + need_fpstatus = true; + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + break; + case 0x58: /* FRINTA */ + need_rmode = true; + rmode = FPROUNDING_TIEAWAY; + need_fpstatus = true; + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } + break; + case 0x7c: /* URSQRTE */ + if (size == 3) { + unallocated_encoding(s); + return; + } + need_fpstatus = true; + break; default: unallocated_encoding(s); return; @@ -8249,6 +9446,18 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) return; } + if (need_fpstatus) { + tcg_fpstatus = get_fpstatus_ptr(); + } else { + TCGV_UNUSED_PTR(tcg_fpstatus); + } + if (need_rmode) { + tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode)); + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + } else { + TCGV_UNUSED_I32(tcg_rmode); + } + if (size == 3) { /* All 64-bit element operations can be shared with scalar 2misc */ int pass; @@ -8259,7 +9468,8 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) read_vec_element(s, tcg_op, rn, pass, MO_64); - handle_2misc_64(s, opcode, u, tcg_res, tcg_op); + handle_2misc_64(s, opcode, u, tcg_res, tcg_op, + tcg_rmode, tcg_fpstatus); write_vec_element(s, tcg_res, rd, pass, MO_64); @@ -8295,6 +9505,20 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) case 0x9: /* CMEQ, CMLE */ cond = u ? TCG_COND_LE : TCG_COND_EQ; goto do_cmop; + case 0x4: /* CLS */ + if (u) { + gen_helper_clz32(tcg_res, tcg_op); + } else { + gen_helper_cls32(tcg_res, tcg_op); + } + break; + case 0x7: /* SQABS, SQNEG */ + if (u) { + gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op); + } else { + gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op); + } + break; case 0xb: /* ABS, NEG */ if (u) { tcg_gen_neg_i32(tcg_res, tcg_op); @@ -8312,6 +9536,47 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) case 0x6f: /* FNEG */ gen_helper_vfp_negs(tcg_res, tcg_op); break; + case 0x7f: /* FSQRT */ + gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env); + break; + case 0x1a: /* FCVTNS */ + case 0x1b: /* FCVTMS */ + case 0x1c: /* FCVTAS */ + case 0x3a: /* FCVTPS */ + case 0x3b: /* FCVTZS */ + { + TCGv_i32 tcg_shift = tcg_const_i32(0); + gen_helper_vfp_tosls(tcg_res, tcg_op, + tcg_shift, tcg_fpstatus); + tcg_temp_free_i32(tcg_shift); + break; + } + case 0x5a: /* FCVTNU */ + case 0x5b: /* FCVTMU */ + case 0x5c: /* FCVTAU */ + case 0x7a: /* FCVTPU */ + case 0x7b: /* FCVTZU */ + { + TCGv_i32 tcg_shift = tcg_const_i32(0); + gen_helper_vfp_touls(tcg_res, tcg_op, + tcg_shift, tcg_fpstatus); + tcg_temp_free_i32(tcg_shift); + break; + } + case 0x18: /* FRINTN */ + case 0x19: /* FRINTM */ + case 0x38: /* FRINTP */ + case 0x39: /* FRINTZ */ + case 0x58: /* FRINTA */ + case 0x79: /* FRINTI */ + gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus); + break; + case 0x59: /* FRINTX */ + gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus); + break; + case 0x7c: /* URSQRTE */ + gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus); + break; default: g_assert_not_reached(); } @@ -8328,6 +9593,17 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) gen_helper_neon_cnt_u8(tcg_res, tcg_op); } break; + case 0x7: /* SQABS, SQNEG */ + { + NeonGenOneOpEnvFn *genfn; + static NeonGenOneOpEnvFn * const fns[2][2] = { + { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 }, + { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 }, + }; + genfn = fns[size][u]; + genfn(tcg_res, cpu_env, tcg_op); + break; + } case 0x8: /* CMGT, CMGE */ case 0x9: /* CMEQ, CMLE */ case 0xa: /* CMLT */ @@ -8375,6 +9651,21 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) } } break; + case 0x4: /* CLS, CLZ */ + if (u) { + if (size == 0) { + gen_helper_neon_clz_u8(tcg_res, tcg_op); + } else { + gen_helper_neon_clz_u16(tcg_res, tcg_op); + } + } else { + if (size == 0) { + gen_helper_neon_cls_s8(tcg_res, tcg_op); + } else { + gen_helper_neon_cls_s16(tcg_res, tcg_op); + } + } + break; default: g_assert_not_reached(); } @@ -8389,6 +9680,14 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) if (!is_q) { clear_vec_high(s, rd); } + + if (need_rmode) { + gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env); + tcg_temp_free_i32(tcg_rmode); + } + if (need_fpstatus) { + tcg_temp_free_ptr(tcg_fpstatus); + } } /* C3.6.13 AdvSIMD scalar x indexed element @@ -9006,13 +10305,14 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu, dc->condexec_mask = 0; dc->condexec_cond = 0; #if !defined(CONFIG_USER_ONLY) - dc->user = 0; + dc->user = (ARM_TBFLAG_AA64_EL(tb->flags) == 0); #endif dc->vfp_enabled = 0; dc->vec_len = 0; dc->vec_stride = 0; dc->cp_regs = cpu->cp_regs; dc->current_pl = arm_current_pl(env); + dc->features = env->features; init_tmp_a64_array(dc); @@ -9029,8 +10329,8 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu, tcg_clear_temp_count(); do { - if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) { - QTAILQ_FOREACH(bp, &env->breakpoints, entry) { + if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) { + QTAILQ_FOREACH(bp, &cs->breakpoints, entry) { if (bp->pc == dc->pc) { gen_exception_insn(dc, 0, EXCP_DEBUG); /* Advance PC so that clearing the breakpoint will @@ -9117,6 +10417,7 @@ void gen_intermediate_code_internal_a64(ARMCPU *cpu, /* This is a special case because we don't want to just halt the CPU * if trying to debug across a WFI. */ + gen_a64_set_pc_im(dc->pc); gen_helper_wfi(cpu_env); break; }