/*
 *  ARM translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *  Copyright (c) 2005-2007 CodeSourcery
 *  Copyright (c) 2007 OpenedHand, Ltd.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "cpu.h"
#include "internals.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-op-gvec.h"
#include "qemu/log.h"
#include "qemu/bitops.h"
#include "arm_ldst.h"
#include "hw/semihosting/semihost.h"

#include "exec/helper-proto.h"
#include "exec/helper-gen.h"

#include "trace-tcg.h"
#include "exec/log.h"
41 #define ENABLE_ARCH_4T arm_dc_feature(s, ARM_FEATURE_V4T)
42 #define ENABLE_ARCH_5 arm_dc_feature(s, ARM_FEATURE_V5)
43 /* currently all emulated v5 cores are also v5TE, so don't bother */
44 #define ENABLE_ARCH_5TE arm_dc_feature(s, ARM_FEATURE_V5)
45 #define ENABLE_ARCH_5J dc_isar_feature(aa32_jazelle, s)
46 #define ENABLE_ARCH_6 arm_dc_feature(s, ARM_FEATURE_V6)
47 #define ENABLE_ARCH_6K arm_dc_feature(s, ARM_FEATURE_V6K)
48 #define ENABLE_ARCH_6T2 arm_dc_feature(s, ARM_FEATURE_THUMB2)
49 #define ENABLE_ARCH_7 arm_dc_feature(s, ARM_FEATURE_V7)
50 #define ENABLE_ARCH_8 arm_dc_feature(s, ARM_FEATURE_V8)
52 #define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
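/*
 * ARCH() is used at the top of individual insn handlers, e.g. ARCH(6T2)
 * before a Thumb-2-only encoding; when the feature is absent it branches
 * to the enclosing decode function's local "illegal_op" label so the
 * insn is treated as UNDEFINED.
 */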
54 #include "translate.h"
#if defined(CONFIG_USER_ONLY)
#define IS_USER(s) 1
#else
#define IS_USER(s) (s->user)
#endif
62 /* We reuse the same 64-bit temporaries for efficiency. */
63 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
64 static TCGv_i32 cpu_R[16];
65 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
66 TCGv_i64 cpu_exclusive_addr;
67 TCGv_i64 cpu_exclusive_val;
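/*
 * Layout of the split condition flags above, relied on by gen_logic_CC()
 * and arm_test_cc() below: cpu_CF holds 0 or 1, the N and V flags live in
 * bit 31 of cpu_NF and cpu_VF respectively, and cpu_ZF is zero iff the
 * Z flag is set.
 */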
69 #include "exec/gen-icount.h"
71 static const char * const regnames[] =
72 { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
73 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
/* Function prototypes for gen_ functions calling Neon helpers.  */
typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
                                 TCGv_i32, TCGv_i32);
/* Function prototypes for gen_ functions for fixed point conversions */
typedef void VFPGenFixPointFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
/* initialize TCG globals.  */
void arm_translate_init(void)
{
    int i;

    for (i = 0; i < 16; i++) {
        cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
                                          offsetof(CPUARMState, regs[i]),
                                          regnames[i]);
    }
    cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
    cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
    cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
    cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");

    cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
        offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
    cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
        offsetof(CPUARMState, exclusive_val), "exclusive_val");

    a64_translate_init();
}
/* Flags for the disas_set_da_iss info argument:
 * lower bits hold the Rt register number, higher bits are flags.
 */
typedef enum ISSInfo {
    ISSNone = 0,
    ISSRegMask = 0x1f,
    ISSInvalid = (1 << 5),
    ISSIsAcqRel = (1 << 6),
    ISSIsWrite = (1 << 7),
    ISSIs16Bit = (1 << 8),
} ISSInfo;
/* Save the syndrome information for a Data Abort */
static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
{
    uint32_t syn;
    int sas = memop & MO_SIZE;
    bool sse = memop & MO_SIGN;
    bool is_acqrel = issinfo & ISSIsAcqRel;
    bool is_write = issinfo & ISSIsWrite;
    bool is_16bit = issinfo & ISSIs16Bit;
    int srt = issinfo & ISSRegMask;

    if (issinfo & ISSInvalid) {
        /* Some callsites want to conditionally provide ISS info,
         * eg "only if this was not a writeback"
         */
        return;
    }

    if (srt == 15) {
        /* For AArch32, insns where the src/dest is R15 never generate
         * ISS information. Catching that here saves checking at all
         * the callsites.
         */
        return;
    }

    syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
                                  0, 0, 0, is_write, 0, is_16bit);
    disas_set_insn_syndrome(s, syn);
}
147 static inline int get_a32_user_mem_index(DisasContext *s)
149 /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
151 * if PL2, UNPREDICTABLE (we choose to implement as if PL0)
152 * otherwise, access as if at PL0.
154 switch (s->mmu_idx) {
155 case ARMMMUIdx_E2: /* this one is UNPREDICTABLE */
156 case ARMMMUIdx_E10_0:
157 case ARMMMUIdx_E10_1:
158 case ARMMMUIdx_E10_1_PAN:
159 return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
161 case ARMMMUIdx_SE10_0:
162 case ARMMMUIdx_SE10_1:
163 case ARMMMUIdx_SE10_1_PAN:
164 return arm_to_core_mmu_idx(ARMMMUIdx_SE10_0);
165 case ARMMMUIdx_MUser:
166 case ARMMMUIdx_MPriv:
167 return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
168 case ARMMMUIdx_MUserNegPri:
169 case ARMMMUIdx_MPrivNegPri:
170 return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
171 case ARMMMUIdx_MSUser:
172 case ARMMMUIdx_MSPriv:
173 return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
174 case ARMMMUIdx_MSUserNegPri:
175 case ARMMMUIdx_MSPrivNegPri:
176 return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
178 g_assert_not_reached();
static inline TCGv_i32 load_cpu_offset(int offset)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_ld_i32(tmp, cpu_env, offset);
    return tmp;
}

#define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))

static inline void store_cpu_offset(TCGv_i32 var, int offset)
{
    tcg_gen_st_i32(var, cpu_env, offset);
    tcg_temp_free_i32(var);
}

#define store_cpu_field(var, name) \
    store_cpu_offset(var, offsetof(CPUARMState, name))
/* The architectural value of PC.  */
static uint32_t read_pc(DisasContext *s)
{
    return s->pc_curr + (s->thumb ? 4 : 8);
}

/* Set a variable to the value of a CPU register.  */
static void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
{
    if (reg == 15) {
        tcg_gen_movi_i32(var, read_pc(s));
    } else {
        tcg_gen_mov_i32(var, cpu_R[reg]);
    }
}

/* Create a new temporary and set it to the value of a CPU register.  */
static inline TCGv_i32 load_reg(DisasContext *s, int reg)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    load_reg_var(s, tmp, reg);
    return tmp;
}
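/*
 * Example of the pipeline offset applied by read_pc() above: an ARM insn
 * at 0x1000 that reads r15 through load_reg() observes 0x1008, while a
 * Thumb insn at the same address observes 0x1004.
 */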
/*
 * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
 * This is used for load/store for which use of PC implies (literal),
 * or ADD that implies ADR.
 */
static TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
{
    TCGv_i32 tmp = tcg_temp_new_i32();

    if (reg == 15) {
        tcg_gen_movi_i32(tmp, (read_pc(s) & ~3) + ofs);
    } else {
        tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
    }
    return tmp;
}
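/*
 * Example for add_reg_for_lit(): a Thumb LDR (literal) at 0x1002 reads
 * PC as 0x1006 and uses Align(PC, 4) = 0x1004 as the base, which is
 * exactly the (read_pc() & ~3) case above.
 */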
/* Set a CPU register.  The source must be a temporary and will be
   marked as dead.  */
static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
{
    if (reg == 15) {
        /* In Thumb mode, we must ignore bit 0.
         * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
         * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
         * We choose to ignore [1:0] in ARM mode for all architecture versions.
         */
        tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
        s->base.is_jmp = DISAS_JUMP;
    }
    tcg_gen_mov_i32(cpu_R[reg], var);
    tcg_temp_free_i32(var);
}
/*
 * Variant of store_reg which applies v8M stack-limit checks before updating
 * SP. If the check fails this will result in an exception being taken.
 * We disable the stack checks for CONFIG_USER_ONLY because we have
 * no idea what the stack limits should be in that case.
 * If stack checking is not being done this just acts like store_reg().
 */
static void store_sp_checked(DisasContext *s, TCGv_i32 var)
{
#ifndef CONFIG_USER_ONLY
    if (s->v8m_stackcheck) {
        gen_helper_v8m_stackcheck(cpu_env, var);
    }
#endif
    store_reg(s, 13, var);
}
275 /* Value extensions. */
276 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
277 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
278 #define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
279 #define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
281 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
282 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
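/*
 * gen_sxtb16/gen_uxtb16 implement the SXTB16/UXTB16 operation via helpers:
 * bits [7:0] and [23:16] of var are sign- or zero-extended into the low
 * and high halfwords of the result.
 */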
285 static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
287 TCGv_i32 tmp_mask = tcg_const_i32(mask);
288 gen_helper_cpsr_write(cpu_env, var, tmp_mask);
289 tcg_temp_free_i32(tmp_mask);
291 /* Set NZCV flags from the high 4 bits of var. */
292 #define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
294 static void gen_exception_internal(int excp)
296 TCGv_i32 tcg_excp = tcg_const_i32(excp);
298 assert(excp_is_internal(excp));
299 gen_helper_exception_internal(cpu_env, tcg_excp);
300 tcg_temp_free_i32(tcg_excp);
303 static void gen_step_complete_exception(DisasContext *s)
305 /* We just completed step of an insn. Move from Active-not-pending
306 * to Active-pending, and then also take the swstep exception.
307 * This corresponds to making the (IMPDEF) choice to prioritize
308 * swstep exceptions over asynchronous exceptions taken to an exception
309 * level where debug is disabled. This choice has the advantage that
310 * we do not need to maintain internal state corresponding to the
311 * ISV/EX syndrome bits between completion of the step and generation
312 * of the exception, and our syndrome information is always correct.
315 gen_swstep_exception(s, 1, s->is_ldex);
316 s->base.is_jmp = DISAS_NORETURN;
319 static void gen_singlestep_exception(DisasContext *s)
321 /* Generate the right kind of exception for singlestep, which is
322 * either the architectural singlestep or EXCP_DEBUG for QEMU's
323 * gdb singlestepping.
326 gen_step_complete_exception(s);
328 gen_exception_internal(EXCP_DEBUG);
332 static inline bool is_singlestepping(DisasContext *s)
334 /* Return true if we are singlestepping either because of
335 * architectural singlestep or QEMU gdbstub singlestep. This does
336 * not include the command line '-singlestep' mode which is rather
337 * misnamed as it only means "one instruction per TB" and doesn't
338 * affect the code we generate.
340 return s->base.singlestep_enabled || s->ss_active;
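/*
 * gen_smul_dual() below overwrites both of its operands: on return, a
 * holds the signed product of the two low halfwords and b holds the
 * signed product of the two high halfwords.
 */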
343 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
345 TCGv_i32 tmp1 = tcg_temp_new_i32();
346 TCGv_i32 tmp2 = tcg_temp_new_i32();
347 tcg_gen_ext16s_i32(tmp1, a);
348 tcg_gen_ext16s_i32(tmp2, b);
349 tcg_gen_mul_i32(tmp1, tmp1, tmp2);
350 tcg_temp_free_i32(tmp2);
351 tcg_gen_sari_i32(a, a, 16);
352 tcg_gen_sari_i32(b, b, 16);
353 tcg_gen_mul_i32(b, b, a);
354 tcg_gen_mov_i32(a, tmp1);
355 tcg_temp_free_i32(tmp1);
358 /* Byteswap each halfword. */
359 static void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
361 TCGv_i32 tmp = tcg_temp_new_i32();
362 TCGv_i32 mask = tcg_const_i32(0x00ff00ff);
363 tcg_gen_shri_i32(tmp, var, 8);
364 tcg_gen_and_i32(tmp, tmp, mask);
365 tcg_gen_and_i32(var, var, mask);
366 tcg_gen_shli_i32(var, var, 8);
367 tcg_gen_or_i32(dest, var, tmp);
368 tcg_temp_free_i32(mask);
369 tcg_temp_free_i32(tmp);
372 /* Byteswap low halfword and sign extend. */
373 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
375 tcg_gen_ext16u_i32(var, var);
376 tcg_gen_bswap16_i32(var, var);
377 tcg_gen_ext16s_i32(dest, var);
380 /* 32x32->64 multiply. Marks inputs as dead. */
381 static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
383 TCGv_i32 lo = tcg_temp_new_i32();
384 TCGv_i32 hi = tcg_temp_new_i32();
387 tcg_gen_mulu2_i32(lo, hi, a, b);
388 tcg_temp_free_i32(a);
389 tcg_temp_free_i32(b);
391 ret = tcg_temp_new_i64();
392 tcg_gen_concat_i32_i64(ret, lo, hi);
393 tcg_temp_free_i32(lo);
394 tcg_temp_free_i32(hi);
399 static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
401 TCGv_i32 lo = tcg_temp_new_i32();
402 TCGv_i32 hi = tcg_temp_new_i32();
405 tcg_gen_muls2_i32(lo, hi, a, b);
406 tcg_temp_free_i32(a);
407 tcg_temp_free_i32(b);
409 ret = tcg_temp_new_i64();
410 tcg_gen_concat_i32_i64(ret, lo, hi);
411 tcg_temp_free_i32(lo);
412 tcg_temp_free_i32(hi);
417 /* Swap low and high halfwords. */
418 static void gen_swap_half(TCGv_i32 var)
420 tcg_gen_rotri_i32(var, var, 16);
/* Dual 16-bit add.  Result placed in t0 and t1 is marked as dead.
    tmp = (t0 ^ t1) & 0x8000;
    t0 &= ~0x8000;
    t1 &= ~0x8000;
    t0 = (t0 + t1) ^ tmp;
 */
static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_andi_i32(tmp, tmp, 0x8000);
    tcg_gen_andi_i32(t0, t0, ~0x8000);
    tcg_gen_andi_i32(t1, t1, ~0x8000);
    tcg_gen_add_i32(t0, t0, t1);
    tcg_gen_xor_i32(dest, t0, tmp);
    tcg_temp_free_i32(tmp);
}
442 /* Set N and Z flags from var. */
443 static inline void gen_logic_CC(TCGv_i32 var)
445 tcg_gen_mov_i32(cpu_NF, var);
446 tcg_gen_mov_i32(cpu_ZF, var);
449 /* dest = T0 + T1 + CF. */
450 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
452 tcg_gen_add_i32(dest, t0, t1);
453 tcg_gen_add_i32(dest, dest, cpu_CF);
456 /* dest = T0 - T1 + CF - 1. */
457 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
459 tcg_gen_sub_i32(dest, t0, t1);
460 tcg_gen_add_i32(dest, dest, cpu_CF);
461 tcg_gen_subi_i32(dest, dest, 1);
464 /* dest = T0 + T1. Compute C, N, V and Z flags */
465 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
467 TCGv_i32 tmp = tcg_temp_new_i32();
468 tcg_gen_movi_i32(tmp, 0);
469 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
470 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
471 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
472 tcg_gen_xor_i32(tmp, t0, t1);
473 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
474 tcg_temp_free_i32(tmp);
475 tcg_gen_mov_i32(dest, cpu_NF);
478 /* dest = T0 + T1 + CF. Compute C, N, V and Z flags */
479 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
481 TCGv_i32 tmp = tcg_temp_new_i32();
482 if (TCG_TARGET_HAS_add2_i32) {
483 tcg_gen_movi_i32(tmp, 0);
484 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
485 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
487 TCGv_i64 q0 = tcg_temp_new_i64();
488 TCGv_i64 q1 = tcg_temp_new_i64();
489 tcg_gen_extu_i32_i64(q0, t0);
490 tcg_gen_extu_i32_i64(q1, t1);
491 tcg_gen_add_i64(q0, q0, q1);
492 tcg_gen_extu_i32_i64(q1, cpu_CF);
493 tcg_gen_add_i64(q0, q0, q1);
494 tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
495 tcg_temp_free_i64(q0);
496 tcg_temp_free_i64(q1);
498 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
499 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
500 tcg_gen_xor_i32(tmp, t0, t1);
501 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
502 tcg_temp_free_i32(tmp);
503 tcg_gen_mov_i32(dest, cpu_NF);
506 /* dest = T0 - T1. Compute C, N, V and Z flags */
507 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
510 tcg_gen_sub_i32(cpu_NF, t0, t1);
511 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
512 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
513 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
514 tmp = tcg_temp_new_i32();
515 tcg_gen_xor_i32(tmp, t0, t1);
516 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
517 tcg_temp_free_i32(tmp);
518 tcg_gen_mov_i32(dest, cpu_NF);
521 /* dest = T0 + ~T1 + CF. Compute C, N, V and Z flags */
522 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
524 TCGv_i32 tmp = tcg_temp_new_i32();
525 tcg_gen_not_i32(tmp, t1);
526 gen_adc_CC(dest, t0, tmp);
527 tcg_temp_free_i32(tmp);
530 #define GEN_SHIFT(name) \
531 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) \
533 TCGv_i32 tmp1, tmp2, tmp3; \
534 tmp1 = tcg_temp_new_i32(); \
535 tcg_gen_andi_i32(tmp1, t1, 0xff); \
536 tmp2 = tcg_const_i32(0); \
537 tmp3 = tcg_const_i32(0x1f); \
538 tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0); \
539 tcg_temp_free_i32(tmp3); \
540 tcg_gen_andi_i32(tmp1, tmp1, 0x1f); \
541 tcg_gen_##name##_i32(dest, tmp2, tmp1); \
542 tcg_temp_free_i32(tmp2); \
    tcg_temp_free_i32(tmp1);                                                  \
}
GEN_SHIFT(shl)
GEN_SHIFT(shr)
#undef GEN_SHIFT
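/*
 * Register-specified shifts (GEN_SHIFT above and gen_sar below) follow the
 * ARM rule that only the bottom byte of the shift register is used: LSL/LSR
 * by more than 31 produce 0 (the movcond against the 0x1f threshold), while
 * ASR clamps the amount to 31 so the sign bit is replicated.
 */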
549 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
552 tmp1 = tcg_temp_new_i32();
553 tcg_gen_andi_i32(tmp1, t1, 0xff);
554 tmp2 = tcg_const_i32(0x1f);
555 tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
556 tcg_temp_free_i32(tmp2);
557 tcg_gen_sar_i32(dest, t0, tmp1);
558 tcg_temp_free_i32(tmp1);
561 static void shifter_out_im(TCGv_i32 var, int shift)
563 tcg_gen_extract_i32(cpu_CF, var, shift, 1);
566 /* Shift by immediate. Includes special handling for shift == 0. */
567 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
568 int shift, int flags)
574 shifter_out_im(var, 32 - shift);
575 tcg_gen_shli_i32(var, var, shift);
581 tcg_gen_shri_i32(cpu_CF, var, 31);
583 tcg_gen_movi_i32(var, 0);
586 shifter_out_im(var, shift - 1);
587 tcg_gen_shri_i32(var, var, shift);
594 shifter_out_im(var, shift - 1);
597 tcg_gen_sari_i32(var, var, shift);
599 case 3: /* ROR/RRX */
602 shifter_out_im(var, shift - 1);
603 tcg_gen_rotri_i32(var, var, shift); break;
605 TCGv_i32 tmp = tcg_temp_new_i32();
606 tcg_gen_shli_i32(tmp, cpu_CF, 31);
608 shifter_out_im(var, 0);
609 tcg_gen_shri_i32(var, var, 1);
610 tcg_gen_or_i32(var, var, tmp);
611 tcg_temp_free_i32(tmp);
616 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
617 TCGv_i32 shift, int flags)
621 case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
622 case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
623 case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
624 case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
629 gen_shl(var, var, shift);
632 gen_shr(var, var, shift);
635 gen_sar(var, var, shift);
637 case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
638 tcg_gen_rotr_i32(var, var, shift); break;
641 tcg_temp_free_i32(shift);
645 * Generate a conditional based on ARM condition code cc.
646 * This is common between ARM and Aarch64 targets.
648 void arm_test_cc(DisasCompare *cmp, int cc)
679 case 8: /* hi: C && !Z */
680 case 9: /* ls: !C || Z -> !(C && !Z) */
682 value = tcg_temp_new_i32();
684 /* CF is 1 for C, so -CF is an all-bits-set mask for C;
685 ZF is non-zero for !Z; so AND the two subexpressions. */
686 tcg_gen_neg_i32(value, cpu_CF);
687 tcg_gen_and_i32(value, value, cpu_ZF);
690 case 10: /* ge: N == V -> N ^ V == 0 */
691 case 11: /* lt: N != V -> N ^ V != 0 */
692 /* Since we're only interested in the sign bit, == 0 is >= 0. */
694 value = tcg_temp_new_i32();
696 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
699 case 12: /* gt: !Z && N == V */
700 case 13: /* le: Z || N != V */
702 value = tcg_temp_new_i32();
704 /* (N == V) is equal to the sign bit of ~(NF ^ VF). Propagate
705 * the sign bit then AND with ZF to yield the result. */
706 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
707 tcg_gen_sari_i32(value, value, 31);
708 tcg_gen_andc_i32(value, cpu_ZF, value);
711 case 14: /* always */
712 case 15: /* always */
713 /* Use the ALWAYS condition, which will fold early.
714 * It doesn't matter what we use for the value. */
715 cond = TCG_COND_ALWAYS;
720 fprintf(stderr, "Bad condition code 0x%x\n", cc);
725 cond = tcg_invert_cond(cond);
731 cmp->value_global = global;
734 void arm_free_cc(DisasCompare *cmp)
736 if (!cmp->value_global) {
737 tcg_temp_free_i32(cmp->value);
741 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
743 tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
746 void arm_gen_test_cc(int cc, TCGLabel *label)
749 arm_test_cc(&cmp, cc);
750 arm_jump_cc(&cmp, label);
754 static inline void gen_set_condexec(DisasContext *s)
756 if (s->condexec_mask) {
757 uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
758 TCGv_i32 tmp = tcg_temp_new_i32();
759 tcg_gen_movi_i32(tmp, val);
760 store_cpu_field(tmp, condexec_bits);
764 static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
766 tcg_gen_movi_i32(cpu_R[15], val);
769 /* Set PC and Thumb state from var. var is marked as dead. */
770 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
772 s->base.is_jmp = DISAS_JUMP;
773 tcg_gen_andi_i32(cpu_R[15], var, ~1);
774 tcg_gen_andi_i32(var, var, 1);
775 store_cpu_field(var, thumb);
779 * Set PC and Thumb state from var. var is marked as dead.
780 * For M-profile CPUs, include logic to detect exception-return
781 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
782 * and BX reg, and no others, and happens only for code in Handler mode.
783 * The Security Extension also requires us to check for the FNC_RETURN
784 * which signals a function return from non-secure state; this can happen
785 * in both Handler and Thread mode.
786 * To avoid having to do multiple comparisons in inline generated code,
787 * we make the check we do here loose, so it will match for EXC_RETURN
788 * in Thread mode. For system emulation do_v7m_exception_exit() checks
789 * for these spurious cases and returns without doing anything (giving
790 * the same behaviour as for a branch to a non-magic address).
792 * In linux-user mode it is unclear what the right behaviour for an
793 * attempted FNC_RETURN should be, because in real hardware this will go
794 * directly to Secure code (ie not the Linux kernel) which will then treat
795 * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
796 * attempt behave the way it would on a CPU without the security extension,
797 * which is to say "like a normal branch". That means we can simply treat
798 * all branches as normal with no magic address behaviour.
static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
{
    /* Generate the same code here as for a simple bx, but flag via
     * s->base.is_jmp that we need to do the rest of the work later.
     */
    gen_bx(s, var);
#ifndef CONFIG_USER_ONLY
    if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
        (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
        s->base.is_jmp = DISAS_BX_EXCRET;
    }
#endif
}
814 static inline void gen_bx_excret_final_code(DisasContext *s)
816 /* Generate the code to finish possible exception return and end the TB */
817 TCGLabel *excret_label = gen_new_label();
820 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
821 /* Covers FNC_RETURN and EXC_RETURN magic */
822 min_magic = FNC_RETURN_MIN_MAGIC;
824 /* EXC_RETURN magic only */
825 min_magic = EXC_RETURN_MIN_MAGIC;
828 /* Is the new PC value in the magic range indicating exception return? */
829 tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label);
830 /* No: end the TB as we would for a DISAS_JMP */
831 if (is_singlestepping(s)) {
832 gen_singlestep_exception(s);
834 tcg_gen_exit_tb(NULL, 0);
836 gen_set_label(excret_label);
837 /* Yes: this is an exception return.
838 * At this point in runtime env->regs[15] and env->thumb will hold
839 * the exception-return magic number, which do_v7m_exception_exit()
840 * will read. Nothing else will be able to see those values because
841 * the cpu-exec main loop guarantees that we will always go straight
842 * from raising the exception to the exception-handling code.
844 * gen_ss_advance(s) does nothing on M profile currently but
845 * calling it is conceptually the right thing as we have executed
846 * this instruction (compare SWI, HVC, SMC handling).
849 gen_exception_internal(EXCP_EXCEPTION_EXIT);
852 static inline void gen_bxns(DisasContext *s, int rm)
854 TCGv_i32 var = load_reg(s, rm);
856 /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
857 * we need to sync state before calling it, but:
858 * - we don't need to do gen_set_pc_im() because the bxns helper will
859 * always set the PC itself
860 * - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
861 * unless it's outside an IT block or the last insn in an IT block,
862 * so we know that condexec == 0 (already set at the top of the TB)
863 * is correct in the non-UNPREDICTABLE cases, and we can choose
864 * "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
866 gen_helper_v7m_bxns(cpu_env, var);
867 tcg_temp_free_i32(var);
868 s->base.is_jmp = DISAS_EXIT;
871 static inline void gen_blxns(DisasContext *s, int rm)
873 TCGv_i32 var = load_reg(s, rm);
875 /* We don't need to sync condexec state, for the same reason as bxns.
876 * We do however need to set the PC, because the blxns helper reads it.
877 * The blxns helper may throw an exception.
879 gen_set_pc_im(s, s->base.pc_next);
880 gen_helper_v7m_blxns(cpu_env, var);
881 tcg_temp_free_i32(var);
882 s->base.is_jmp = DISAS_EXIT;
885 /* Variant of store_reg which uses branch&exchange logic when storing
886 to r15 in ARM architecture v7 and above. The source must be a temporary
887 and will be marked as dead. */
888 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
890 if (reg == 15 && ENABLE_ARCH_7) {
893 store_reg(s, reg, var);
897 /* Variant of store_reg which uses branch&exchange logic when storing
898 * to r15 in ARM architecture v5T and above. This is used for storing
899 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
900 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
901 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
903 if (reg == 15 && ENABLE_ARCH_5) {
904 gen_bx_excret(s, var);
906 store_reg(s, reg, var);
#ifdef CONFIG_USER_ONLY
#define IS_USER_ONLY 1
#else
#define IS_USER_ONLY 0
#endif
916 /* Abstractions of "generate code to do a guest load/store for
917 * AArch32", where a vaddr is always 32 bits (and is zero
918 * extended if we're a 64 bit core) and data is also
919 * 32 bits unless specifically doing a 64 bit access.
920 * These functions work like tcg_gen_qemu_{ld,st}* except
921 * that the address argument is TCGv_i32 rather than TCGv.
924 static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
926 TCGv addr = tcg_temp_new();
927 tcg_gen_extu_i32_tl(addr, a32);
    /* Not needed for user-mode BE32, where we use MO_BE instead.  */
    if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
        tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
    }
    return addr;
}
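/*
 * The XOR above implements BE32 data accesses when SCTLR.B is set in
 * system-mode emulation: within an aligned word, a byte access flips the
 * low two address bits (XOR 3) and a halfword access flips bit 1 (XOR 2),
 * which reverses the byte lane order without changing the word address.
 */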
936 static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
937 int index, MemOp opc)
941 if (arm_dc_feature(s, ARM_FEATURE_M) &&
942 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
946 addr = gen_aa32_addr(s, a32, opc);
947 tcg_gen_qemu_ld_i32(val, addr, index, opc);
951 static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
952 int index, MemOp opc)
956 if (arm_dc_feature(s, ARM_FEATURE_M) &&
957 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
961 addr = gen_aa32_addr(s, a32, opc);
962 tcg_gen_qemu_st_i32(val, addr, index, opc);
966 #define DO_GEN_LD(SUFF, OPC) \
967 static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
968 TCGv_i32 a32, int index) \
970 gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data); \
973 #define DO_GEN_ST(SUFF, OPC) \
974 static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
975 TCGv_i32 a32, int index) \
977 gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data); \
980 static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val)
982 /* Not needed for user-mode BE32, where we use MO_BE instead. */
983 if (!IS_USER_ONLY && s->sctlr_b) {
984 tcg_gen_rotri_i64(val, val, 32);
988 static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
989 int index, MemOp opc)
991 TCGv addr = gen_aa32_addr(s, a32, opc);
992 tcg_gen_qemu_ld_i64(val, addr, index, opc);
993 gen_aa32_frob64(s, val);
997 static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
998 TCGv_i32 a32, int index)
1000 gen_aa32_ld_i64(s, val, a32, index, MO_Q | s->be_data);
1003 static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1004 int index, MemOp opc)
1006 TCGv addr = gen_aa32_addr(s, a32, opc);
1008 /* Not needed for user-mode BE32, where we use MO_BE instead. */
1009 if (!IS_USER_ONLY && s->sctlr_b) {
1010 TCGv_i64 tmp = tcg_temp_new_i64();
1011 tcg_gen_rotri_i64(tmp, val, 32);
1012 tcg_gen_qemu_st_i64(tmp, addr, index, opc);
1013 tcg_temp_free_i64(tmp);
1015 tcg_gen_qemu_st_i64(val, addr, index, opc);
1017 tcg_temp_free(addr);
1020 static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
1021 TCGv_i32 a32, int index)
1023 gen_aa32_st_i64(s, val, a32, index, MO_Q | s->be_data);
DO_GEN_LD(8u, MO_UB)
DO_GEN_LD(16u, MO_UW)
DO_GEN_LD(32u, MO_UL)
DO_GEN_ST(8, MO_UB)
DO_GEN_ST(16, MO_UW)
DO_GEN_ST(32, MO_UL)
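/*
 * The macros above expand to gen_aa32_ld8u/ld16u/ld32u and
 * gen_aa32_st8/st16/st32, the byte/halfword/word accessors used by the
 * iwMMXt load/store code later in this file.
 */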
1033 static inline void gen_hvc(DisasContext *s, int imm16)
1035 /* The pre HVC helper handles cases when HVC gets trapped
1036 * as an undefined insn by runtime configuration (ie before
1037 * the insn really executes).
1039 gen_set_pc_im(s, s->pc_curr);
1040 gen_helper_pre_hvc(cpu_env);
1041 /* Otherwise we will treat this as a real exception which
1042 * happens after execution of the insn. (The distinction matters
1043 * for the PC value reported to the exception handler and also
1044 * for single stepping.)
1047 gen_set_pc_im(s, s->base.pc_next);
1048 s->base.is_jmp = DISAS_HVC;
1051 static inline void gen_smc(DisasContext *s)
1053 /* As with HVC, we may take an exception either before or after
1054 * the insn executes.
1058 gen_set_pc_im(s, s->pc_curr);
1059 tmp = tcg_const_i32(syn_aa32_smc());
1060 gen_helper_pre_smc(cpu_env, tmp);
1061 tcg_temp_free_i32(tmp);
1062 gen_set_pc_im(s, s->base.pc_next);
1063 s->base.is_jmp = DISAS_SMC;
1066 static void gen_exception_internal_insn(DisasContext *s, uint32_t pc, int excp)
1068 gen_set_condexec(s);
1069 gen_set_pc_im(s, pc);
1070 gen_exception_internal(excp);
1071 s->base.is_jmp = DISAS_NORETURN;
1074 static void gen_exception_insn(DisasContext *s, uint32_t pc, int excp,
1075 int syn, uint32_t target_el)
1077 gen_set_condexec(s);
1078 gen_set_pc_im(s, pc);
1079 gen_exception(excp, syn, target_el);
1080 s->base.is_jmp = DISAS_NORETURN;
1083 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1087 gen_set_condexec(s);
1088 gen_set_pc_im(s, s->pc_curr);
1089 tcg_syn = tcg_const_i32(syn);
1090 gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
1091 tcg_temp_free_i32(tcg_syn);
1092 s->base.is_jmp = DISAS_NORETURN;
1095 static void unallocated_encoding(DisasContext *s)
1097 /* Unallocated and reserved encodings are uncategorized */
1098 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(),
1099 default_exception_el(s));
1102 /* Force a TB lookup after an instruction that changes the CPU state. */
1103 static inline void gen_lookup_tb(DisasContext *s)
1105 tcg_gen_movi_i32(cpu_R[15], s->base.pc_next);
1106 s->base.is_jmp = DISAS_EXIT;
1109 static inline void gen_hlt(DisasContext *s, int imm)
1111 /* HLT. This has two purposes.
1112 * Architecturally, it is an external halting debug instruction.
1113 * Since QEMU doesn't implement external debug, we treat this as
1114 * it is required for halting debug disabled: it will UNDEF.
1115 * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1116 * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1117 * must trigger semihosting even for ARMv7 and earlier, where
1118 * HLT was an undefined encoding.
1119 * In system mode, we don't allow userspace access to
1120 * semihosting, to provide some semblance of security
1121 * (and for consistency with our 32-bit semihosting).
    if (semihosting_enabled() &&
#ifndef CONFIG_USER_ONLY
        s->current_el != 0 &&
#endif
        (imm == (s->thumb ? 0x3c : 0xf000))) {
        gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
        return;
    }

    unallocated_encoding(s);
}
static TCGv_ptr get_fpstatus_ptr(int neon)
{
    TCGv_ptr statusptr = tcg_temp_new_ptr();
    int offset;
    if (neon) {
        offset = offsetof(CPUARMState, vfp.standard_fp_status);
    } else {
        offset = offsetof(CPUARMState, vfp.fp_status);
    }
    tcg_gen_addi_ptr(statusptr, cpu_env, offset);
    return statusptr;
}
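/*
 * Note: the "standard FPSCR" status (neon != 0) is the fixed behaviour the
 * architecture specifies for most Neon data processing (default NaN,
 * flush-to-zero, round-to-nearest), whereas fp_status tracks the guest's
 * FPSCR-controlled behaviour for VFP operations.
 */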
static inline long vfp_reg_offset(bool dp, unsigned reg)
{
    if (dp) {
        return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
    } else {
        long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
        if (reg & 1) {
            ofs += offsetof(CPU_DoubleU, l.upper);
        } else {
            ofs += offsetof(CPU_DoubleU, l.lower);
        }
        return ofs;
    }
}

/* Return the offset of a 32-bit piece of a NEON register.
   zero is the least significant end of the register.  */
static inline long
neon_reg_offset (int reg, int n)
{
    int sreg;
    sreg = reg * 2 + n;
    return vfp_reg_offset(0, sreg);
}
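/*
 * With the mapping above, the 32-bit pieces of the Neon register file line
 * up with the architectural S registers: piece n of D<reg> is S<2*reg + n>,
 * hence the reg * 2 + n index passed to vfp_reg_offset().
 */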
1173 /* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1174 * where 0 is the least significant end of the register.
1177 neon_element_offset(int reg, int element, MemOp size)
1179 int element_size = 1 << size;
1180 int ofs = element * element_size;
1181 #ifdef HOST_WORDS_BIGENDIAN
1182 /* Calculate the offset assuming fully little-endian,
1183 * then XOR to account for the order of the 8-byte units.
1185 if (element_size < 8) {
1186 ofs ^= 8 - element_size;
1189 return neon_reg_offset(reg, 0) + ofs;
1192 static TCGv_i32 neon_load_reg(int reg, int pass)
1194 TCGv_i32 tmp = tcg_temp_new_i32();
1195 tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
1199 static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
1201 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1205 tcg_gen_ld8u_i32(var, cpu_env, offset);
1208 tcg_gen_ld16u_i32(var, cpu_env, offset);
1211 tcg_gen_ld_i32(var, cpu_env, offset);
1214 g_assert_not_reached();
1218 static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
1220 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1224 tcg_gen_ld8u_i64(var, cpu_env, offset);
1227 tcg_gen_ld16u_i64(var, cpu_env, offset);
1230 tcg_gen_ld32u_i64(var, cpu_env, offset);
1233 tcg_gen_ld_i64(var, cpu_env, offset);
1236 g_assert_not_reached();
1240 static void neon_store_reg(int reg, int pass, TCGv_i32 var)
1242 tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
1243 tcg_temp_free_i32(var);
1246 static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
1248 long offset = neon_element_offset(reg, ele, size);
1252 tcg_gen_st8_i32(var, cpu_env, offset);
1255 tcg_gen_st16_i32(var, cpu_env, offset);
1258 tcg_gen_st_i32(var, cpu_env, offset);
1261 g_assert_not_reached();
1265 static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
1267 long offset = neon_element_offset(reg, ele, size);
1271 tcg_gen_st8_i64(var, cpu_env, offset);
1274 tcg_gen_st16_i64(var, cpu_env, offset);
1277 tcg_gen_st32_i64(var, cpu_env, offset);
1280 tcg_gen_st_i64(var, cpu_env, offset);
1283 g_assert_not_reached();
1287 static inline void neon_load_reg64(TCGv_i64 var, int reg)
1289 tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
1292 static inline void neon_store_reg64(TCGv_i64 var, int reg)
1294 tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
1297 static inline void neon_load_reg32(TCGv_i32 var, int reg)
1299 tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
1302 static inline void neon_store_reg32(TCGv_i32 var, int reg)
1304 tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
1307 static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
1309 TCGv_ptr ret = tcg_temp_new_ptr();
1310 tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
1314 #define ARM_CP_RW_BIT (1 << 20)
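/*
 * Bit 20 of a coprocessor instruction is the L (load/"read") bit; the
 * iwMMXt decoder below tests ARM_CP_RW_BIT to distinguish TMRRC from TMCRR
 * and the WLDR forms from the WSTR forms.
 */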
1316 /* Include the VFP and Neon decoders */
1317 #include "translate-vfp.inc.c"
1318 #include "translate-neon.inc.c"
1320 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1322 tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1325 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1327 tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1330 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1332 TCGv_i32 var = tcg_temp_new_i32();
1333 tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1337 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1339 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1340 tcg_temp_free_i32(var);
1343 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1345 iwmmxt_store_reg(cpu_M0, rn);
1348 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1350 iwmmxt_load_reg(cpu_M0, rn);
1353 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1355 iwmmxt_load_reg(cpu_V1, rn);
1356 tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1359 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1361 iwmmxt_load_reg(cpu_V1, rn);
1362 tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1365 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1367 iwmmxt_load_reg(cpu_V1, rn);
1368 tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1371 #define IWMMXT_OP(name) \
1372 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1374 iwmmxt_load_reg(cpu_V1, rn); \
1375 gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1378 #define IWMMXT_OP_ENV(name) \
1379 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1381 iwmmxt_load_reg(cpu_V1, rn); \
1382 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1385 #define IWMMXT_OP_ENV_SIZE(name) \
1386 IWMMXT_OP_ENV(name##b) \
1387 IWMMXT_OP_ENV(name##w) \
1388 IWMMXT_OP_ENV(name##l)
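/*
 * Expansion example: IWMMXT_OP_ENV_SIZE(cmpeq) defines
 * gen_op_iwmmxt_cmpeqb_M0_wRn(), ..._cmpeqw_... and ..._cmpeql_..., each
 * loading wRn into cpu_V1 and calling the matching helper with cpu_env,
 * cpu_M0 and cpu_V1.
 */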
1390 #define IWMMXT_OP_ENV1(name) \
1391 static inline void gen_op_iwmmxt_##name##_M0(void) \
1393 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1407 IWMMXT_OP_ENV_SIZE(unpackl)
1408 IWMMXT_OP_ENV_SIZE(unpackh)
1410 IWMMXT_OP_ENV1(unpacklub)
1411 IWMMXT_OP_ENV1(unpackluw)
1412 IWMMXT_OP_ENV1(unpacklul)
1413 IWMMXT_OP_ENV1(unpackhub)
1414 IWMMXT_OP_ENV1(unpackhuw)
1415 IWMMXT_OP_ENV1(unpackhul)
1416 IWMMXT_OP_ENV1(unpacklsb)
1417 IWMMXT_OP_ENV1(unpacklsw)
1418 IWMMXT_OP_ENV1(unpacklsl)
1419 IWMMXT_OP_ENV1(unpackhsb)
1420 IWMMXT_OP_ENV1(unpackhsw)
1421 IWMMXT_OP_ENV1(unpackhsl)
1423 IWMMXT_OP_ENV_SIZE(cmpeq)
1424 IWMMXT_OP_ENV_SIZE(cmpgtu)
1425 IWMMXT_OP_ENV_SIZE(cmpgts)
1427 IWMMXT_OP_ENV_SIZE(mins)
1428 IWMMXT_OP_ENV_SIZE(minu)
1429 IWMMXT_OP_ENV_SIZE(maxs)
1430 IWMMXT_OP_ENV_SIZE(maxu)
1432 IWMMXT_OP_ENV_SIZE(subn)
1433 IWMMXT_OP_ENV_SIZE(addn)
1434 IWMMXT_OP_ENV_SIZE(subu)
1435 IWMMXT_OP_ENV_SIZE(addu)
1436 IWMMXT_OP_ENV_SIZE(subs)
1437 IWMMXT_OP_ENV_SIZE(adds)
1439 IWMMXT_OP_ENV(avgb0)
1440 IWMMXT_OP_ENV(avgb1)
1441 IWMMXT_OP_ENV(avgw0)
1442 IWMMXT_OP_ENV(avgw1)
1444 IWMMXT_OP_ENV(packuw)
1445 IWMMXT_OP_ENV(packul)
1446 IWMMXT_OP_ENV(packuq)
1447 IWMMXT_OP_ENV(packsw)
1448 IWMMXT_OP_ENV(packsl)
1449 IWMMXT_OP_ENV(packsq)
1451 static void gen_op_iwmmxt_set_mup(void)
1454 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1455 tcg_gen_ori_i32(tmp, tmp, 2);
1456 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1459 static void gen_op_iwmmxt_set_cup(void)
1462 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1463 tcg_gen_ori_i32(tmp, tmp, 1);
1464 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1467 static void gen_op_iwmmxt_setpsr_nz(void)
1469 TCGv_i32 tmp = tcg_temp_new_i32();
1470 gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1471 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1474 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1476 iwmmxt_load_reg(cpu_V1, rn);
1477 tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1478 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1481 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1488 rd = (insn >> 16) & 0xf;
1489 tmp = load_reg(s, rd);
1491 offset = (insn & 0xff) << ((insn >> 7) & 2);
1492 if (insn & (1 << 24)) {
1494 if (insn & (1 << 23))
1495 tcg_gen_addi_i32(tmp, tmp, offset);
1497 tcg_gen_addi_i32(tmp, tmp, -offset);
1498 tcg_gen_mov_i32(dest, tmp);
1499 if (insn & (1 << 21))
1500 store_reg(s, rd, tmp);
1502 tcg_temp_free_i32(tmp);
1503 } else if (insn & (1 << 21)) {
1505 tcg_gen_mov_i32(dest, tmp);
1506 if (insn & (1 << 23))
1507 tcg_gen_addi_i32(tmp, tmp, offset);
1509 tcg_gen_addi_i32(tmp, tmp, -offset);
1510 store_reg(s, rd, tmp);
1511 } else if (!(insn & (1 << 23)))
1516 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1518 int rd = (insn >> 0) & 0xf;
1521 if (insn & (1 << 8)) {
1522 if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1525 tmp = iwmmxt_load_creg(rd);
1528 tmp = tcg_temp_new_i32();
1529 iwmmxt_load_reg(cpu_V0, rd);
1530 tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1532 tcg_gen_andi_i32(tmp, tmp, mask);
1533 tcg_gen_mov_i32(dest, tmp);
1534 tcg_temp_free_i32(tmp);
1538 /* Disassemble an iwMMXt instruction. Returns nonzero if an error occurred
1539 (ie. an undefined instruction). */
1540 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1543 int rdhi, rdlo, rd0, rd1, i;
1545 TCGv_i32 tmp, tmp2, tmp3;
1547 if ((insn & 0x0e000e00) == 0x0c000000) {
1548 if ((insn & 0x0fe00ff0) == 0x0c400000) {
1550 rdlo = (insn >> 12) & 0xf;
1551 rdhi = (insn >> 16) & 0xf;
1552 if (insn & ARM_CP_RW_BIT) { /* TMRRC */
1553 iwmmxt_load_reg(cpu_V0, wrd);
1554 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1555 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1556 } else { /* TMCRR */
1557 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1558 iwmmxt_store_reg(cpu_V0, wrd);
1559 gen_op_iwmmxt_set_mup();
1564 wrd = (insn >> 12) & 0xf;
1565 addr = tcg_temp_new_i32();
1566 if (gen_iwmmxt_address(s, insn, addr)) {
1567 tcg_temp_free_i32(addr);
1570 if (insn & ARM_CP_RW_BIT) {
1571 if ((insn >> 28) == 0xf) { /* WLDRW wCx */
1572 tmp = tcg_temp_new_i32();
1573 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1574 iwmmxt_store_creg(wrd, tmp);
1577 if (insn & (1 << 8)) {
1578 if (insn & (1 << 22)) { /* WLDRD */
1579 gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1581 } else { /* WLDRW wRd */
1582 tmp = tcg_temp_new_i32();
1583 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1586 tmp = tcg_temp_new_i32();
1587 if (insn & (1 << 22)) { /* WLDRH */
1588 gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1589 } else { /* WLDRB */
1590 gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1594 tcg_gen_extu_i32_i64(cpu_M0, tmp);
1595 tcg_temp_free_i32(tmp);
1597 gen_op_iwmmxt_movq_wRn_M0(wrd);
1600 if ((insn >> 28) == 0xf) { /* WSTRW wCx */
1601 tmp = iwmmxt_load_creg(wrd);
1602 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1604 gen_op_iwmmxt_movq_M0_wRn(wrd);
1605 tmp = tcg_temp_new_i32();
1606 if (insn & (1 << 8)) {
1607 if (insn & (1 << 22)) { /* WSTRD */
1608 gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1609 } else { /* WSTRW wRd */
1610 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1611 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1614 if (insn & (1 << 22)) { /* WSTRH */
1615 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1616 gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1617 } else { /* WSTRB */
1618 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1619 gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1623 tcg_temp_free_i32(tmp);
1625 tcg_temp_free_i32(addr);
1629 if ((insn & 0x0f000000) != 0x0e000000)
1632 switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
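    /*
     * The case value packs insn bits [23:20] into its top nibble and insn
     * bits [11:4] into its low byte, so e.g. case 0x117 (TEXTRC) matches
     * opcode field 1 with bits [11:4] equal to 0x17.
     */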
1633 case 0x000: /* WOR */
1634 wrd = (insn >> 12) & 0xf;
1635 rd0 = (insn >> 0) & 0xf;
1636 rd1 = (insn >> 16) & 0xf;
1637 gen_op_iwmmxt_movq_M0_wRn(rd0);
1638 gen_op_iwmmxt_orq_M0_wRn(rd1);
1639 gen_op_iwmmxt_setpsr_nz();
1640 gen_op_iwmmxt_movq_wRn_M0(wrd);
1641 gen_op_iwmmxt_set_mup();
1642 gen_op_iwmmxt_set_cup();
1644 case 0x011: /* TMCR */
1647 rd = (insn >> 12) & 0xf;
1648 wrd = (insn >> 16) & 0xf;
1650 case ARM_IWMMXT_wCID:
1651 case ARM_IWMMXT_wCASF:
1653 case ARM_IWMMXT_wCon:
1654 gen_op_iwmmxt_set_cup();
1656 case ARM_IWMMXT_wCSSF:
1657 tmp = iwmmxt_load_creg(wrd);
1658 tmp2 = load_reg(s, rd);
1659 tcg_gen_andc_i32(tmp, tmp, tmp2);
1660 tcg_temp_free_i32(tmp2);
1661 iwmmxt_store_creg(wrd, tmp);
1663 case ARM_IWMMXT_wCGR0:
1664 case ARM_IWMMXT_wCGR1:
1665 case ARM_IWMMXT_wCGR2:
1666 case ARM_IWMMXT_wCGR3:
1667 gen_op_iwmmxt_set_cup();
1668 tmp = load_reg(s, rd);
1669 iwmmxt_store_creg(wrd, tmp);
1675 case 0x100: /* WXOR */
1676 wrd = (insn >> 12) & 0xf;
1677 rd0 = (insn >> 0) & 0xf;
1678 rd1 = (insn >> 16) & 0xf;
1679 gen_op_iwmmxt_movq_M0_wRn(rd0);
1680 gen_op_iwmmxt_xorq_M0_wRn(rd1);
1681 gen_op_iwmmxt_setpsr_nz();
1682 gen_op_iwmmxt_movq_wRn_M0(wrd);
1683 gen_op_iwmmxt_set_mup();
1684 gen_op_iwmmxt_set_cup();
1686 case 0x111: /* TMRC */
1689 rd = (insn >> 12) & 0xf;
1690 wrd = (insn >> 16) & 0xf;
1691 tmp = iwmmxt_load_creg(wrd);
1692 store_reg(s, rd, tmp);
1694 case 0x300: /* WANDN */
1695 wrd = (insn >> 12) & 0xf;
1696 rd0 = (insn >> 0) & 0xf;
1697 rd1 = (insn >> 16) & 0xf;
1698 gen_op_iwmmxt_movq_M0_wRn(rd0);
1699 tcg_gen_neg_i64(cpu_M0, cpu_M0);
1700 gen_op_iwmmxt_andq_M0_wRn(rd1);
1701 gen_op_iwmmxt_setpsr_nz();
1702 gen_op_iwmmxt_movq_wRn_M0(wrd);
1703 gen_op_iwmmxt_set_mup();
1704 gen_op_iwmmxt_set_cup();
1706 case 0x200: /* WAND */
1707 wrd = (insn >> 12) & 0xf;
1708 rd0 = (insn >> 0) & 0xf;
1709 rd1 = (insn >> 16) & 0xf;
1710 gen_op_iwmmxt_movq_M0_wRn(rd0);
1711 gen_op_iwmmxt_andq_M0_wRn(rd1);
1712 gen_op_iwmmxt_setpsr_nz();
1713 gen_op_iwmmxt_movq_wRn_M0(wrd);
1714 gen_op_iwmmxt_set_mup();
1715 gen_op_iwmmxt_set_cup();
1717 case 0x810: case 0xa10: /* WMADD */
1718 wrd = (insn >> 12) & 0xf;
1719 rd0 = (insn >> 0) & 0xf;
1720 rd1 = (insn >> 16) & 0xf;
1721 gen_op_iwmmxt_movq_M0_wRn(rd0);
1722 if (insn & (1 << 21))
1723 gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1725 gen_op_iwmmxt_madduq_M0_wRn(rd1);
1726 gen_op_iwmmxt_movq_wRn_M0(wrd);
1727 gen_op_iwmmxt_set_mup();
1729 case 0x10e: case 0x50e: case 0x90e: case 0xd0e: /* WUNPCKIL */
1730 wrd = (insn >> 12) & 0xf;
1731 rd0 = (insn >> 16) & 0xf;
1732 rd1 = (insn >> 0) & 0xf;
1733 gen_op_iwmmxt_movq_M0_wRn(rd0);
1734 switch ((insn >> 22) & 3) {
1736 gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1739 gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1742 gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1747 gen_op_iwmmxt_movq_wRn_M0(wrd);
1748 gen_op_iwmmxt_set_mup();
1749 gen_op_iwmmxt_set_cup();
1751 case 0x10c: case 0x50c: case 0x90c: case 0xd0c: /* WUNPCKIH */
1752 wrd = (insn >> 12) & 0xf;
1753 rd0 = (insn >> 16) & 0xf;
1754 rd1 = (insn >> 0) & 0xf;
1755 gen_op_iwmmxt_movq_M0_wRn(rd0);
1756 switch ((insn >> 22) & 3) {
1758 gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1761 gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1764 gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1769 gen_op_iwmmxt_movq_wRn_M0(wrd);
1770 gen_op_iwmmxt_set_mup();
1771 gen_op_iwmmxt_set_cup();
1773 case 0x012: case 0x112: case 0x412: case 0x512: /* WSAD */
1774 wrd = (insn >> 12) & 0xf;
1775 rd0 = (insn >> 16) & 0xf;
1776 rd1 = (insn >> 0) & 0xf;
1777 gen_op_iwmmxt_movq_M0_wRn(rd0);
1778 if (insn & (1 << 22))
1779 gen_op_iwmmxt_sadw_M0_wRn(rd1);
1781 gen_op_iwmmxt_sadb_M0_wRn(rd1);
1782 if (!(insn & (1 << 20)))
1783 gen_op_iwmmxt_addl_M0_wRn(wrd);
1784 gen_op_iwmmxt_movq_wRn_M0(wrd);
1785 gen_op_iwmmxt_set_mup();
1787 case 0x010: case 0x110: case 0x210: case 0x310: /* WMUL */
1788 wrd = (insn >> 12) & 0xf;
1789 rd0 = (insn >> 16) & 0xf;
1790 rd1 = (insn >> 0) & 0xf;
1791 gen_op_iwmmxt_movq_M0_wRn(rd0);
1792 if (insn & (1 << 21)) {
1793 if (insn & (1 << 20))
1794 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1796 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1798 if (insn & (1 << 20))
1799 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1801 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1803 gen_op_iwmmxt_movq_wRn_M0(wrd);
1804 gen_op_iwmmxt_set_mup();
1806 case 0x410: case 0x510: case 0x610: case 0x710: /* WMAC */
1807 wrd = (insn >> 12) & 0xf;
1808 rd0 = (insn >> 16) & 0xf;
1809 rd1 = (insn >> 0) & 0xf;
1810 gen_op_iwmmxt_movq_M0_wRn(rd0);
1811 if (insn & (1 << 21))
1812 gen_op_iwmmxt_macsw_M0_wRn(rd1);
1814 gen_op_iwmmxt_macuw_M0_wRn(rd1);
1815 if (!(insn & (1 << 20))) {
1816 iwmmxt_load_reg(cpu_V1, wrd);
1817 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1819 gen_op_iwmmxt_movq_wRn_M0(wrd);
1820 gen_op_iwmmxt_set_mup();
1822 case 0x006: case 0x406: case 0x806: case 0xc06: /* WCMPEQ */
1823 wrd = (insn >> 12) & 0xf;
1824 rd0 = (insn >> 16) & 0xf;
1825 rd1 = (insn >> 0) & 0xf;
1826 gen_op_iwmmxt_movq_M0_wRn(rd0);
1827 switch ((insn >> 22) & 3) {
1829 gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1832 gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1835 gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1840 gen_op_iwmmxt_movq_wRn_M0(wrd);
1841 gen_op_iwmmxt_set_mup();
1842 gen_op_iwmmxt_set_cup();
1844 case 0x800: case 0x900: case 0xc00: case 0xd00: /* WAVG2 */
1845 wrd = (insn >> 12) & 0xf;
1846 rd0 = (insn >> 16) & 0xf;
1847 rd1 = (insn >> 0) & 0xf;
1848 gen_op_iwmmxt_movq_M0_wRn(rd0);
1849 if (insn & (1 << 22)) {
1850 if (insn & (1 << 20))
1851 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1853 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1855 if (insn & (1 << 20))
1856 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1858 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1860 gen_op_iwmmxt_movq_wRn_M0(wrd);
1861 gen_op_iwmmxt_set_mup();
1862 gen_op_iwmmxt_set_cup();
1864 case 0x802: case 0x902: case 0xa02: case 0xb02: /* WALIGNR */
1865 wrd = (insn >> 12) & 0xf;
1866 rd0 = (insn >> 16) & 0xf;
1867 rd1 = (insn >> 0) & 0xf;
1868 gen_op_iwmmxt_movq_M0_wRn(rd0);
1869 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1870 tcg_gen_andi_i32(tmp, tmp, 7);
1871 iwmmxt_load_reg(cpu_V1, rd1);
1872 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1873 tcg_temp_free_i32(tmp);
1874 gen_op_iwmmxt_movq_wRn_M0(wrd);
1875 gen_op_iwmmxt_set_mup();
1877 case 0x601: case 0x605: case 0x609: case 0x60d: /* TINSR */
1878 if (((insn >> 6) & 3) == 3)
1880 rd = (insn >> 12) & 0xf;
1881 wrd = (insn >> 16) & 0xf;
1882 tmp = load_reg(s, rd);
1883 gen_op_iwmmxt_movq_M0_wRn(wrd);
1884 switch ((insn >> 6) & 3) {
1886 tmp2 = tcg_const_i32(0xff);
1887 tmp3 = tcg_const_i32((insn & 7) << 3);
1890 tmp2 = tcg_const_i32(0xffff);
1891 tmp3 = tcg_const_i32((insn & 3) << 4);
1894 tmp2 = tcg_const_i32(0xffffffff);
1895 tmp3 = tcg_const_i32((insn & 1) << 5);
1901 gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1902 tcg_temp_free_i32(tmp3);
1903 tcg_temp_free_i32(tmp2);
1904 tcg_temp_free_i32(tmp);
1905 gen_op_iwmmxt_movq_wRn_M0(wrd);
1906 gen_op_iwmmxt_set_mup();
1908 case 0x107: case 0x507: case 0x907: case 0xd07: /* TEXTRM */
1909 rd = (insn >> 12) & 0xf;
1910 wrd = (insn >> 16) & 0xf;
1911 if (rd == 15 || ((insn >> 22) & 3) == 3)
1913 gen_op_iwmmxt_movq_M0_wRn(wrd);
1914 tmp = tcg_temp_new_i32();
1915 switch ((insn >> 22) & 3) {
1917 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1918 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1920 tcg_gen_ext8s_i32(tmp, tmp);
1922 tcg_gen_andi_i32(tmp, tmp, 0xff);
1926 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1927 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1929 tcg_gen_ext16s_i32(tmp, tmp);
1931 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1935 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1936 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1939 store_reg(s, rd, tmp);
1941 case 0x117: case 0x517: case 0x917: case 0xd17: /* TEXTRC */
1942 if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1944 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1945 switch ((insn >> 22) & 3) {
1947 tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1950 tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1953 tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1956 tcg_gen_shli_i32(tmp, tmp, 28);
1958 tcg_temp_free_i32(tmp);
1960 case 0x401: case 0x405: case 0x409: case 0x40d: /* TBCST */
1961 if (((insn >> 6) & 3) == 3)
1963 rd = (insn >> 12) & 0xf;
1964 wrd = (insn >> 16) & 0xf;
1965 tmp = load_reg(s, rd);
1966 switch ((insn >> 6) & 3) {
1968 gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1971 gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1974 gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1977 tcg_temp_free_i32(tmp);
1978 gen_op_iwmmxt_movq_wRn_M0(wrd);
1979 gen_op_iwmmxt_set_mup();
1981 case 0x113: case 0x513: case 0x913: case 0xd13: /* TANDC */
1982 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1984 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1985 tmp2 = tcg_temp_new_i32();
1986 tcg_gen_mov_i32(tmp2, tmp);
1987 switch ((insn >> 22) & 3) {
1989 for (i = 0; i < 7; i ++) {
1990 tcg_gen_shli_i32(tmp2, tmp2, 4);
1991 tcg_gen_and_i32(tmp, tmp, tmp2);
1995 for (i = 0; i < 3; i ++) {
1996 tcg_gen_shli_i32(tmp2, tmp2, 8);
1997 tcg_gen_and_i32(tmp, tmp, tmp2);
2001 tcg_gen_shli_i32(tmp2, tmp2, 16);
2002 tcg_gen_and_i32(tmp, tmp, tmp2);
2006 tcg_temp_free_i32(tmp2);
2007 tcg_temp_free_i32(tmp);
2009 case 0x01c: case 0x41c: case 0x81c: case 0xc1c: /* WACC */
2010 wrd = (insn >> 12) & 0xf;
2011 rd0 = (insn >> 16) & 0xf;
2012 gen_op_iwmmxt_movq_M0_wRn(rd0);
2013 switch ((insn >> 22) & 3) {
2015 gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
2018 gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
2021 gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2026 gen_op_iwmmxt_movq_wRn_M0(wrd);
2027 gen_op_iwmmxt_set_mup();
2029 case 0x115: case 0x515: case 0x915: case 0xd15: /* TORC */
2030 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2032 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2033 tmp2 = tcg_temp_new_i32();
2034 tcg_gen_mov_i32(tmp2, tmp);
2035 switch ((insn >> 22) & 3) {
2037 for (i = 0; i < 7; i ++) {
2038 tcg_gen_shli_i32(tmp2, tmp2, 4);
2039 tcg_gen_or_i32(tmp, tmp, tmp2);
2043 for (i = 0; i < 3; i ++) {
2044 tcg_gen_shli_i32(tmp2, tmp2, 8);
2045 tcg_gen_or_i32(tmp, tmp, tmp2);
2049 tcg_gen_shli_i32(tmp2, tmp2, 16);
2050 tcg_gen_or_i32(tmp, tmp, tmp2);
2054 tcg_temp_free_i32(tmp2);
2055 tcg_temp_free_i32(tmp);
2057 case 0x103: case 0x503: case 0x903: case 0xd03: /* TMOVMSK */
2058 rd = (insn >> 12) & 0xf;
2059 rd0 = (insn >> 16) & 0xf;
2060 if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2062 gen_op_iwmmxt_movq_M0_wRn(rd0);
2063 tmp = tcg_temp_new_i32();
2064 switch ((insn >> 22) & 3) {
2066 gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2069 gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2072 gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2075 store_reg(s, rd, tmp);
2077 case 0x106: case 0x306: case 0x506: case 0x706: /* WCMPGT */
2078 case 0x906: case 0xb06: case 0xd06: case 0xf06:
2079 wrd = (insn >> 12) & 0xf;
2080 rd0 = (insn >> 16) & 0xf;
2081 rd1 = (insn >> 0) & 0xf;
2082 gen_op_iwmmxt_movq_M0_wRn(rd0);
2083 switch ((insn >> 22) & 3) {
2085 if (insn & (1 << 21))
2086 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2088 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2091 if (insn & (1 << 21))
2092 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2094 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2097 if (insn & (1 << 21))
2098 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2100 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2105 gen_op_iwmmxt_movq_wRn_M0(wrd);
2106 gen_op_iwmmxt_set_mup();
2107 gen_op_iwmmxt_set_cup();
2109 case 0x00e: case 0x20e: case 0x40e: case 0x60e: /* WUNPCKEL */
2110 case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2111 wrd = (insn >> 12) & 0xf;
2112 rd0 = (insn >> 16) & 0xf;
2113 gen_op_iwmmxt_movq_M0_wRn(rd0);
2114 switch ((insn >> 22) & 3) {
2116 if (insn & (1 << 21))
2117 gen_op_iwmmxt_unpacklsb_M0();
2119 gen_op_iwmmxt_unpacklub_M0();
2122 if (insn & (1 << 21))
2123 gen_op_iwmmxt_unpacklsw_M0();
2125 gen_op_iwmmxt_unpackluw_M0();
2128 if (insn & (1 << 21))
2129 gen_op_iwmmxt_unpacklsl_M0();
2131 gen_op_iwmmxt_unpacklul_M0();
2136 gen_op_iwmmxt_movq_wRn_M0(wrd);
2137 gen_op_iwmmxt_set_mup();
2138 gen_op_iwmmxt_set_cup();
2140 case 0x00c: case 0x20c: case 0x40c: case 0x60c: /* WUNPCKEH */
2141 case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2142 wrd = (insn >> 12) & 0xf;
2143 rd0 = (insn >> 16) & 0xf;
2144 gen_op_iwmmxt_movq_M0_wRn(rd0);
2145 switch ((insn >> 22) & 3) {
2147 if (insn & (1 << 21))
2148 gen_op_iwmmxt_unpackhsb_M0();
2150 gen_op_iwmmxt_unpackhub_M0();
2153 if (insn & (1 << 21))
2154 gen_op_iwmmxt_unpackhsw_M0();
2156 gen_op_iwmmxt_unpackhuw_M0();
2159 if (insn & (1 << 21))
2160 gen_op_iwmmxt_unpackhsl_M0();
2162 gen_op_iwmmxt_unpackhul_M0();
2167 gen_op_iwmmxt_movq_wRn_M0(wrd);
2168 gen_op_iwmmxt_set_mup();
2169 gen_op_iwmmxt_set_cup();
2171 case 0x204: case 0x604: case 0xa04: case 0xe04: /* WSRL */
2172 case 0x214: case 0x614: case 0xa14: case 0xe14:
2173 if (((insn >> 22) & 3) == 0)
2175 wrd = (insn >> 12) & 0xf;
2176 rd0 = (insn >> 16) & 0xf;
2177 gen_op_iwmmxt_movq_M0_wRn(rd0);
2178 tmp = tcg_temp_new_i32();
2179 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2180 tcg_temp_free_i32(tmp);
2183 switch ((insn >> 22) & 3) {
2185 gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2188 gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2191 gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2194 tcg_temp_free_i32(tmp);
2195 gen_op_iwmmxt_movq_wRn_M0(wrd);
2196 gen_op_iwmmxt_set_mup();
2197 gen_op_iwmmxt_set_cup();
2199 case 0x004: case 0x404: case 0x804: case 0xc04: /* WSRA */
2200 case 0x014: case 0x414: case 0x814: case 0xc14:
2201 if (((insn >> 22) & 3) == 0)
2203 wrd = (insn >> 12) & 0xf;
2204 rd0 = (insn >> 16) & 0xf;
2205 gen_op_iwmmxt_movq_M0_wRn(rd0);
2206 tmp = tcg_temp_new_i32();
2207 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2208 tcg_temp_free_i32(tmp);
2211 switch ((insn >> 22) & 3) {
2213 gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2216 gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2219 gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2222 tcg_temp_free_i32(tmp);
2223 gen_op_iwmmxt_movq_wRn_M0(wrd);
2224 gen_op_iwmmxt_set_mup();
2225 gen_op_iwmmxt_set_cup();
2227 case 0x104: case 0x504: case 0x904: case 0xd04: /* WSLL */
2228 case 0x114: case 0x514: case 0x914: case 0xd14:
2229 if (((insn >> 22) & 3) == 0)
2231 wrd = (insn >> 12) & 0xf;
2232 rd0 = (insn >> 16) & 0xf;
2233 gen_op_iwmmxt_movq_M0_wRn(rd0);
2234 tmp = tcg_temp_new_i32();
2235 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2236 tcg_temp_free_i32(tmp);
2239 switch ((insn >> 22) & 3) {
2241 gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2244 gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2247 gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2250 tcg_temp_free_i32(tmp);
2251 gen_op_iwmmxt_movq_wRn_M0(wrd);
2252 gen_op_iwmmxt_set_mup();
2253 gen_op_iwmmxt_set_cup();
2255 case 0x304: case 0x704: case 0xb04: case 0xf04: /* WROR */
2256 case 0x314: case 0x714: case 0xb14: case 0xf14:
2257 if (((insn >> 22) & 3) == 0)
2259 wrd = (insn >> 12) & 0xf;
2260 rd0 = (insn >> 16) & 0xf;
2261 gen_op_iwmmxt_movq_M0_wRn(rd0);
2262 tmp = tcg_temp_new_i32();
2263 switch ((insn >> 22) & 3) {
2265 if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2266 tcg_temp_free_i32(tmp);
2269 gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2272 if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2273 tcg_temp_free_i32(tmp);
2276 gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2279 if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2280 tcg_temp_free_i32(tmp);
2283 gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2286 tcg_temp_free_i32(tmp);
2287 gen_op_iwmmxt_movq_wRn_M0(wrd);
2288 gen_op_iwmmxt_set_mup();
2289 gen_op_iwmmxt_set_cup();
2291 case 0x116: case 0x316: case 0x516: case 0x716: /* WMIN */
2292 case 0x916: case 0xb16: case 0xd16: case 0xf16:
2293 wrd = (insn >> 12) & 0xf;
2294 rd0 = (insn >> 16) & 0xf;
2295 rd1 = (insn >> 0) & 0xf;
2296 gen_op_iwmmxt_movq_M0_wRn(rd0);
2297 switch ((insn >> 22) & 3) {
2299 if (insn & (1 << 21))
2300 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2302 gen_op_iwmmxt_minub_M0_wRn(rd1);
2305 if (insn & (1 << 21))
2306 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2308 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2311 if (insn & (1 << 21))
2312 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2314 gen_op_iwmmxt_minul_M0_wRn(rd1);
2319 gen_op_iwmmxt_movq_wRn_M0(wrd);
2320 gen_op_iwmmxt_set_mup();
2322 case 0x016: case 0x216: case 0x416: case 0x616: /* WMAX */
2323 case 0x816: case 0xa16: case 0xc16: case 0xe16:
2324 wrd = (insn >> 12) & 0xf;
2325 rd0 = (insn >> 16) & 0xf;
2326 rd1 = (insn >> 0) & 0xf;
2327 gen_op_iwmmxt_movq_M0_wRn(rd0);
2328 switch ((insn >> 22) & 3) {
2330 if (insn & (1 << 21))
2331 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2333 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2336 if (insn & (1 << 21))
2337 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2339 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2342 if (insn & (1 << 21))
2343 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2345 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2350 gen_op_iwmmxt_movq_wRn_M0(wrd);
2351 gen_op_iwmmxt_set_mup();
2353 case 0x002: case 0x102: case 0x202: case 0x302: /* WALIGNI */
2354 case 0x402: case 0x502: case 0x602: case 0x702:
2355 wrd = (insn >> 12) & 0xf;
2356 rd0 = (insn >> 16) & 0xf;
2357 rd1 = (insn >> 0) & 0xf;
2358 gen_op_iwmmxt_movq_M0_wRn(rd0);
2359 tmp = tcg_const_i32((insn >> 20) & 3);
2360 iwmmxt_load_reg(cpu_V1, rd1);
2361 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2362 tcg_temp_free_i32(tmp);
2363 gen_op_iwmmxt_movq_wRn_M0(wrd);
2364 gen_op_iwmmxt_set_mup();
2366 case 0x01a: case 0x11a: case 0x21a: case 0x31a: /* WSUB */
2367 case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2368 case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2369 case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2370 wrd = (insn >> 12) & 0xf;
2371 rd0 = (insn >> 16) & 0xf;
2372 rd1 = (insn >> 0) & 0xf;
2373 gen_op_iwmmxt_movq_M0_wRn(rd0);
2374 switch ((insn >> 20) & 0xf) {
2376 gen_op_iwmmxt_subnb_M0_wRn(rd1);
2379 gen_op_iwmmxt_subub_M0_wRn(rd1);
2382 gen_op_iwmmxt_subsb_M0_wRn(rd1);
2385 gen_op_iwmmxt_subnw_M0_wRn(rd1);
2388 gen_op_iwmmxt_subuw_M0_wRn(rd1);
2391 gen_op_iwmmxt_subsw_M0_wRn(rd1);
2394 gen_op_iwmmxt_subnl_M0_wRn(rd1);
2397 gen_op_iwmmxt_subul_M0_wRn(rd1);
2400 gen_op_iwmmxt_subsl_M0_wRn(rd1);
2405 gen_op_iwmmxt_movq_wRn_M0(wrd);
2406 gen_op_iwmmxt_set_mup();
2407 gen_op_iwmmxt_set_cup();
2409 case 0x01e: case 0x11e: case 0x21e: case 0x31e: /* WSHUFH */
2410 case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2411 case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2412 case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2413 wrd = (insn >> 12) & 0xf;
2414 rd0 = (insn >> 16) & 0xf;
2415 gen_op_iwmmxt_movq_M0_wRn(rd0);
2416 tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2417 gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2418 tcg_temp_free_i32(tmp);
2419 gen_op_iwmmxt_movq_wRn_M0(wrd);
2420 gen_op_iwmmxt_set_mup();
2421 gen_op_iwmmxt_set_cup();
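/*
 * Note: the WSHUFH control byte built above packs two 4-bit immediate
 * fields of the instruction (bits [23:20] and [3:0]) into the 8-bit
 * lane-shuffle selector that the helper consumes.
 */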
2423 case 0x018: case 0x118: case 0x218: case 0x318: /* WADD */
2424 case 0x418: case 0x518: case 0x618: case 0x718:
2425 case 0x818: case 0x918: case 0xa18: case 0xb18:
2426 case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2427 wrd = (insn >> 12) & 0xf;
2428 rd0 = (insn >> 16) & 0xf;
2429 rd1 = (insn >> 0) & 0xf;
2430 gen_op_iwmmxt_movq_M0_wRn(rd0);
2431 switch ((insn >> 20) & 0xf) {
2433 gen_op_iwmmxt_addnb_M0_wRn(rd1);
2436 gen_op_iwmmxt_addub_M0_wRn(rd1);
2439 gen_op_iwmmxt_addsb_M0_wRn(rd1);
2442 gen_op_iwmmxt_addnw_M0_wRn(rd1);
2445 gen_op_iwmmxt_adduw_M0_wRn(rd1);
2448 gen_op_iwmmxt_addsw_M0_wRn(rd1);
2451 gen_op_iwmmxt_addnl_M0_wRn(rd1);
2454 gen_op_iwmmxt_addul_M0_wRn(rd1);
2457 gen_op_iwmmxt_addsl_M0_wRn(rd1);
2462 gen_op_iwmmxt_movq_wRn_M0(wrd);
2463 gen_op_iwmmxt_set_mup();
2464 gen_op_iwmmxt_set_cup();
2466 case 0x008: case 0x108: case 0x208: case 0x308: /* WPACK */
2467 case 0x408: case 0x508: case 0x608: case 0x708:
2468 case 0x808: case 0x908: case 0xa08: case 0xb08:
2469 case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2470 if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2472 wrd = (insn >> 12) & 0xf;
2473 rd0 = (insn >> 16) & 0xf;
2474 rd1 = (insn >> 0) & 0xf;
2475 gen_op_iwmmxt_movq_M0_wRn(rd0);
2476 switch ((insn >> 22) & 3) {
2478 if (insn & (1 << 21))
2479 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2481 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2484 if (insn & (1 << 21))
2485 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2487 gen_op_iwmmxt_packul_M0_wRn(rd1);
2490 if (insn & (1 << 21))
2491 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2493 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2496 gen_op_iwmmxt_movq_wRn_M0(wrd);
2497 gen_op_iwmmxt_set_mup();
2498 gen_op_iwmmxt_set_cup();
2500 case 0x201: case 0x203: case 0x205: case 0x207:
2501 case 0x209: case 0x20b: case 0x20d: case 0x20f:
2502 case 0x211: case 0x213: case 0x215: case 0x217:
2503 case 0x219: case 0x21b: case 0x21d: case 0x21f:
2504 wrd = (insn >> 5) & 0xf;
2505 rd0 = (insn >> 12) & 0xf;
2506 rd1 = (insn >> 0) & 0xf;
2507 if (rd0 == 0xf || rd1 == 0xf)
2509 gen_op_iwmmxt_movq_M0_wRn(wrd);
2510 tmp = load_reg(s, rd0);
2511 tmp2 = load_reg(s, rd1);
2512 switch ((insn >> 16) & 0xf) {
2513 case 0x0: /* TMIA */
2514 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2516 case 0x8: /* TMIAPH */
2517 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2519 case 0xc: case 0xd: case 0xe: case 0xf: /* TMIAxy */
2520 if (insn & (1 << 16))
2521 tcg_gen_shri_i32(tmp, tmp, 16);
2522 if (insn & (1 << 17))
2523 tcg_gen_shri_i32(tmp2, tmp2, 16);
2524 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2527 tcg_temp_free_i32(tmp2);
2528 tcg_temp_free_i32(tmp);
2531 tcg_temp_free_i32(tmp2);
2532 tcg_temp_free_i32(tmp);
2533 gen_op_iwmmxt_movq_wRn_M0(wrd);
2534 gen_op_iwmmxt_set_mup();
2543 /* Disassemble an XScale DSP instruction. Returns nonzero if an error occurred
2544 (i.e. an undefined instruction). */
2545 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2547 int acc, rd0, rd1, rdhi, rdlo;
2550 if ((insn & 0x0ff00f10) == 0x0e200010) {
2551 /* Multiply with Internal Accumulate Format */
2552 rd0 = (insn >> 12) & 0xf;
2554 acc = (insn >> 5) & 7;
2559 tmp = load_reg(s, rd0);
2560 tmp2 = load_reg(s, rd1);
2561 switch ((insn >> 16) & 0xf) {
2563 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2565 case 0x8: /* MIAPH */
2566 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2568 case 0xc: /* MIABB */
2569 case 0xd: /* MIABT */
2570 case 0xe: /* MIATB */
2571 case 0xf: /* MIATT */
2572 if (insn & (1 << 16))
2573 tcg_gen_shri_i32(tmp, tmp, 16);
2574 if (insn & (1 << 17))
2575 tcg_gen_shri_i32(tmp2, tmp2, 16);
2576 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2581 tcg_temp_free_i32(tmp2);
2582 tcg_temp_free_i32(tmp);
2584 gen_op_iwmmxt_movq_wRn_M0(acc);
2588 if ((insn & 0x0fe00ff8) == 0x0c400000) {
2589 /* Internal Accumulator Access Format */
2590 rdhi = (insn >> 16) & 0xf;
2591 rdlo = (insn >> 12) & 0xf;
2597 if (insn & ARM_CP_RW_BIT) { /* MRA */
2598 iwmmxt_load_reg(cpu_V0, acc);
2599 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2600 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
2601 tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2603 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2604 iwmmxt_store_reg(cpu_V0, acc);
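/*
 * Note: the XScale internal accumulators are 40 bits wide, which is why
 * the MRA path above masks the high half down to bits [39:32].
 */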
2612 #define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
2613 #define VFP_DREG(reg, insn, bigbit, smallbit) do { \
2614 if (dc_isar_feature(aa32_simd_r32, s)) { \
2615 reg = (((insn) >> (bigbit)) & 0x0f) \
2616 | (((insn) >> ((smallbit) - 4)) & 0x10); \
2617 } else { \
2618 if (insn & (1 << (smallbit))) \
2619 goto illegal_op; \
2620 reg = ((insn) >> (bigbit)) & 0x0f; \
2621 } \
2622 } while (0)
2623 #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
2624 #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
2625 #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
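/*
 * Note: the VFP_DREG_* macros above decode a double-precision register
 * index. When the CPU has 32 D registers (aa32_simd_r32) the extra
 * D/N/M bit becomes bit 4 of the index; otherwise that bit must be zero
 * and only D0..D15 are addressable.
 */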
2627 static void gen_neon_dup_low16(TCGv_i32 var)
2629 TCGv_i32 tmp = tcg_temp_new_i32();
2630 tcg_gen_ext16u_i32(var, var);
2631 tcg_gen_shli_i32(tmp, var, 16);
2632 tcg_gen_or_i32(var, var, tmp);
2633 tcg_temp_free_i32(tmp);
2636 static void gen_neon_dup_high16(TCGv_i32 var)
2638 TCGv_i32 tmp = tcg_temp_new_i32();
2639 tcg_gen_andi_i32(var, var, 0xffff0000);
2640 tcg_gen_shri_i32(tmp, var, 16);
2641 tcg_gen_or_i32(var, var, tmp);
2642 tcg_temp_free_i32(tmp);
2645 static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
2646 {
2647 #ifndef CONFIG_USER_ONLY
2648 return (s->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) ||
2649 ((s->base.pc_next - 1) & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
2650 #else
2651 return true;
2652 #endif
2653 }
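/*
 * Note: direct block chaining via goto_tb is only allowed when the
 * destination lies on the same guest page as the start of the TB or as
 * the end of the instruction just translated, so that page-level TB
 * invalidation cannot leave a stale direct jump behind.
 */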
2655 static void gen_goto_ptr(void)
2657 tcg_gen_lookup_and_goto_ptr();
2660 /* This will end the TB but doesn't guarantee we'll return to
2661 * cpu_loop_exec. Any live exit_requests will be processed as we
2662 * enter the next TB.
2663 */
2664 static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
2665 {
2666 if (use_goto_tb(s, dest)) {
2667 tcg_gen_goto_tb(n);
2668 gen_set_pc_im(s, dest);
2669 tcg_gen_exit_tb(s->base.tb, n);
2670 } else {
2671 gen_set_pc_im(s, dest);
2672 gen_goto_ptr();
2673 }
2674 s->base.is_jmp = DISAS_NORETURN;
2675 }
2677 static inline void gen_jmp (DisasContext *s, uint32_t dest)
2678 {
2679 if (unlikely(is_singlestepping(s))) {
2680 /* An indirect jump so that we still trigger the debug exception. */
2681 gen_set_pc_im(s, dest);
2682 s->base.is_jmp = DISAS_JUMP;
2683 } else {
2684 gen_goto_tb(s, 0, dest);
2685 }
2686 }
2688 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2689 {
2690 if (x)
2691 tcg_gen_sari_i32(t0, t0, 16);
2692 else
2693 gen_sxth(t0);
2694 if (y)
2695 tcg_gen_sari_i32(t1, t1, 16);
2696 else
2697 gen_sxth(t1);
2698 tcg_gen_mul_i32(t0, t0, t1);
2699 }
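/*
 * Note: gen_mulxy implements the operand selection for the 16x16->32
 * signed multiplies (SMULxy/SMLAxy and friends): x and y choose the top
 * (arithmetically shifted down) or bottom (sign-extended) halfword of
 * each operand before the 32-bit multiply.
 */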
2701 /* Return the mask of PSR bits set by a MSR instruction. */
2702 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2703 {
2704 uint32_t mask = 0;
2706 if (flags & (1 << 0)) {
2707 mask |= 0xff;
2708 }
2709 if (flags & (1 << 1)) {
2710 mask |= 0xff00;
2711 }
2712 if (flags & (1 << 2)) {
2713 mask |= 0xff0000;
2714 }
2715 if (flags & (1 << 3)) {
2716 mask |= 0xff000000;
2717 }
2719 /* Mask out undefined and reserved bits. */
2720 mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2722 /* Mask out execution state. */
2723 if (!spsr) {
2724 mask &= ~CPSR_EXEC;
2725 }
2727 /* Mask out privileged bits. */
2728 if (IS_USER(s)) {
2729 mask &= CPSR_USER;
2730 }
2731 return mask;
2732 }
2734 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2735 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2736 {
2737 TCGv_i32 tmp;
2738 if (spsr) {
2739 /* ??? This is also undefined in system mode. */
2740 if (IS_USER(s))
2741 return 1;
2743 tmp = load_cpu_field(spsr);
2744 tcg_gen_andi_i32(tmp, tmp, ~mask);
2745 tcg_gen_andi_i32(t0, t0, mask);
2746 tcg_gen_or_i32(tmp, tmp, t0);
2747 store_cpu_field(tmp, spsr);
2748 } else {
2749 gen_set_cpsr(t0, mask);
2750 }
2751 tcg_temp_free_i32(t0);
2752 return 0;
2753 }
2756 /* Returns nonzero if access to the PSR is not permitted. */
2757 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2758 {
2759 TCGv_i32 tmp;
2760 tmp = tcg_temp_new_i32();
2761 tcg_gen_movi_i32(tmp, val);
2762 return gen_set_psr(s, mask, spsr, tmp);
2763 }
2765 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2766 int *tgtmode, int *regno)
2768 /* Decode the r and sysm fields of MSR/MRS banked accesses into
2769 * the target mode and register number, and identify the various
2770 * unpredictable cases.
2771 * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2772 * + executed in user mode
2773 * + using R15 as the src/dest register
2774 * + accessing an unimplemented register
2775 * + accessing a register that's inaccessible at current PL/security state*
2776 * + accessing a register that you could access with a different insn
2777 * We choose to UNDEF in all these cases.
2778 * Since we don't know which of the various AArch32 modes we are in
2779 * we have to defer some checks to runtime.
2780 * Accesses to Monitor mode registers from Secure EL1 (which implies
2781 * that EL3 is AArch64) must trap to EL3.
2783 * If the access checks fail this function will emit code to take
2784 * an exception and return false. Otherwise it will return true,
2785 * and set *tgtmode and *regno appropriately.
2786 */
2787 int exc_target = default_exception_el(s);
2789 /* These instructions are present only in ARMv8, or in ARMv7 with the
2790 * Virtualization Extensions.
2791 */
2792 if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2793 !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2794 goto undef;
2795 }
2797 if (IS_USER(s) || rn == 15) {
2798 goto undef;
2799 }
2801 /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2802 * of registers into (r, sysm).
2803 */
2804 if (r) {
2805 /* SPSRs for other modes */
2806 switch (sysm) {
2807 case 0xe: /* SPSR_fiq */
2808 *tgtmode = ARM_CPU_MODE_FIQ;
2810 case 0x10: /* SPSR_irq */
2811 *tgtmode = ARM_CPU_MODE_IRQ;
2813 case 0x12: /* SPSR_svc */
2814 *tgtmode = ARM_CPU_MODE_SVC;
2816 case 0x14: /* SPSR_abt */
2817 *tgtmode = ARM_CPU_MODE_ABT;
2819 case 0x16: /* SPSR_und */
2820 *tgtmode = ARM_CPU_MODE_UND;
2822 case 0x1c: /* SPSR_mon */
2823 *tgtmode = ARM_CPU_MODE_MON;
2825 case 0x1e: /* SPSR_hyp */
2826 *tgtmode = ARM_CPU_MODE_HYP;
2828 default: /* unallocated */
2829 goto undef;
2830 }
2831 /* We arbitrarily assign SPSR a register number of 16. */
2832 *regno = 16;
2833 } else {
2834 /* general purpose registers for other modes */
2835 switch (sysm) {
2836 case 0x0 ... 0x6: /* 0b00xxx : r8_usr ... r14_usr */
2837 *tgtmode = ARM_CPU_MODE_USR;
2838 *regno = sysm + 8;
2839 break;
2840 case 0x8 ... 0xe: /* 0b01xxx : r8_fiq ... r14_fiq */
2841 *tgtmode = ARM_CPU_MODE_FIQ;
2842 *regno = sysm;
2843 break;
2844 case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2845 *tgtmode = ARM_CPU_MODE_IRQ;
2846 *regno = sysm & 1 ? 13 : 14;
2848 case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2849 *tgtmode = ARM_CPU_MODE_SVC;
2850 *regno = sysm & 1 ? 13 : 14;
2852 case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2853 *tgtmode = ARM_CPU_MODE_ABT;
2854 *regno = sysm & 1 ? 13 : 14;
2856 case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2857 *tgtmode = ARM_CPU_MODE_UND;
2858 *regno = sysm & 1 ? 13 : 14;
2860 case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2861 *tgtmode = ARM_CPU_MODE_MON;
2862 *regno = sysm & 1 ? 13 : 14;
2864 case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2865 *tgtmode = ARM_CPU_MODE_HYP;
2866 /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2867 *regno = sysm & 1 ? 13 : 17;
2869 default: /* unallocated */
2870 goto undef;
2871 }
2872 }
2874 /* Catch the 'accessing inaccessible register' cases we can detect
2875 * at translate time.
2876 */
2877 switch (*tgtmode) {
2878 case ARM_CPU_MODE_MON:
2879 if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2880 goto undef;
2881 }
2882 if (s->current_el == 1) {
2883 /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2884 * then accesses to Mon registers trap to EL3
2885 */
2886 exc_target = 3;
2887 goto undef;
2888 }
2889 break;
2890 case ARM_CPU_MODE_HYP:
2891 /*
2892 * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2893 * (and so we can forbid accesses from EL2 or below). elr_hyp
2894 * can be accessed also from Hyp mode, so forbid accesses from
2895 * EL0 and EL1.
2896 */
2897 if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2898 (s->current_el < 3 && *regno != 17)) {
2899 goto undef;
2900 }
2901 break;
2902 default:
2903 break;
2904 }
2906 return true;
2908 undef:
2909 /* If we get here then some access check did not pass */
2910 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
2911 syn_uncategorized(), exc_target);
2912 return false;
2913 }
2915 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2917 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2918 int tgtmode = 0, regno = 0;
2920 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2924 /* Sync state because msr_banked() can raise exceptions */
2925 gen_set_condexec(s);
2926 gen_set_pc_im(s, s->pc_curr);
2927 tcg_reg = load_reg(s, rn);
2928 tcg_tgtmode = tcg_const_i32(tgtmode);
2929 tcg_regno = tcg_const_i32(regno);
2930 gen_helper_msr_banked(cpu_env, tcg_reg, tcg_tgtmode, tcg_regno);
2931 tcg_temp_free_i32(tcg_tgtmode);
2932 tcg_temp_free_i32(tcg_regno);
2933 tcg_temp_free_i32(tcg_reg);
2934 s->base.is_jmp = DISAS_UPDATE;
2937 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2939 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2940 int tgtmode = 0, regno = 0;
2942 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2946 /* Sync state because mrs_banked() can raise exceptions */
2947 gen_set_condexec(s);
2948 gen_set_pc_im(s, s->pc_curr);
2949 tcg_reg = tcg_temp_new_i32();
2950 tcg_tgtmode = tcg_const_i32(tgtmode);
2951 tcg_regno = tcg_const_i32(regno);
2952 gen_helper_mrs_banked(tcg_reg, cpu_env, tcg_tgtmode, tcg_regno);
2953 tcg_temp_free_i32(tcg_tgtmode);
2954 tcg_temp_free_i32(tcg_regno);
2955 store_reg(s, rn, tcg_reg);
2956 s->base.is_jmp = DISAS_UPDATE;
2959 /* Store value to PC as for an exception return (i.e. don't
2960 * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2961 * will do the masking based on the new value of the Thumb bit.
2962 */
2963 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2965 tcg_gen_mov_i32(cpu_R[15], pc);
2966 tcg_temp_free_i32(pc);
2969 /* Generate a v6 exception return. Marks both values as dead. */
2970 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2972 store_pc_exc_ret(s, pc);
2973 /* The cpsr_write_eret helper will mask the low bits of PC
2974 * appropriately depending on the new Thumb bit, so it must
2975 * be called after storing the new PC.
2976 */
2977 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2978 gen_io_start();
2979 }
2980 gen_helper_cpsr_write_eret(cpu_env, cpsr);
2981 tcg_temp_free_i32(cpsr);
2982 /* Must exit loop to check un-masked IRQs */
2983 s->base.is_jmp = DISAS_EXIT;
2986 /* Generate an old-style exception return. Marks pc as dead. */
2987 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2989 gen_rfe(s, pc, load_cpu_field(spsr));
2992 #define CPU_V001 cpu_V0, cpu_V0, cpu_V1
2994 static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
2997 case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
2998 case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
2999 case 2: tcg_gen_add_i32(t0, t0, t1); break;
3004 static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
3007 case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
3008 case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
3009 case 2: tcg_gen_sub_i32(t0, t1, t0); break;
3014 #define GEN_NEON_INTEGER_OP_ENV(name) do { \
3015 switch ((size << 1) | u) { \
3017 gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \
3020 gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \
3023 gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \
3026 gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \
3029 gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \
3032 gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \
3034 default: return 1; \
3037 #define GEN_NEON_INTEGER_OP(name) do { \
3038 switch ((size << 1) | u) { \
3040 gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \
3043 gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \
3046 gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \
3049 gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \
3052 gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \
3055 gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \
3057 default: return 1; \
3060 static TCGv_i32 neon_load_scratch(int scratch)
3062 TCGv_i32 tmp = tcg_temp_new_i32();
3063 tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3067 static void neon_store_scratch(int scratch, TCGv_i32 var)
3069 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3070 tcg_temp_free_i32(var);
3073 static inline TCGv_i32 neon_get_scalar(int size, int reg)
3077 tmp = neon_load_reg(reg & 7, reg >> 4);
3079 gen_neon_dup_high16(tmp);
3081 gen_neon_dup_low16(tmp);
3084 tmp = neon_load_reg(reg & 15, reg >> 4);
3089 static int gen_neon_unzip(int rd, int rm, int size, int q)
3093 if (!q && size == 2) {
3096 pd = vfp_reg_ptr(true, rd);
3097 pm = vfp_reg_ptr(true, rm);
3101 gen_helper_neon_qunzip8(pd, pm);
3104 gen_helper_neon_qunzip16(pd, pm);
3107 gen_helper_neon_qunzip32(pd, pm);
3115 gen_helper_neon_unzip8(pd, pm);
3118 gen_helper_neon_unzip16(pd, pm);
3124 tcg_temp_free_ptr(pd);
3125 tcg_temp_free_ptr(pm);
3129 static int gen_neon_zip(int rd, int rm, int size, int q)
3133 if (!q && size == 2) {
3136 pd = vfp_reg_ptr(true, rd);
3137 pm = vfp_reg_ptr(true, rm);
3141 gen_helper_neon_qzip8(pd, pm);
3144 gen_helper_neon_qzip16(pd, pm);
3147 gen_helper_neon_qzip32(pd, pm);
3155 gen_helper_neon_zip8(pd, pm);
3158 gen_helper_neon_zip16(pd, pm);
3164 tcg_temp_free_ptr(pd);
3165 tcg_temp_free_ptr(pm);
3169 static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
3173 rd = tcg_temp_new_i32();
3174 tmp = tcg_temp_new_i32();
3176 tcg_gen_shli_i32(rd, t0, 8);
3177 tcg_gen_andi_i32(rd, rd, 0xff00ff00);
3178 tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
3179 tcg_gen_or_i32(rd, rd, tmp);
3181 tcg_gen_shri_i32(t1, t1, 8);
3182 tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
3183 tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
3184 tcg_gen_or_i32(t1, t1, tmp);
3185 tcg_gen_mov_i32(t0, rd);
3187 tcg_temp_free_i32(tmp);
3188 tcg_temp_free_i32(rd);
3191 static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
3195 rd = tcg_temp_new_i32();
3196 tmp = tcg_temp_new_i32();
3198 tcg_gen_shli_i32(rd, t0, 16);
3199 tcg_gen_andi_i32(tmp, t1, 0xffff);
3200 tcg_gen_or_i32(rd, rd, tmp);
3201 tcg_gen_shri_i32(t1, t1, 16);
3202 tcg_gen_andi_i32(tmp, t0, 0xffff0000);
3203 tcg_gen_or_i32(t1, t1, tmp);
3204 tcg_gen_mov_i32(t0, rd);
3206 tcg_temp_free_i32(tmp);
3207 tcg_temp_free_i32(rd);
3210 static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
3213 case 0: gen_helper_neon_narrow_u8(dest, src); break;
3214 case 1: gen_helper_neon_narrow_u16(dest, src); break;
3215 case 2: tcg_gen_extrl_i64_i32(dest, src); break;
3220 static inline void gen_neon_narrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3223 case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
3224 case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
3225 case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
3230 static inline void gen_neon_narrow_satu(int size, TCGv_i32 dest, TCGv_i64 src)
3233 case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
3234 case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
3235 case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
3240 static inline void gen_neon_unarrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3243 case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
3244 case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
3245 case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
3250 static inline void gen_neon_shift_narrow(int size, TCGv_i32 var, TCGv_i32 shift,
3256 case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
3257 case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
3262 case 1: gen_helper_neon_rshl_s16(var, var, shift); break;
3263 case 2: gen_helper_neon_rshl_s32(var, var, shift); break;
3270 case 1: gen_helper_neon_shl_u16(var, var, shift); break;
3271 case 2: gen_ushl_i32(var, var, shift); break;
3276 case 1: gen_helper_neon_shl_s16(var, var, shift); break;
3277 case 2: gen_sshl_i32(var, var, shift); break;
3284 static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u)
3288 case 0: gen_helper_neon_widen_u8(dest, src); break;
3289 case 1: gen_helper_neon_widen_u16(dest, src); break;
3290 case 2: tcg_gen_extu_i32_i64(dest, src); break;
3295 case 0: gen_helper_neon_widen_s8(dest, src); break;
3296 case 1: gen_helper_neon_widen_s16(dest, src); break;
3297 case 2: tcg_gen_ext_i32_i64(dest, src); break;
3301 tcg_temp_free_i32(src);
3304 static inline void gen_neon_addl(int size)
3307 case 0: gen_helper_neon_addl_u16(CPU_V001); break;
3308 case 1: gen_helper_neon_addl_u32(CPU_V001); break;
3309 case 2: tcg_gen_add_i64(CPU_V001); break;
3314 static inline void gen_neon_subl(int size)
3317 case 0: gen_helper_neon_subl_u16(CPU_V001); break;
3318 case 1: gen_helper_neon_subl_u32(CPU_V001); break;
3319 case 2: tcg_gen_sub_i64(CPU_V001); break;
3324 static inline void gen_neon_negl(TCGv_i64 var, int size)
3327 case 0: gen_helper_neon_negl_u16(var, var); break;
3328 case 1: gen_helper_neon_negl_u32(var, var); break;
3330 tcg_gen_neg_i64(var, var);
3336 static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
3339 case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
3340 case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
3345 static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
3350 switch ((size << 1) | u) {
3351 case 0: gen_helper_neon_mull_s8(dest, a, b); break;
3352 case 1: gen_helper_neon_mull_u8(dest, a, b); break;
3353 case 2: gen_helper_neon_mull_s16(dest, a, b); break;
3354 case 3: gen_helper_neon_mull_u16(dest, a, b); break;
3356 tmp = gen_muls_i64_i32(a, b);
3357 tcg_gen_mov_i64(dest, tmp);
3358 tcg_temp_free_i64(tmp);
3361 tmp = gen_mulu_i64_i32(a, b);
3362 tcg_gen_mov_i64(dest, tmp);
3363 tcg_temp_free_i64(tmp);
3368 /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
3369 Don't forget to clean them now. */
3371 tcg_temp_free_i32(a);
3372 tcg_temp_free_i32(b);
3376 static void gen_neon_narrow_op(int op, int u, int size,
3377 TCGv_i32 dest, TCGv_i64 src)
3381 gen_neon_unarrow_sats(size, dest, src);
3383 gen_neon_narrow(size, dest, src);
3387 gen_neon_narrow_satu(size, dest, src);
3389 gen_neon_narrow_sats(size, dest, src);
3394 /* Symbolic constants for op fields for Neon 3-register same-length.
3395 * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
3396 * table A7-9.
3397 */
3398 #define NEON_3R_VHADD 0
3399 #define NEON_3R_VQADD 1
3400 #define NEON_3R_VRHADD 2
3401 #define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
3402 #define NEON_3R_VHSUB 4
3403 #define NEON_3R_VQSUB 5
3404 #define NEON_3R_VCGT 6
3405 #define NEON_3R_VCGE 7
3406 #define NEON_3R_VSHL 8
3407 #define NEON_3R_VQSHL 9
3408 #define NEON_3R_VRSHL 10
3409 #define NEON_3R_VQRSHL 11
3410 #define NEON_3R_VMAX 12
3411 #define NEON_3R_VMIN 13
3412 #define NEON_3R_VABD 14
3413 #define NEON_3R_VABA 15
3414 #define NEON_3R_VADD_VSUB 16
3415 #define NEON_3R_VTST_VCEQ 17
3416 #define NEON_3R_VML 18 /* VMLA, VMLS */
3417 #define NEON_3R_VMUL 19
3418 #define NEON_3R_VPMAX 20
3419 #define NEON_3R_VPMIN 21
3420 #define NEON_3R_VQDMULH_VQRDMULH 22
3421 #define NEON_3R_VPADD_VQRDMLAH 23
3422 #define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
3423 #define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS, VQRDMLSH */
3424 #define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
3425 #define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
3426 #define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
3427 #define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
3428 #define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
3429 #define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */
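/*
 * Each entry in the table below has bit n set if the op accepts size
 * value n; a zero entry (an unallocated op) therefore always UNDEFs.
 */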
3431 static const uint8_t neon_3r_sizes[] = {
3432 [NEON_3R_VHADD] = 0x7,
3433 [NEON_3R_VQADD] = 0xf,
3434 [NEON_3R_VRHADD] = 0x7,
3435 [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
3436 [NEON_3R_VHSUB] = 0x7,
3437 [NEON_3R_VQSUB] = 0xf,
3438 [NEON_3R_VCGT] = 0x7,
3439 [NEON_3R_VCGE] = 0x7,
3440 [NEON_3R_VSHL] = 0xf,
3441 [NEON_3R_VQSHL] = 0xf,
3442 [NEON_3R_VRSHL] = 0xf,
3443 [NEON_3R_VQRSHL] = 0xf,
3444 [NEON_3R_VMAX] = 0x7,
3445 [NEON_3R_VMIN] = 0x7,
3446 [NEON_3R_VABD] = 0x7,
3447 [NEON_3R_VABA] = 0x7,
3448 [NEON_3R_VADD_VSUB] = 0xf,
3449 [NEON_3R_VTST_VCEQ] = 0x7,
3450 [NEON_3R_VML] = 0x7,
3451 [NEON_3R_VMUL] = 0x7,
3452 [NEON_3R_VPMAX] = 0x7,
3453 [NEON_3R_VPMIN] = 0x7,
3454 [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
3455 [NEON_3R_VPADD_VQRDMLAH] = 0x7,
3456 [NEON_3R_SHA] = 0xf, /* size field encodes op type */
3457 [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */
3458 [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
3459 [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
3460 [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
3461 [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
3462 [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
3463 [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
3466 /* Symbolic constants for op fields for Neon 2-register miscellaneous.
3467 * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
3468 * table A7-13.
3469 */
3470 #define NEON_2RM_VREV64 0
3471 #define NEON_2RM_VREV32 1
3472 #define NEON_2RM_VREV16 2
3473 #define NEON_2RM_VPADDL 4
3474 #define NEON_2RM_VPADDL_U 5
3475 #define NEON_2RM_AESE 6 /* Includes AESD */
3476 #define NEON_2RM_AESMC 7 /* Includes AESIMC */
3477 #define NEON_2RM_VCLS 8
3478 #define NEON_2RM_VCLZ 9
3479 #define NEON_2RM_VCNT 10
3480 #define NEON_2RM_VMVN 11
3481 #define NEON_2RM_VPADAL 12
3482 #define NEON_2RM_VPADAL_U 13
3483 #define NEON_2RM_VQABS 14
3484 #define NEON_2RM_VQNEG 15
3485 #define NEON_2RM_VCGT0 16
3486 #define NEON_2RM_VCGE0 17
3487 #define NEON_2RM_VCEQ0 18
3488 #define NEON_2RM_VCLE0 19
3489 #define NEON_2RM_VCLT0 20
3490 #define NEON_2RM_SHA1H 21
3491 #define NEON_2RM_VABS 22
3492 #define NEON_2RM_VNEG 23
3493 #define NEON_2RM_VCGT0_F 24
3494 #define NEON_2RM_VCGE0_F 25
3495 #define NEON_2RM_VCEQ0_F 26
3496 #define NEON_2RM_VCLE0_F 27
3497 #define NEON_2RM_VCLT0_F 28
3498 #define NEON_2RM_VABS_F 30
3499 #define NEON_2RM_VNEG_F 31
3500 #define NEON_2RM_VSWP 32
3501 #define NEON_2RM_VTRN 33
3502 #define NEON_2RM_VUZP 34
3503 #define NEON_2RM_VZIP 35
3504 #define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
3505 #define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
3506 #define NEON_2RM_VSHLL 38
3507 #define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
3508 #define NEON_2RM_VRINTN 40
3509 #define NEON_2RM_VRINTX 41
3510 #define NEON_2RM_VRINTA 42
3511 #define NEON_2RM_VRINTZ 43
3512 #define NEON_2RM_VCVT_F16_F32 44
3513 #define NEON_2RM_VRINTM 45
3514 #define NEON_2RM_VCVT_F32_F16 46
3515 #define NEON_2RM_VRINTP 47
3516 #define NEON_2RM_VCVTAU 48
3517 #define NEON_2RM_VCVTAS 49
3518 #define NEON_2RM_VCVTNU 50
3519 #define NEON_2RM_VCVTNS 51
3520 #define NEON_2RM_VCVTPU 52
3521 #define NEON_2RM_VCVTPS 53
3522 #define NEON_2RM_VCVTMU 54
3523 #define NEON_2RM_VCVTMS 55
3524 #define NEON_2RM_VRECPE 56
3525 #define NEON_2RM_VRSQRTE 57
3526 #define NEON_2RM_VRECPE_F 58
3527 #define NEON_2RM_VRSQRTE_F 59
3528 #define NEON_2RM_VCVT_FS 60
3529 #define NEON_2RM_VCVT_FU 61
3530 #define NEON_2RM_VCVT_SF 62
3531 #define NEON_2RM_VCVT_UF 63
3533 static bool neon_2rm_is_v8_op(int op)
3535 /* Return true if this neon 2reg-misc op is ARMv8 and up */
3537 case NEON_2RM_VRINTN:
3538 case NEON_2RM_VRINTA:
3539 case NEON_2RM_VRINTM:
3540 case NEON_2RM_VRINTP:
3541 case NEON_2RM_VRINTZ:
3542 case NEON_2RM_VRINTX:
3543 case NEON_2RM_VCVTAU:
3544 case NEON_2RM_VCVTAS:
3545 case NEON_2RM_VCVTNU:
3546 case NEON_2RM_VCVTNS:
3547 case NEON_2RM_VCVTPU:
3548 case NEON_2RM_VCVTPS:
3549 case NEON_2RM_VCVTMU:
3550 case NEON_2RM_VCVTMS:
3557 /* Each entry in this array has bit n set if the insn allows
3558 * size value n (otherwise it will UNDEF). Since unallocated
3559 * op values will have no bits set they always UNDEF.
3560 */
3561 static const uint8_t neon_2rm_sizes[] = {
3562 [NEON_2RM_VREV64] = 0x7,
3563 [NEON_2RM_VREV32] = 0x3,
3564 [NEON_2RM_VREV16] = 0x1,
3565 [NEON_2RM_VPADDL] = 0x7,
3566 [NEON_2RM_VPADDL_U] = 0x7,
3567 [NEON_2RM_AESE] = 0x1,
3568 [NEON_2RM_AESMC] = 0x1,
3569 [NEON_2RM_VCLS] = 0x7,
3570 [NEON_2RM_VCLZ] = 0x7,
3571 [NEON_2RM_VCNT] = 0x1,
3572 [NEON_2RM_VMVN] = 0x1,
3573 [NEON_2RM_VPADAL] = 0x7,
3574 [NEON_2RM_VPADAL_U] = 0x7,
3575 [NEON_2RM_VQABS] = 0x7,
3576 [NEON_2RM_VQNEG] = 0x7,
3577 [NEON_2RM_VCGT0] = 0x7,
3578 [NEON_2RM_VCGE0] = 0x7,
3579 [NEON_2RM_VCEQ0] = 0x7,
3580 [NEON_2RM_VCLE0] = 0x7,
3581 [NEON_2RM_VCLT0] = 0x7,
3582 [NEON_2RM_SHA1H] = 0x4,
3583 [NEON_2RM_VABS] = 0x7,
3584 [NEON_2RM_VNEG] = 0x7,
3585 [NEON_2RM_VCGT0_F] = 0x4,
3586 [NEON_2RM_VCGE0_F] = 0x4,
3587 [NEON_2RM_VCEQ0_F] = 0x4,
3588 [NEON_2RM_VCLE0_F] = 0x4,
3589 [NEON_2RM_VCLT0_F] = 0x4,
3590 [NEON_2RM_VABS_F] = 0x4,
3591 [NEON_2RM_VNEG_F] = 0x4,
3592 [NEON_2RM_VSWP] = 0x1,
3593 [NEON_2RM_VTRN] = 0x7,
3594 [NEON_2RM_VUZP] = 0x7,
3595 [NEON_2RM_VZIP] = 0x7,
3596 [NEON_2RM_VMOVN] = 0x7,
3597 [NEON_2RM_VQMOVN] = 0x7,
3598 [NEON_2RM_VSHLL] = 0x7,
3599 [NEON_2RM_SHA1SU1] = 0x4,
3600 [NEON_2RM_VRINTN] = 0x4,
3601 [NEON_2RM_VRINTX] = 0x4,
3602 [NEON_2RM_VRINTA] = 0x4,
3603 [NEON_2RM_VRINTZ] = 0x4,
3604 [NEON_2RM_VCVT_F16_F32] = 0x2,
3605 [NEON_2RM_VRINTM] = 0x4,
3606 [NEON_2RM_VCVT_F32_F16] = 0x2,
3607 [NEON_2RM_VRINTP] = 0x4,
3608 [NEON_2RM_VCVTAU] = 0x4,
3609 [NEON_2RM_VCVTAS] = 0x4,
3610 [NEON_2RM_VCVTNU] = 0x4,
3611 [NEON_2RM_VCVTNS] = 0x4,
3612 [NEON_2RM_VCVTPU] = 0x4,
3613 [NEON_2RM_VCVTPS] = 0x4,
3614 [NEON_2RM_VCVTMU] = 0x4,
3615 [NEON_2RM_VCVTMS] = 0x4,
3616 [NEON_2RM_VRECPE] = 0x4,
3617 [NEON_2RM_VRSQRTE] = 0x4,
3618 [NEON_2RM_VRECPE_F] = 0x4,
3619 [NEON_2RM_VRSQRTE_F] = 0x4,
3620 [NEON_2RM_VCVT_FS] = 0x4,
3621 [NEON_2RM_VCVT_FU] = 0x4,
3622 [NEON_2RM_VCVT_SF] = 0x4,
3623 [NEON_2RM_VCVT_UF] = 0x4,
3626 static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
3627 uint32_t opr_sz, uint32_t max_sz,
3628 gen_helper_gvec_3_ptr *fn)
3630 TCGv_ptr qc_ptr = tcg_temp_new_ptr();
3632 tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
3633 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
3634 opr_sz, max_sz, 0, fn);
3635 tcg_temp_free_ptr(qc_ptr);
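/*
 * Note: gen_gvec_fn3_qc passes a pointer to vfp.qc as the helper's extra
 * argument so that saturating helpers such as sqrdmlah/sqrdmlsh can set
 * the sticky QC (cumulative saturation) flag.
 */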
3638 void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3639 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3641 static gen_helper_gvec_3_ptr * const fns[2] = {
3642 gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
3644 tcg_debug_assert(vece >= 1 && vece <= 2);
3645 gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
3648 void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3649 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3651 static gen_helper_gvec_3_ptr * const fns[2] = {
3652 gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
3654 tcg_debug_assert(vece >= 1 && vece <= 2);
3655 gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
3658 #define GEN_CMP0(NAME, COND) \
3659 static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a) \
3661 tcg_gen_setcondi_i32(COND, d, a, 0); \
3662 tcg_gen_neg_i32(d, d); \
3664 static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a) \
3666 tcg_gen_setcondi_i64(COND, d, a, 0); \
3667 tcg_gen_neg_i64(d, d); \
3669 static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
3671 TCGv_vec zero = tcg_const_zeros_vec_matching(d); \
3672 tcg_gen_cmp_vec(COND, vece, d, a, zero); \
3673 tcg_temp_free_vec(zero); \
3675 void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m, \
3676 uint32_t opr_sz, uint32_t max_sz) \
3678 const GVecGen2 op[4] = { \
3679 { .fno = gen_helper_gvec_##NAME##0_b, \
3680 .fniv = gen_##NAME##0_vec, \
3681 .opt_opc = vecop_list_cmp, \
3683 { .fno = gen_helper_gvec_##NAME##0_h, \
3684 .fniv = gen_##NAME##0_vec, \
3685 .opt_opc = vecop_list_cmp, \
3687 { .fni4 = gen_##NAME##0_i32, \
3688 .fniv = gen_##NAME##0_vec, \
3689 .opt_opc = vecop_list_cmp, \
3691 { .fni8 = gen_##NAME##0_i64, \
3692 .fniv = gen_##NAME##0_vec, \
3693 .opt_opc = vecop_list_cmp, \
3694 .prefer_i64 = TCG_TARGET_REG_BITS == 64, \
3697 tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]); \
3700 static const TCGOpcode vecop_list_cmp[] = {
3701 INDEX_op_cmp_vec, 0
3702 };
3704 GEN_CMP0(ceq, TCG_COND_EQ)
3705 GEN_CMP0(cle, TCG_COND_LE)
3706 GEN_CMP0(cge, TCG_COND_GE)
3707 GEN_CMP0(clt, TCG_COND_LT)
3708 GEN_CMP0(cgt, TCG_COND_GT)
3710 #undef GEN_CMP0
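/*
 * The GEN_CMP0 expansions above implement the "compare against zero"
 * operations: each result element is all-ones when the comparison holds
 * and all-zeroes otherwise (setcond yields 0/1, which is then negated).
 */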
3712 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3714 tcg_gen_vec_sar8i_i64(a, a, shift);
3715 tcg_gen_vec_add8_i64(d, d, a);
3718 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3720 tcg_gen_vec_sar16i_i64(a, a, shift);
3721 tcg_gen_vec_add16_i64(d, d, a);
3724 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3726 tcg_gen_sari_i32(a, a, shift);
3727 tcg_gen_add_i32(d, d, a);
3730 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3732 tcg_gen_sari_i64(a, a, shift);
3733 tcg_gen_add_i64(d, d, a);
3736 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3738 tcg_gen_sari_vec(vece, a, a, sh);
3739 tcg_gen_add_vec(vece, d, d, a);
3742 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3743 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3745 static const TCGOpcode vecop_list[] = {
3746 INDEX_op_sari_vec, INDEX_op_add_vec, 0
3748 static const GVecGen2i ops[4] = {
3749 { .fni8 = gen_ssra8_i64,
3750 .fniv = gen_ssra_vec,
3751 .fno = gen_helper_gvec_ssra_b,
3753 .opt_opc = vecop_list,
3755 { .fni8 = gen_ssra16_i64,
3756 .fniv = gen_ssra_vec,
3757 .fno = gen_helper_gvec_ssra_h,
3759 .opt_opc = vecop_list,
3761 { .fni4 = gen_ssra32_i32,
3762 .fniv = gen_ssra_vec,
3763 .fno = gen_helper_gvec_ssra_s,
3765 .opt_opc = vecop_list,
3767 { .fni8 = gen_ssra64_i64,
3768 .fniv = gen_ssra_vec,
3769 .fno = gen_helper_gvec_ssra_d,
3770 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3771 .opt_opc = vecop_list,
3776 /* tszimm encoding produces immediates in the range [1..esize]. */
3777 tcg_debug_assert(shift > 0);
3778 tcg_debug_assert(shift <= (8 << vece));
3780 /*
3781 * Shifts larger than the element size are architecturally valid.
3782 * Signed results in all sign bits.
3783 */
3784 shift = MIN(shift, (8 << vece) - 1);
3785 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3786 }
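/*
 * Example: with shift == esize the clamp above still performs an
 * arithmetic shift by esize-1, so a negative element (e.g. 0x80 for
 * bytes) contributes -1 to the accumulator, matching the architectural
 * result of shifting in sign bits.
 */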
3788 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3790 tcg_gen_vec_shr8i_i64(a, a, shift);
3791 tcg_gen_vec_add8_i64(d, d, a);
3794 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3796 tcg_gen_vec_shr16i_i64(a, a, shift);
3797 tcg_gen_vec_add16_i64(d, d, a);
3800 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3802 tcg_gen_shri_i32(a, a, shift);
3803 tcg_gen_add_i32(d, d, a);
3806 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3808 tcg_gen_shri_i64(a, a, shift);
3809 tcg_gen_add_i64(d, d, a);
3812 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3814 tcg_gen_shri_vec(vece, a, a, sh);
3815 tcg_gen_add_vec(vece, d, d, a);
3818 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3819 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3821 static const TCGOpcode vecop_list[] = {
3822 INDEX_op_shri_vec, INDEX_op_add_vec, 0
3824 static const GVecGen2i ops[4] = {
3825 { .fni8 = gen_usra8_i64,
3826 .fniv = gen_usra_vec,
3827 .fno = gen_helper_gvec_usra_b,
3829 .opt_opc = vecop_list,
3831 { .fni8 = gen_usra16_i64,
3832 .fniv = gen_usra_vec,
3833 .fno = gen_helper_gvec_usra_h,
3835 .opt_opc = vecop_list,
3837 { .fni4 = gen_usra32_i32,
3838 .fniv = gen_usra_vec,
3839 .fno = gen_helper_gvec_usra_s,
3841 .opt_opc = vecop_list,
3843 { .fni8 = gen_usra64_i64,
3844 .fniv = gen_usra_vec,
3845 .fno = gen_helper_gvec_usra_d,
3846 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3848 .opt_opc = vecop_list,
3852 /* tszimm encoding produces immediates in the range [1..esize]. */
3853 tcg_debug_assert(shift > 0);
3854 tcg_debug_assert(shift <= (8 << vece));
3856 /*
3857 * Shifts larger than the element size are architecturally valid.
3858 * Unsigned results in all zeros as input to accumulate: nop.
3859 */
3860 if (shift < (8 << vece)) {
3861 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3862 } else {
3863 /* Nop, but we do need to clear the tail. */
3864 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3865 }
3866 }
3868 /*
3869 * Shift one less than the requested amount, and the low bit is
3870 * the rounding bit. For the 8 and 16-bit operations, because we
3871 * mask the low bit, we can perform a normal integer shift instead
3872 * of a vector shift.
3873 */
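/*
 * Worked example, assuming an 8-bit SRSHR by 2 of the value 7:
 * rounding bit = (7 >> 1) & 1 = 1, 7 >> 2 = 1, so the result is 2,
 * the same as the rounded shift (7 + 2) >> 2.
 */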
3874 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3876 TCGv_i64 t = tcg_temp_new_i64();
3878 tcg_gen_shri_i64(t, a, sh - 1);
3879 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3880 tcg_gen_vec_sar8i_i64(d, a, sh);
3881 tcg_gen_vec_add8_i64(d, d, t);
3882 tcg_temp_free_i64(t);
3885 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3887 TCGv_i64 t = tcg_temp_new_i64();
3889 tcg_gen_shri_i64(t, a, sh - 1);
3890 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3891 tcg_gen_vec_sar16i_i64(d, a, sh);
3892 tcg_gen_vec_add16_i64(d, d, t);
3893 tcg_temp_free_i64(t);
3896 static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3898 TCGv_i32 t = tcg_temp_new_i32();
3900 tcg_gen_extract_i32(t, a, sh - 1, 1);
3901 tcg_gen_sari_i32(d, a, sh);
3902 tcg_gen_add_i32(d, d, t);
3903 tcg_temp_free_i32(t);
3906 static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3908 TCGv_i64 t = tcg_temp_new_i64();
3910 tcg_gen_extract_i64(t, a, sh - 1, 1);
3911 tcg_gen_sari_i64(d, a, sh);
3912 tcg_gen_add_i64(d, d, t);
3913 tcg_temp_free_i64(t);
3916 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3918 TCGv_vec t = tcg_temp_new_vec_matching(d);
3919 TCGv_vec ones = tcg_temp_new_vec_matching(d);
3921 tcg_gen_shri_vec(vece, t, a, sh - 1);
3922 tcg_gen_dupi_vec(vece, ones, 1);
3923 tcg_gen_and_vec(vece, t, t, ones);
3924 tcg_gen_sari_vec(vece, d, a, sh);
3925 tcg_gen_add_vec(vece, d, d, t);
3927 tcg_temp_free_vec(t);
3928 tcg_temp_free_vec(ones);
3931 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3932 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3934 static const TCGOpcode vecop_list[] = {
3935 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3937 static const GVecGen2i ops[4] = {
3938 { .fni8 = gen_srshr8_i64,
3939 .fniv = gen_srshr_vec,
3940 .fno = gen_helper_gvec_srshr_b,
3941 .opt_opc = vecop_list,
3943 { .fni8 = gen_srshr16_i64,
3944 .fniv = gen_srshr_vec,
3945 .fno = gen_helper_gvec_srshr_h,
3946 .opt_opc = vecop_list,
3948 { .fni4 = gen_srshr32_i32,
3949 .fniv = gen_srshr_vec,
3950 .fno = gen_helper_gvec_srshr_s,
3951 .opt_opc = vecop_list,
3953 { .fni8 = gen_srshr64_i64,
3954 .fniv = gen_srshr_vec,
3955 .fno = gen_helper_gvec_srshr_d,
3956 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3957 .opt_opc = vecop_list,
3961 /* tszimm encoding produces immediates in the range [1..esize] */
3962 tcg_debug_assert(shift > 0);
3963 tcg_debug_assert(shift <= (8 << vece));
3965 if (shift == (8 << vece)) {
3966 /*
3967 * Shifts larger than the element size are architecturally valid.
3968 * Signed results in all sign bits. With rounding, this produces
3969 * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3970 * I.e. always zero.
3971 */
3972 tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3973 } else {
3974 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3975 }
3976 }
3978 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3980 TCGv_i64 t = tcg_temp_new_i64();
3982 gen_srshr8_i64(t, a, sh);
3983 tcg_gen_vec_add8_i64(d, d, t);
3984 tcg_temp_free_i64(t);
3987 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3989 TCGv_i64 t = tcg_temp_new_i64();
3991 gen_srshr16_i64(t, a, sh);
3992 tcg_gen_vec_add16_i64(d, d, t);
3993 tcg_temp_free_i64(t);
3996 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3998 TCGv_i32 t = tcg_temp_new_i32();
4000 gen_srshr32_i32(t, a, sh);
4001 tcg_gen_add_i32(d, d, t);
4002 tcg_temp_free_i32(t);
4005 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4007 TCGv_i64 t = tcg_temp_new_i64();
4009 gen_srshr64_i64(t, a, sh);
4010 tcg_gen_add_i64(d, d, t);
4011 tcg_temp_free_i64(t);
4014 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4016 TCGv_vec t = tcg_temp_new_vec_matching(d);
4018 gen_srshr_vec(vece, t, a, sh);
4019 tcg_gen_add_vec(vece, d, d, t);
4020 tcg_temp_free_vec(t);
4023 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4024 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4026 static const TCGOpcode vecop_list[] = {
4027 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
4029 static const GVecGen2i ops[4] = {
4030 { .fni8 = gen_srsra8_i64,
4031 .fniv = gen_srsra_vec,
4032 .fno = gen_helper_gvec_srsra_b,
4033 .opt_opc = vecop_list,
4036 { .fni8 = gen_srsra16_i64,
4037 .fniv = gen_srsra_vec,
4038 .fno = gen_helper_gvec_srsra_h,
4039 .opt_opc = vecop_list,
4042 { .fni4 = gen_srsra32_i32,
4043 .fniv = gen_srsra_vec,
4044 .fno = gen_helper_gvec_srsra_s,
4045 .opt_opc = vecop_list,
4048 { .fni8 = gen_srsra64_i64,
4049 .fniv = gen_srsra_vec,
4050 .fno = gen_helper_gvec_srsra_d,
4051 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4052 .opt_opc = vecop_list,
4057 /* tszimm encoding produces immediates in the range [1..esize] */
4058 tcg_debug_assert(shift > 0);
4059 tcg_debug_assert(shift <= (8 << vece));
4061 /*
4062 * Shifts larger than the element size are architecturally valid.
4063 * Signed results in all sign bits. With rounding, this produces
4064 * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
4065 * I.e. always zero. With accumulation, this leaves D unchanged.
4066 */
4067 if (shift == (8 << vece)) {
4068 /* Nop, but we do need to clear the tail. */
4069 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
4070 } else {
4071 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4072 }
4073 }
4075 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4077 TCGv_i64 t = tcg_temp_new_i64();
4079 tcg_gen_shri_i64(t, a, sh - 1);
4080 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
4081 tcg_gen_vec_shr8i_i64(d, a, sh);
4082 tcg_gen_vec_add8_i64(d, d, t);
4083 tcg_temp_free_i64(t);
4086 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4088 TCGv_i64 t = tcg_temp_new_i64();
4090 tcg_gen_shri_i64(t, a, sh - 1);
4091 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
4092 tcg_gen_vec_shr16i_i64(d, a, sh);
4093 tcg_gen_vec_add16_i64(d, d, t);
4094 tcg_temp_free_i64(t);
4097 static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
4099 TCGv_i32 t = tcg_temp_new_i32();
4101 tcg_gen_extract_i32(t, a, sh - 1, 1);
4102 tcg_gen_shri_i32(d, a, sh);
4103 tcg_gen_add_i32(d, d, t);
4104 tcg_temp_free_i32(t);
4107 static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4109 TCGv_i64 t = tcg_temp_new_i64();
4111 tcg_gen_extract_i64(t, a, sh - 1, 1);
4112 tcg_gen_shri_i64(d, a, sh);
4113 tcg_gen_add_i64(d, d, t);
4114 tcg_temp_free_i64(t);
4117 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
4119 TCGv_vec t = tcg_temp_new_vec_matching(d);
4120 TCGv_vec ones = tcg_temp_new_vec_matching(d);
4122 tcg_gen_shri_vec(vece, t, a, shift - 1);
4123 tcg_gen_dupi_vec(vece, ones, 1);
4124 tcg_gen_and_vec(vece, t, t, ones);
4125 tcg_gen_shri_vec(vece, d, a, shift);
4126 tcg_gen_add_vec(vece, d, d, t);
4128 tcg_temp_free_vec(t);
4129 tcg_temp_free_vec(ones);
4132 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4133 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4135 static const TCGOpcode vecop_list[] = {
4136 INDEX_op_shri_vec, INDEX_op_add_vec, 0
4138 static const GVecGen2i ops[4] = {
4139 { .fni8 = gen_urshr8_i64,
4140 .fniv = gen_urshr_vec,
4141 .fno = gen_helper_gvec_urshr_b,
4142 .opt_opc = vecop_list,
4144 { .fni8 = gen_urshr16_i64,
4145 .fniv = gen_urshr_vec,
4146 .fno = gen_helper_gvec_urshr_h,
4147 .opt_opc = vecop_list,
4149 { .fni4 = gen_urshr32_i32,
4150 .fniv = gen_urshr_vec,
4151 .fno = gen_helper_gvec_urshr_s,
4152 .opt_opc = vecop_list,
4154 { .fni8 = gen_urshr64_i64,
4155 .fniv = gen_urshr_vec,
4156 .fno = gen_helper_gvec_urshr_d,
4157 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4158 .opt_opc = vecop_list,
4162 /* tszimm encoding produces immediates in the range [1..esize] */
4163 tcg_debug_assert(shift > 0);
4164 tcg_debug_assert(shift <= (8 << vece));
4166 if (shift == (8 << vece)) {
4167 /*
4168 * Shifts larger than the element size are architecturally valid.
4169 * Unsigned results in zero. With rounding, this produces a
4170 * copy of the most significant bit.
4171 */
4172 tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
4173 } else {
4174 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4175 }
4176 }
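/*
 * Example: URSHR by esize on an 8-bit element 0x80 computes
 * (0x80 + 0x80) >> 8 = 1, i.e. a copy of the element's most significant
 * bit, which is exactly what the shri by shift-1 above produces.
 */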
4178 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4180 TCGv_i64 t = tcg_temp_new_i64();
4182 if (sh == 8) {
4183 tcg_gen_vec_shr8i_i64(t, a, 7);
4184 } else {
4185 gen_urshr8_i64(t, a, sh);
4186 }
4187 tcg_gen_vec_add8_i64(d, d, t);
4188 tcg_temp_free_i64(t);
4189 }
4191 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4193 TCGv_i64 t = tcg_temp_new_i64();
4195 if (sh == 16) {
4196 tcg_gen_vec_shr16i_i64(t, a, 15);
4197 } else {
4198 gen_urshr16_i64(t, a, sh);
4199 }
4200 tcg_gen_vec_add16_i64(d, d, t);
4201 tcg_temp_free_i64(t);
4202 }
4204 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
4206 TCGv_i32 t = tcg_temp_new_i32();
4208 if (sh == 32) {
4209 tcg_gen_shri_i32(t, a, 31);
4210 } else {
4211 gen_urshr32_i32(t, a, sh);
4212 }
4213 tcg_gen_add_i32(d, d, t);
4214 tcg_temp_free_i32(t);
4215 }
4217 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4219 TCGv_i64 t = tcg_temp_new_i64();
4221 if (sh == 64) {
4222 tcg_gen_shri_i64(t, a, 63);
4223 } else {
4224 gen_urshr64_i64(t, a, sh);
4225 }
4226 tcg_gen_add_i64(d, d, t);
4227 tcg_temp_free_i64(t);
4228 }
4230 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4232 TCGv_vec t = tcg_temp_new_vec_matching(d);
4234 if (sh == (8 << vece)) {
4235 tcg_gen_shri_vec(vece, t, a, sh - 1);
4236 } else {
4237 gen_urshr_vec(vece, t, a, sh);
4238 }
4239 tcg_gen_add_vec(vece, d, d, t);
4240 tcg_temp_free_vec(t);
4241 }
4243 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4244 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4246 static const TCGOpcode vecop_list[] = {
4247 INDEX_op_shri_vec, INDEX_op_add_vec, 0
4249 static const GVecGen2i ops[4] = {
4250 { .fni8 = gen_ursra8_i64,
4251 .fniv = gen_ursra_vec,
4252 .fno = gen_helper_gvec_ursra_b,
4253 .opt_opc = vecop_list,
4256 { .fni8 = gen_ursra16_i64,
4257 .fniv = gen_ursra_vec,
4258 .fno = gen_helper_gvec_ursra_h,
4259 .opt_opc = vecop_list,
4262 { .fni4 = gen_ursra32_i32,
4263 .fniv = gen_ursra_vec,
4264 .fno = gen_helper_gvec_ursra_s,
4265 .opt_opc = vecop_list,
4268 { .fni8 = gen_ursra64_i64,
4269 .fniv = gen_ursra_vec,
4270 .fno = gen_helper_gvec_ursra_d,
4271 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4272 .opt_opc = vecop_list,
4277 /* tszimm encoding produces immediates in the range [1..esize] */
4278 tcg_debug_assert(shift > 0);
4279 tcg_debug_assert(shift <= (8 << vece));
4281 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4284 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4286 uint64_t mask = dup_const(MO_8, 0xff >> shift);
4287 TCGv_i64 t = tcg_temp_new_i64();
4289 tcg_gen_shri_i64(t, a, shift);
4290 tcg_gen_andi_i64(t, t, mask);
4291 tcg_gen_andi_i64(d, d, ~mask);
4292 tcg_gen_or_i64(d, d, t);
4293 tcg_temp_free_i64(t);
4296 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4298 uint64_t mask = dup_const(MO_16, 0xffff >> shift);
4299 TCGv_i64 t = tcg_temp_new_i64();
4301 tcg_gen_shri_i64(t, a, shift);
4302 tcg_gen_andi_i64(t, t, mask);
4303 tcg_gen_andi_i64(d, d, ~mask);
4304 tcg_gen_or_i64(d, d, t);
4305 tcg_temp_free_i64(t);
4308 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4310 tcg_gen_shri_i32(a, a, shift);
4311 tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
4314 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4316 tcg_gen_shri_i64(a, a, shift);
4317 tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
4320 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4322 TCGv_vec t = tcg_temp_new_vec_matching(d);
4323 TCGv_vec m = tcg_temp_new_vec_matching(d);
4325 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
4326 tcg_gen_shri_vec(vece, t, a, sh);
4327 tcg_gen_and_vec(vece, d, d, m);
4328 tcg_gen_or_vec(vece, d, d, t);
4330 tcg_temp_free_vec(t);
4331 tcg_temp_free_vec(m);
4334 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4335 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4337 static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
4338 const GVecGen2i ops[4] = {
4339 { .fni8 = gen_shr8_ins_i64,
4340 .fniv = gen_shr_ins_vec,
4341 .fno = gen_helper_gvec_sri_b,
4343 .opt_opc = vecop_list,
4345 { .fni8 = gen_shr16_ins_i64,
4346 .fniv = gen_shr_ins_vec,
4347 .fno = gen_helper_gvec_sri_h,
4349 .opt_opc = vecop_list,
4351 { .fni4 = gen_shr32_ins_i32,
4352 .fniv = gen_shr_ins_vec,
4353 .fno = gen_helper_gvec_sri_s,
4355 .opt_opc = vecop_list,
4357 { .fni8 = gen_shr64_ins_i64,
4358 .fniv = gen_shr_ins_vec,
4359 .fno = gen_helper_gvec_sri_d,
4360 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4362 .opt_opc = vecop_list,
4366 /* tszimm encoding produces immediates in the range [1..esize]. */
4367 tcg_debug_assert(shift > 0);
4368 tcg_debug_assert(shift <= (8 << vece));
4370 /* Shift of esize leaves destination unchanged. */
4371 if (shift < (8 << vece)) {
4372 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4373 } else {
4374 /* Nop, but we do need to clear the tail. */
4375 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
4376 }
4377 }
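/*
 * SRI inserts the right-shifted source into the destination while
 * preserving the top 'shift' bits of each destination element; a shift
 * of esize therefore writes nothing, which is why it is handled above as
 * a tail-clearing move of the destination onto itself.
 */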
4379 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4381 uint64_t mask = dup_const(MO_8, 0xff << shift);
4382 TCGv_i64 t = tcg_temp_new_i64();
4384 tcg_gen_shli_i64(t, a, shift);
4385 tcg_gen_andi_i64(t, t, mask);
4386 tcg_gen_andi_i64(d, d, ~mask);
4387 tcg_gen_or_i64(d, d, t);
4388 tcg_temp_free_i64(t);
4391 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4393 uint64_t mask = dup_const(MO_16, 0xffff << shift);
4394 TCGv_i64 t = tcg_temp_new_i64();
4396 tcg_gen_shli_i64(t, a, shift);
4397 tcg_gen_andi_i64(t, t, mask);
4398 tcg_gen_andi_i64(d, d, ~mask);
4399 tcg_gen_or_i64(d, d, t);
4400 tcg_temp_free_i64(t);
4403 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4405 tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
4408 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4410 tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
4413 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4415 TCGv_vec t = tcg_temp_new_vec_matching(d);
4416 TCGv_vec m = tcg_temp_new_vec_matching(d);
4418 tcg_gen_shli_vec(vece, t, a, sh);
4419 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
4420 tcg_gen_and_vec(vece, d, d, m);
4421 tcg_gen_or_vec(vece, d, d, t);
4423 tcg_temp_free_vec(t);
4424 tcg_temp_free_vec(m);
4427 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4428 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4430 static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
4431 const GVecGen2i ops[4] = {
4432 { .fni8 = gen_shl8_ins_i64,
4433 .fniv = gen_shl_ins_vec,
4434 .fno = gen_helper_gvec_sli_b,
4436 .opt_opc = vecop_list,
4438 { .fni8 = gen_shl16_ins_i64,
4439 .fniv = gen_shl_ins_vec,
4440 .fno = gen_helper_gvec_sli_h,
4442 .opt_opc = vecop_list,
4444 { .fni4 = gen_shl32_ins_i32,
4445 .fniv = gen_shl_ins_vec,
4446 .fno = gen_helper_gvec_sli_s,
4448 .opt_opc = vecop_list,
4450 { .fni8 = gen_shl64_ins_i64,
4451 .fniv = gen_shl_ins_vec,
4452 .fno = gen_helper_gvec_sli_d,
4453 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4455 .opt_opc = vecop_list,
4459 /* tszimm encoding produces immediates in the range [0..esize-1]. */
4460 tcg_debug_assert(shift >= 0);
4461 tcg_debug_assert(shift < (8 << vece));
4463 if (shift == 0) {
4464 tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
4465 } else {
4466 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4467 }
4468 }
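/*
 * SLI is the mirror image: the low 'shift' bits of each destination
 * element are preserved, so a shift of 0 keeps nothing of the
 * destination and degenerates into a plain vector move of the source.
 */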
4470 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4472 gen_helper_neon_mul_u8(a, a, b);
4473 gen_helper_neon_add_u8(d, d, a);
4476 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4478 gen_helper_neon_mul_u8(a, a, b);
4479 gen_helper_neon_sub_u8(d, d, a);
4482 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4484 gen_helper_neon_mul_u16(a, a, b);
4485 gen_helper_neon_add_u16(d, d, a);
4488 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4490 gen_helper_neon_mul_u16(a, a, b);
4491 gen_helper_neon_sub_u16(d, d, a);
4494 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4496 tcg_gen_mul_i32(a, a, b);
4497 tcg_gen_add_i32(d, d, a);
4500 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4502 tcg_gen_mul_i32(a, a, b);
4503 tcg_gen_sub_i32(d, d, a);
4506 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4508 tcg_gen_mul_i64(a, a, b);
4509 tcg_gen_add_i64(d, d, a);
4512 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4514 tcg_gen_mul_i64(a, a, b);
4515 tcg_gen_sub_i64(d, d, a);
4518 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4520 tcg_gen_mul_vec(vece, a, a, b);
4521 tcg_gen_add_vec(vece, d, d, a);
4524 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4526 tcg_gen_mul_vec(vece, a, a, b);
4527 tcg_gen_sub_vec(vece, d, d, a);
4530 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
4531 * these tables are shared with AArch64 which does support them. */
4533 void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4534 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4536 static const TCGOpcode vecop_list[] = {
4537 INDEX_op_mul_vec, INDEX_op_add_vec, 0
4539 static const GVecGen3 ops[4] = {
4540 { .fni4 = gen_mla8_i32,
4541 .fniv = gen_mla_vec,
4543 .opt_opc = vecop_list,
4545 { .fni4 = gen_mla16_i32,
4546 .fniv = gen_mla_vec,
4548 .opt_opc = vecop_list,
4550 { .fni4 = gen_mla32_i32,
4551 .fniv = gen_mla_vec,
4553 .opt_opc = vecop_list,
4555 { .fni8 = gen_mla64_i64,
4556 .fniv = gen_mla_vec,
4557 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4559 .opt_opc = vecop_list,
4562 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4565 void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4566 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4568 static const TCGOpcode vecop_list[] = {
4569 INDEX_op_mul_vec, INDEX_op_sub_vec, 0
4571 static const GVecGen3 ops[4] = {
4572 { .fni4 = gen_mls8_i32,
4573 .fniv = gen_mls_vec,
4575 .opt_opc = vecop_list,
4577 { .fni4 = gen_mls16_i32,
4578 .fniv = gen_mls_vec,
4580 .opt_opc = vecop_list,
4582 { .fni4 = gen_mls32_i32,
4583 .fniv = gen_mls_vec,
4585 .opt_opc = vecop_list,
4587 { .fni8 = gen_mls64_i64,
4588 .fniv = gen_mls_vec,
4589 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4591 .opt_opc = vecop_list,
4594 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
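/*
 * Illustrative sketch (not part of the translator): the elementwise
 * operation behind the VMLA/VMLS expansions above.  Arithmetic is
 * modulo 2^esize, so plain unsigned C arithmetic gives the correct
 * wrap-around; 32-bit elements shown, names are ad hoc.
 */
static inline uint32_t neon_ref_mla32(uint32_t d, uint32_t a, uint32_t b)
{
    return d + a * b;               /* multiply-accumulate, wrapping */
}
static inline uint32_t neon_ref_mls32(uint32_t d, uint32_t a, uint32_t b)
{
    return d - a * b;               /* multiply-subtract, wrapping */
}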
4597 /* CMTST : test is "if ((X & Y) != 0)"; the result is all-ones when true, zero otherwise. */
4598 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4600 tcg_gen_and_i32(d, a, b);
4601 tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
4602 tcg_gen_neg_i32(d, d);
4605 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4607 tcg_gen_and_i64(d, a, b);
4608 tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
4609 tcg_gen_neg_i64(d, d);
4612 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4614 tcg_gen_and_vec(vece, d, a, b);
4615 tcg_gen_dupi_vec(vece, a, 0);
4616 tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
4619 void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4620 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4622 static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
4623 static const GVecGen3 ops[4] = {
4624 { .fni4 = gen_helper_neon_tst_u8,
4625 .fniv = gen_cmtst_vec,
4626 .opt_opc = vecop_list,
4628 { .fni4 = gen_helper_neon_tst_u16,
4629 .fniv = gen_cmtst_vec,
4630 .opt_opc = vecop_list,
4632 { .fni4 = gen_cmtst_i32,
4633 .fniv = gen_cmtst_vec,
4634 .opt_opc = vecop_list,
4636 { .fni8 = gen_cmtst_i64,
4637 .fniv = gen_cmtst_vec,
4638 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4639 .opt_opc = vecop_list,
4642 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
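/*
 * Illustrative sketch (not part of the translator): scalar model of the
 * CMTST comparison generated above.  The result is all-ones when the two
 * operands share any set bit and all-zeroes otherwise; neon_ref_cmtst64
 * is an ad hoc name for illustration only.
 */
static inline uint64_t neon_ref_cmtst64(uint64_t a, uint64_t b)
{
    return (a & b) != 0 ? ~UINT64_C(0) : 0;
}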
4645 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4647 TCGv_i32 lval = tcg_temp_new_i32();
4648 TCGv_i32 rval = tcg_temp_new_i32();
4649 TCGv_i32 lsh = tcg_temp_new_i32();
4650 TCGv_i32 rsh = tcg_temp_new_i32();
4651 TCGv_i32 zero = tcg_const_i32(0);
4652 TCGv_i32 max = tcg_const_i32(32);
4655 /* Rely on the TCG guarantee that out of range shifts produce
4656 * unspecified results, not undefined behaviour (i.e. no trap).
4657 * Discard out-of-range results after the fact. */
4659 tcg_gen_ext8s_i32(lsh, shift);
4660 tcg_gen_neg_i32(rsh, lsh);
4661 tcg_gen_shl_i32(lval, src, lsh);
4662 tcg_gen_shr_i32(rval, src, rsh);
4663 tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
4664 tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
4666 tcg_temp_free_i32(lval);
4667 tcg_temp_free_i32(rval);
4668 tcg_temp_free_i32(lsh);
4669 tcg_temp_free_i32(rsh);
4670 tcg_temp_free_i32(zero);
4671 tcg_temp_free_i32(max);
4674 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4676 TCGv_i64 lval = tcg_temp_new_i64();
4677 TCGv_i64 rval = tcg_temp_new_i64();
4678 TCGv_i64 lsh = tcg_temp_new_i64();
4679 TCGv_i64 rsh = tcg_temp_new_i64();
4680 TCGv_i64 zero = tcg_const_i64(0);
4681 TCGv_i64 max = tcg_const_i64(64);
4684 /* Rely on the TCG guarantee that out of range shifts produce
4685 * unspecified results, not undefined behaviour (i.e. no trap).
4686 * Discard out-of-range results after the fact. */
4688 tcg_gen_ext8s_i64(lsh, shift);
4689 tcg_gen_neg_i64(rsh, lsh);
4690 tcg_gen_shl_i64(lval, src, lsh);
4691 tcg_gen_shr_i64(rval, src, rsh);
4692 tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
4693 tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
4695 tcg_temp_free_i64(lval);
4696 tcg_temp_free_i64(rval);
4697 tcg_temp_free_i64(lsh);
4698 tcg_temp_free_i64(rsh);
4699 tcg_temp_free_i64(zero);
4700 tcg_temp_free_i64(max);
4703 static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
4704 TCGv_vec src, TCGv_vec shift)
4706 TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4707 TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4708 TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4709 TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4712 tcg_gen_neg_vec(vece, rsh, shift);
4714 tcg_gen_mov_vec(lsh, shift);
4716 msk = tcg_temp_new_vec_matching(dst);
4717 tcg_gen_dupi_vec(vece, msk, 0xff);
4718 tcg_gen_and_vec(vece, lsh, shift, msk);
4719 tcg_gen_and_vec(vece, rsh, rsh, msk);
4720 tcg_temp_free_vec(msk);
4724 /* Rely on the TCG guarantee that out of range shifts produce
4725 * unspecified results, not undefined behaviour (i.e. no trap).
4726 * Discard out-of-range results after the fact. */
4728 tcg_gen_shlv_vec(vece, lval, src, lsh);
4729 tcg_gen_shrv_vec(vece, rval, src, rsh);
4731 max = tcg_temp_new_vec_matching(dst);
4732 tcg_gen_dupi_vec(vece, max, 8 << vece);
4735 /* The choice of LT (signed) and GEU (unsigned) is biased toward
4736 * the instructions of the x86_64 host. For MO_8, the whole byte
4737 * is significant so we must use an unsigned compare; otherwise we
4738 * have already masked to a byte and so a signed compare works.
4739 * Other tcg hosts have a full set of comparisons and do not care. */
4742 tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
4743 tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
4744 tcg_gen_andc_vec(vece, lval, lval, lsh);
4745 tcg_gen_andc_vec(vece, rval, rval, rsh);
4747 tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
4748 tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
4749 tcg_gen_and_vec(vece, lval, lval, lsh);
4750 tcg_gen_and_vec(vece, rval, rval, rsh);
4752 tcg_gen_or_vec(vece, dst, lval, rval);
4754 tcg_temp_free_vec(max);
4755 tcg_temp_free_vec(lval);
4756 tcg_temp_free_vec(rval);
4757 tcg_temp_free_vec(lsh);
4758 tcg_temp_free_vec(rsh);
4761 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4762 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4764 static const TCGOpcode vecop_list[] = {
4765 INDEX_op_neg_vec, INDEX_op_shlv_vec,
4766 INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
4768 static const GVecGen3 ops[4] = {
4769 { .fniv = gen_ushl_vec,
4770 .fno = gen_helper_gvec_ushl_b,
4771 .opt_opc = vecop_list,
4773 { .fniv = gen_ushl_vec,
4774 .fno = gen_helper_gvec_ushl_h,
4775 .opt_opc = vecop_list,
4777 { .fni4 = gen_ushl_i32,
4778 .fniv = gen_ushl_vec,
4779 .opt_opc = vecop_list,
4781 { .fni8 = gen_ushl_i64,
4782 .fniv = gen_ushl_vec,
4783 .opt_opc = vecop_list,
4786 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
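/*
 * Illustrative sketch (not part of the translator): scalar model of the
 * USHL semantics that gen_ushl_i32/gen_ushl_vec implement by computing
 * both shift directions and then discarding the out-of-range result.
 * The count is the signed low byte of the shift operand; a count of 32
 * or more in either direction yields zero.  neon_ref_ushl32 is an
 * ad hoc name for illustration only.
 */
static inline uint32_t neon_ref_ushl32(uint32_t src, uint32_t shift)
{
    int8_t sh = (int8_t)shift;      /* only the low byte is significant */

    if (sh >= 32 || sh <= -32) {
        return 0;                   /* everything shifted out */
    }
    return sh >= 0 ? src << sh : src >> -sh;
}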
4789 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4791 TCGv_i32 lval = tcg_temp_new_i32();
4792 TCGv_i32 rval = tcg_temp_new_i32();
4793 TCGv_i32 lsh = tcg_temp_new_i32();
4794 TCGv_i32 rsh = tcg_temp_new_i32();
4795 TCGv_i32 zero = tcg_const_i32(0);
4796 TCGv_i32 max = tcg_const_i32(31);
4799 /* Rely on the TCG guarantee that out of range shifts produce
4800 * unspecified results, not undefined behaviour (i.e. no trap).
4801 * Discard out-of-range results after the fact. */
4803 tcg_gen_ext8s_i32(lsh, shift);
4804 tcg_gen_neg_i32(rsh, lsh);
4805 tcg_gen_shl_i32(lval, src, lsh);
4806 tcg_gen_umin_i32(rsh, rsh, max);
4807 tcg_gen_sar_i32(rval, src, rsh);
4808 tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4809 tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4811 tcg_temp_free_i32(lval);
4812 tcg_temp_free_i32(rval);
4813 tcg_temp_free_i32(lsh);
4814 tcg_temp_free_i32(rsh);
4815 tcg_temp_free_i32(zero);
4816 tcg_temp_free_i32(max);
4819 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4821 TCGv_i64 lval = tcg_temp_new_i64();
4822 TCGv_i64 rval = tcg_temp_new_i64();
4823 TCGv_i64 lsh = tcg_temp_new_i64();
4824 TCGv_i64 rsh = tcg_temp_new_i64();
4825 TCGv_i64 zero = tcg_const_i64(0);
4826 TCGv_i64 max = tcg_const_i64(63);
4829 /* Rely on the TCG guarantee that out of range shifts produce
4830 * unspecified results, not undefined behaviour (i.e. no trap).
4831 * Discard out-of-range results after the fact. */
4833 tcg_gen_ext8s_i64(lsh, shift);
4834 tcg_gen_neg_i64(rsh, lsh);
4835 tcg_gen_shl_i64(lval, src, lsh);
4836 tcg_gen_umin_i64(rsh, rsh, max);
4837 tcg_gen_sar_i64(rval, src, rsh);
4838 tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4839 tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4841 tcg_temp_free_i64(lval);
4842 tcg_temp_free_i64(rval);
4843 tcg_temp_free_i64(lsh);
4844 tcg_temp_free_i64(rsh);
4845 tcg_temp_free_i64(zero);
4846 tcg_temp_free_i64(max);
4849 static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4850 TCGv_vec src, TCGv_vec shift)
4852 TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4853 TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4854 TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4855 TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4856 TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4859 /* Rely on the TCG guarantee that out of range shifts produce
4860 * unspecified results, not undefined behaviour (i.e. no trap).
4861 * Discard out-of-range results after the fact. */
4863 tcg_gen_neg_vec(vece, rsh, shift);
4865 tcg_gen_mov_vec(lsh, shift);
4867 tcg_gen_dupi_vec(vece, tmp, 0xff);
4868 tcg_gen_and_vec(vece, lsh, shift, tmp);
4869 tcg_gen_and_vec(vece, rsh, rsh, tmp);
4872 /* Bound rsh so that an out-of-range right shift degenerates to a shift by esize - 1, i.e. a sign fill. */
4873 tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4874 tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4875 tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4877 tcg_gen_shlv_vec(vece, lval, src, lsh);
4878 tcg_gen_sarv_vec(vece, rval, src, rsh);
4880 /* Select in-bound left shift. */
4881 tcg_gen_andc_vec(vece, lval, lval, tmp);
4883 /* Select between left and right shift. */
4885 tcg_gen_dupi_vec(vece, tmp, 0);
4886 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4888 tcg_gen_dupi_vec(vece, tmp, 0x80);
4889 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4892 tcg_temp_free_vec(lval);
4893 tcg_temp_free_vec(rval);
4894 tcg_temp_free_vec(lsh);
4895 tcg_temp_free_vec(rsh);
4896 tcg_temp_free_vec(tmp);
4899 void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4900 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4902 static const TCGOpcode vecop_list[] = {
4903 INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4904 INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4906 static const GVecGen3 ops[4] = {
4907 { .fniv = gen_sshl_vec,
4908 .fno = gen_helper_gvec_sshl_b,
4909 .opt_opc = vecop_list,
4911 { .fniv = gen_sshl_vec,
4912 .fno = gen_helper_gvec_sshl_h,
4913 .opt_opc = vecop_list,
4915 { .fni4 = gen_sshl_i32,
4916 .fniv = gen_sshl_vec,
4917 .opt_opc = vecop_list,
4919 { .fni8 = gen_sshl_i64,
4920 .fniv = gen_sshl_vec,
4921 .opt_opc = vecop_list,
4924 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
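/*
 * Illustrative sketch (not part of the translator): scalar model of the
 * SSHL semantics implemented above.  A negative count is an arithmetic
 * right shift; the expanders clamp it (to 31 in the 32-bit helper) so an
 * out-of-range right shift degenerates to a sign fill, while an
 * out-of-range left shift gives zero.  neon_ref_sshl32 is an ad hoc name.
 */
static inline int32_t neon_ref_sshl32(int32_t src, uint32_t shift)
{
    int8_t sh = (int8_t)shift;      /* only the low byte is significant */
    int rsh;

    if (sh >= 32) {
        return 0;                   /* left shift loses every bit */
    }
    if (sh >= 0) {
        return (int32_t)((uint32_t)src << sh);
    }
    rsh = -(int)sh;                 /* positive right-shift amount */
    if (rsh > 31) {
        rsh = 31;                   /* degenerate to a sign fill */
    }
    /* Relies on arithmetic >> for signed values, as QEMU does elsewhere. */
    return src >> rsh;
}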
4927 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4928 TCGv_vec a, TCGv_vec b)
4930 TCGv_vec x = tcg_temp_new_vec_matching(t);
4931 tcg_gen_add_vec(vece, x, a, b);
4932 tcg_gen_usadd_vec(vece, t, a, b);
4933 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4934 tcg_gen_or_vec(vece, sat, sat, x);
4935 tcg_temp_free_vec(x);
4938 void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4939 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4941 static const TCGOpcode vecop_list[] = {
4942 INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4944 static const GVecGen4 ops[4] = {
4945 { .fniv = gen_uqadd_vec,
4946 .fno = gen_helper_gvec_uqadd_b,
4948 .opt_opc = vecop_list,
4950 { .fniv = gen_uqadd_vec,
4951 .fno = gen_helper_gvec_uqadd_h,
4953 .opt_opc = vecop_list,
4955 { .fniv = gen_uqadd_vec,
4956 .fno = gen_helper_gvec_uqadd_s,
4958 .opt_opc = vecop_list,
4960 { .fniv = gen_uqadd_vec,
4961 .fno = gen_helper_gvec_uqadd_d,
4963 .opt_opc = vecop_list,
4966 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4967 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
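/*
 * Illustrative sketch (not part of the translator): what the UQADD
 * expansion above computes per element, including the sticky QC update.
 * The vector code detects saturation by comparing the saturating and the
 * wrapping sums; the scalar model below makes that explicit.  The name
 * neon_ref_uqadd8 and the qc pointer are ad hoc for illustration only.
 */
static inline uint8_t neon_ref_uqadd8(uint8_t a, uint8_t b, bool *qc)
{
    unsigned sum = (unsigned)a + b;

    if (sum > UINT8_MAX) {
        *qc = true;                 /* sticky saturation flag */
        return UINT8_MAX;
    }
    return sum;
}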
4970 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4971 TCGv_vec a, TCGv_vec b)
4973 TCGv_vec x = tcg_temp_new_vec_matching(t);
4974 tcg_gen_add_vec(vece, x, a, b);
4975 tcg_gen_ssadd_vec(vece, t, a, b);
4976 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4977 tcg_gen_or_vec(vece, sat, sat, x);
4978 tcg_temp_free_vec(x);
4981 void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4982 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4984 static const TCGOpcode vecop_list[] = {
4985 INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4987 static const GVecGen4 ops[4] = {
4988 { .fniv = gen_sqadd_vec,
4989 .fno = gen_helper_gvec_sqadd_b,
4990 .opt_opc = vecop_list,
4993 { .fniv = gen_sqadd_vec,
4994 .fno = gen_helper_gvec_sqadd_h,
4995 .opt_opc = vecop_list,
4998 { .fniv = gen_sqadd_vec,
4999 .fno = gen_helper_gvec_sqadd_s,
5000 .opt_opc = vecop_list,
5003 { .fniv = gen_sqadd_vec,
5004 .fno = gen_helper_gvec_sqadd_d,
5005 .opt_opc = vecop_list,
5009 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
5010 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5013 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
5014 TCGv_vec a, TCGv_vec b)
5016 TCGv_vec x = tcg_temp_new_vec_matching(t);
5017 tcg_gen_sub_vec(vece, x, a, b);
5018 tcg_gen_ussub_vec(vece, t, a, b);
5019 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
5020 tcg_gen_or_vec(vece, sat, sat, x);
5021 tcg_temp_free_vec(x);
5024 void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5025 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5027 static const TCGOpcode vecop_list[] = {
5028 INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
5030 static const GVecGen4 ops[4] = {
5031 { .fniv = gen_uqsub_vec,
5032 .fno = gen_helper_gvec_uqsub_b,
5033 .opt_opc = vecop_list,
5036 { .fniv = gen_uqsub_vec,
5037 .fno = gen_helper_gvec_uqsub_h,
5038 .opt_opc = vecop_list,
5041 { .fniv = gen_uqsub_vec,
5042 .fno = gen_helper_gvec_uqsub_s,
5043 .opt_opc = vecop_list,
5046 { .fniv = gen_uqsub_vec,
5047 .fno = gen_helper_gvec_uqsub_d,
5048 .opt_opc = vecop_list,
5052 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
5053 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5056 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
5057 TCGv_vec a, TCGv_vec b)
5059 TCGv_vec x = tcg_temp_new_vec_matching(t);
5060 tcg_gen_sub_vec(vece, x, a, b);
5061 tcg_gen_sssub_vec(vece, t, a, b);
5062 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
5063 tcg_gen_or_vec(vece, sat, sat, x);
5064 tcg_temp_free_vec(x);
5067 void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5068 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5070 static const TCGOpcode vecop_list[] = {
5071 INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
5073 static const GVecGen4 ops[4] = {
5074 { .fniv = gen_sqsub_vec,
5075 .fno = gen_helper_gvec_sqsub_b,
5076 .opt_opc = vecop_list,
5079 { .fniv = gen_sqsub_vec,
5080 .fno = gen_helper_gvec_sqsub_h,
5081 .opt_opc = vecop_list,
5084 { .fniv = gen_sqsub_vec,
5085 .fno = gen_helper_gvec_sqsub_s,
5086 .opt_opc = vecop_list,
5089 { .fniv = gen_sqsub_vec,
5090 .fno = gen_helper_gvec_sqsub_d,
5091 .opt_opc = vecop_list,
5095 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
5096 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
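/*
 * Illustrative sketch (not part of the translator): per-element model of
 * the UQSUB/SQSUB expansions above.  Unsigned subtraction saturates at
 * zero, signed subtraction at INT8_MIN/INT8_MAX; in both cases QC is set
 * when the saturating and wrapping results would differ.  Names are
 * ad hoc for illustration only.
 */
static inline uint8_t neon_ref_uqsub8(uint8_t a, uint8_t b, bool *qc)
{
    if (a < b) {
        *qc = true;
        return 0;                   /* saturate at the bottom of the range */
    }
    return a - b;
}
static inline int8_t neon_ref_sqsub8(int8_t a, int8_t b, bool *qc)
{
    int diff = (int)a - b;          /* exact difference, no wrapping yet */

    if (diff < INT8_MIN || diff > INT8_MAX) {
        *qc = true;
        return diff < 0 ? INT8_MIN : INT8_MAX;
    }
    return diff;
}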
5099 static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
5101 TCGv_i32 t = tcg_temp_new_i32();
5103 tcg_gen_sub_i32(t, a, b);
5104 tcg_gen_sub_i32(d, b, a);
5105 tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
5106 tcg_temp_free_i32(t);
5109 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
5111 TCGv_i64 t = tcg_temp_new_i64();
5113 tcg_gen_sub_i64(t, a, b);
5114 tcg_gen_sub_i64(d, b, a);
5115 tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
5116 tcg_temp_free_i64(t);
5119 static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
5121 TCGv_vec t = tcg_temp_new_vec_matching(d);
5123 tcg_gen_smin_vec(vece, t, a, b);
5124 tcg_gen_smax_vec(vece, d, a, b);
5125 tcg_gen_sub_vec(vece, d, d, t);
5126 tcg_temp_free_vec(t);
5129 void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5130 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5132 static const TCGOpcode vecop_list[] = {
5133 INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
5135 static const GVecGen3 ops[4] = {
5136 { .fniv = gen_sabd_vec,
5137 .fno = gen_helper_gvec_sabd_b,
5138 .opt_opc = vecop_list,
5140 { .fniv = gen_sabd_vec,
5141 .fno = gen_helper_gvec_sabd_h,
5142 .opt_opc = vecop_list,
5144 { .fni4 = gen_sabd_i32,
5145 .fniv = gen_sabd_vec,
5146 .fno = gen_helper_gvec_sabd_s,
5147 .opt_opc = vecop_list,
5149 { .fni8 = gen_sabd_i64,
5150 .fniv = gen_sabd_vec,
5151 .fno = gen_helper_gvec_sabd_d,
5152 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5153 .opt_opc = vecop_list,
5156 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
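/*
 * Illustrative sketch (not part of the translator): the signed absolute
 * difference computed above, expressed as smax - smin exactly as the
 * vector expansion does.  The subtraction wraps modulo 2^32, which is
 * what the instruction requires when the true difference does not fit;
 * neon_ref_sabd32 is an ad hoc name for illustration only.
 */
static inline uint32_t neon_ref_sabd32(int32_t a, int32_t b)
{
    int32_t mx = a > b ? a : b;     /* smax */
    int32_t mn = a > b ? b : a;     /* smin */

    return (uint32_t)mx - (uint32_t)mn;     /* wrapping difference */
}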
5159 static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
5161 TCGv_i32 t = tcg_temp_new_i32();
5163 tcg_gen_sub_i32(t, a, b);
5164 tcg_gen_sub_i32(d, b, a);
5165 tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
5166 tcg_temp_free_i32(t);
5169 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
5171 TCGv_i64 t = tcg_temp_new_i64();
5173 tcg_gen_sub_i64(t, a, b);
5174 tcg_gen_sub_i64(d, b, a);
5175 tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
5176 tcg_temp_free_i64(t);
5179 static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
5181 TCGv_vec t = tcg_temp_new_vec_matching(d);
5183 tcg_gen_umin_vec(vece, t, a, b);
5184 tcg_gen_umax_vec(vece, d, a, b);
5185 tcg_gen_sub_vec(vece, d, d, t);
5186 tcg_temp_free_vec(t);
5189 void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5190 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5192 static const TCGOpcode vecop_list[] = {
5193 INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
5195 static const GVecGen3 ops[4] = {
5196 { .fniv = gen_uabd_vec,
5197 .fno = gen_helper_gvec_uabd_b,
5198 .opt_opc = vecop_list,
5200 { .fniv = gen_uabd_vec,
5201 .fno = gen_helper_gvec_uabd_h,
5202 .opt_opc = vecop_list,
5204 { .fni4 = gen_uabd_i32,
5205 .fniv = gen_uabd_vec,
5206 .fno = gen_helper_gvec_uabd_s,
5207 .opt_opc = vecop_list,
5209 { .fni8 = gen_uabd_i64,
5210 .fniv = gen_uabd_vec,
5211 .fno = gen_helper_gvec_uabd_d,
5212 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5213 .opt_opc = vecop_list,
5216 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5219 static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
5221 TCGv_i32 t = tcg_temp_new_i32();
5222 gen_sabd_i32(t, a, b);
5223 tcg_gen_add_i32(d, d, t);
5224 tcg_temp_free_i32(t);
5227 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
5229 TCGv_i64 t = tcg_temp_new_i64();
5230 gen_sabd_i64(t, a, b);
5231 tcg_gen_add_i64(d, d, t);
5232 tcg_temp_free_i64(t);
5235 static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
5237 TCGv_vec t = tcg_temp_new_vec_matching(d);
5238 gen_sabd_vec(vece, t, a, b);
5239 tcg_gen_add_vec(vece, d, d, t);
5240 tcg_temp_free_vec(t);
5243 void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5244 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5246 static const TCGOpcode vecop_list[] = {
5247 INDEX_op_sub_vec, INDEX_op_add_vec,
5248 INDEX_op_smin_vec, INDEX_op_smax_vec, 0
5250 static const GVecGen3 ops[4] = {
5251 { .fniv = gen_saba_vec,
5252 .fno = gen_helper_gvec_saba_b,
5253 .opt_opc = vecop_list,
5256 { .fniv = gen_saba_vec,
5257 .fno = gen_helper_gvec_saba_h,
5258 .opt_opc = vecop_list,
5261 { .fni4 = gen_saba_i32,
5262 .fniv = gen_saba_vec,
5263 .fno = gen_helper_gvec_saba_s,
5264 .opt_opc = vecop_list,
5267 { .fni8 = gen_saba_i64,
5268 .fniv = gen_saba_vec,
5269 .fno = gen_helper_gvec_saba_d,
5270 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5271 .opt_opc = vecop_list,
5275 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5278 static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
5280 TCGv_i32 t = tcg_temp_new_i32();
5281 gen_uabd_i32(t, a, b);
5282 tcg_gen_add_i32(d, d, t);
5283 tcg_temp_free_i32(t);
5286 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
5288 TCGv_i64 t = tcg_temp_new_i64();
5289 gen_uabd_i64(t, a, b);
5290 tcg_gen_add_i64(d, d, t);
5291 tcg_temp_free_i64(t);
5294 static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
5296 TCGv_vec t = tcg_temp_new_vec_matching(d);
5297 gen_uabd_vec(vece, t, a, b);
5298 tcg_gen_add_vec(vece, d, d, t);
5299 tcg_temp_free_vec(t);
5302 void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5303 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5305 static const TCGOpcode vecop_list[] = {
5306 INDEX_op_sub_vec, INDEX_op_add_vec,
5307 INDEX_op_umin_vec, INDEX_op_umax_vec, 0
5309 static const GVecGen3 ops[4] = {
5310 { .fniv = gen_uaba_vec,
5311 .fno = gen_helper_gvec_uaba_b,
5312 .opt_opc = vecop_list,
5315 { .fniv = gen_uaba_vec,
5316 .fno = gen_helper_gvec_uaba_h,
5317 .opt_opc = vecop_list,
5320 { .fni4 = gen_uaba_i32,
5321 .fniv = gen_uaba_vec,
5322 .fno = gen_helper_gvec_uaba_s,
5323 .opt_opc = vecop_list,
5326 { .fni8 = gen_uaba_i64,
5327 .fniv = gen_uaba_vec,
5328 .fno = gen_helper_gvec_uaba_d,
5329 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5330 .opt_opc = vecop_list,
5334 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
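/*
 * Illustrative sketch (not part of the translator): VABA-style ops
 * accumulate the absolute difference into the destination, i.e. the
 * UABD/SABD result above followed by a wrapping add.  neon_ref_uaba32
 * is an ad hoc name for illustration only.
 */
static inline uint32_t neon_ref_uaba32(uint32_t d, uint32_t a, uint32_t b)
{
    uint32_t abd = a > b ? a - b : b - a;   /* unsigned absolute difference */

    return d + abd;                         /* accumulate, wrapping */
}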
5337 /* Translate a NEON data processing instruction. Return nonzero if the
5338 instruction is invalid.
5339 We process data in a mixture of 32-bit and 64-bit chunks.
5340 Mostly we use 32-bit chunks so we can use normal scalar instructions. */
5342 static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
5346 int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs;
5355 TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
5356 TCGv_ptr ptr1, ptr2;
5359 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
5363 /* FIXME: this access check should not take precedence over UNDEF
5364 * for invalid encodings; we will generate incorrect syndrome information
5365 * for attempts to execute invalid vfp/neon encodings with FP disabled.
5367 if (s->fp_excp_el) {
5368 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
5369 syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
5373 if (!s->vfp_enabled)
5375 q = (insn & (1 << 6)) != 0;
5376 u = (insn >> 24) & 1;
5377 VFP_DREG_D(rd, insn);
5378 VFP_DREG_N(rn, insn);
5379 VFP_DREG_M(rm, insn);
5380 size = (insn >> 20) & 3;
5381 vec_size = q ? 16 : 8;
5382 rd_ofs = neon_reg_offset(rd, 0);
5383 rn_ofs = neon_reg_offset(rn, 0);
5384 rm_ofs = neon_reg_offset(rm, 0);
5386 if ((insn & (1 << 23)) == 0) {
5387 /* Three register same length. */
5388 op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
5389 /* Catch invalid op and bad size combinations: UNDEF */
5390 if ((neon_3r_sizes[op] & (1 << size)) == 0) {
5393 /* All insns of this form UNDEF for either this condition or the
5394 * superset of cases "Q==1"; we catch the latter later.
5396 if (q && ((rd | rn | rm) & 1)) {
5400 case NEON_3R_VFM_VQRDMLSH:
5408 /* VQRDMLSH : handled by decodetree */
5411 case NEON_3R_VADD_VSUB:
5415 case NEON_3R_VTST_VCEQ:
5425 case NEON_3R_VRHADD:
5431 case NEON_3R_VQRSHL:
5434 case NEON_3R_VPADD_VQRDMLAH:
5435 case NEON_3R_VQDMULH_VQRDMULH:
5436 /* Already handled by decodetree */
5441 /* 64-bit element instructions: handled by decodetree */
5446 case NEON_3R_FLOAT_ARITH:
5447 pairwise = (u && size < 2); /* if VPADD (float) */
5449 return 1; /* handled by decodetree */
5452 case NEON_3R_FLOAT_MINMAX:
5453 pairwise = u; /* if VPMIN/VPMAX (float) */
5455 case NEON_3R_FLOAT_CMP:
5457 /* no encoding for U=0 C=1x */
5461 case NEON_3R_FLOAT_ACMP:
5466 case NEON_3R_FLOAT_MISC:
5467 /* VMAXNM/VMINNM in ARMv8 */
5468 if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) {
5472 case NEON_3R_VFM_VQRDMLSH:
5473 if (!dc_isar_feature(aa32_simdfmac, s)) {
5481 if (pairwise && q) {
5482 /* All the pairwise insns UNDEF if Q is set */
5486 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5491 tmp = neon_load_reg(rn, 0);
5492 tmp2 = neon_load_reg(rn, 1);
5494 tmp = neon_load_reg(rm, 0);
5495 tmp2 = neon_load_reg(rm, 1);
5499 tmp = neon_load_reg(rn, pass);
5500 tmp2 = neon_load_reg(rm, pass);
5503 case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
5505 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5506 switch ((u << 2) | size) {
5508 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5513 tcg_temp_free_ptr(fpstatus);
5516 case NEON_3R_FLOAT_MULTIPLY:
5518 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5519 gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
5521 tcg_temp_free_i32(tmp2);
5522 tmp2 = neon_load_reg(rd, pass);
5524 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5526 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
5529 tcg_temp_free_ptr(fpstatus);
5532 case NEON_3R_FLOAT_CMP:
5534 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5536 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
5539 gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
5541 gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
5544 tcg_temp_free_ptr(fpstatus);
5547 case NEON_3R_FLOAT_ACMP:
5549 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5551 gen_helper_neon_acge_f32(tmp, tmp, tmp2, fpstatus);
5553 gen_helper_neon_acgt_f32(tmp, tmp, tmp2, fpstatus);
5555 tcg_temp_free_ptr(fpstatus);
5558 case NEON_3R_FLOAT_MINMAX:
5560 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5562 gen_helper_vfp_maxs(tmp, tmp, tmp2, fpstatus);
5564 gen_helper_vfp_mins(tmp, tmp, tmp2, fpstatus);
5566 tcg_temp_free_ptr(fpstatus);
5569 case NEON_3R_FLOAT_MISC:
5572 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5574 gen_helper_vfp_maxnums(tmp, tmp, tmp2, fpstatus);
5576 gen_helper_vfp_minnums(tmp, tmp, tmp2, fpstatus);
5578 tcg_temp_free_ptr(fpstatus);
5581 gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
5583 gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
5587 case NEON_3R_VFM_VQRDMLSH:
5589 /* VFMA, VFMS: fused multiply-add */
5590 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5591 TCGv_i32 tmp3 = neon_load_reg(rd, pass);
5594 gen_helper_vfp_negs(tmp, tmp);
5596 gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus);
5597 tcg_temp_free_i32(tmp3);
5598 tcg_temp_free_ptr(fpstatus);
5604 tcg_temp_free_i32(tmp2);
5606 /* Save the result. For elementwise operations we can put it
5607 straight into the destination register. For pairwise operations
5608 we have to be careful to avoid clobbering the source operands. */
5609 if (pairwise && rd == rm) {
5610 neon_store_scratch(pass, tmp);
5612 neon_store_reg(rd, pass, tmp);
5616 if (pairwise && rd == rm) {
5617 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5618 tmp = neon_load_scratch(pass);
5619 neon_store_reg(rd, pass, tmp);
5622 /* End of 3 register same size operations. */
5623 } else if (insn & (1 << 4)) {
5624 if ((insn & 0x00380080) != 0) {
5625 /* Two registers and shift. */
5626 op = (insn >> 8) & 0xf;
5627 if (insn & (1 << 7)) {
5635 while ((insn & (1 << (size + 19))) == 0)
5638 shift = (insn >> 16) & ((1 << (3 + size)) - 1);
5640 /* Shift by immediate:
5641 VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */
5642 if (q && ((rd | rm) & 1)) {
5645 if (!u && (op == 4 || op == 6)) {
5648 /* Right shifts are encoded as N - shift, where N is the
5649 element size in bits. */
5651 shift = shift - (1 << (size + 3));
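/*
 * Worked example of the decode above: with 8-bit elements (size == 0)
 * the masked field for VSHR #3 is 8 - 3 = 5, and 5 - 8 == -3, so a
 * right shift arrives at the cases below as a negative left shift.
 */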
5656 /* Right shift comes here negative. */
5658 /* Shifts larger than the element size are architecturally
5659 * valid. Unsigned results in all zeros; signed results in all sign bits. */
5663 tcg_gen_gvec_sari(size, rd_ofs, rm_ofs,
5664 MIN(shift, (8 << size) - 1),
5665 vec_size, vec_size);
5666 } else if (shift >= 8 << size) {
5667 tcg_gen_gvec_dup_imm(MO_8, rd_ofs, vec_size,
5670 tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift,
5671 vec_size, vec_size);
5676 /* Right shift comes here negative. */
5679 gen_gvec_usra(size, rd_ofs, rm_ofs, shift,
5680 vec_size, vec_size);
5682 gen_gvec_ssra(size, rd_ofs, rm_ofs, shift,
5683 vec_size, vec_size);
5688 /* Right shift comes here negative. */
5691 gen_gvec_urshr(size, rd_ofs, rm_ofs, shift,
5692 vec_size, vec_size);
5694 gen_gvec_srshr(size, rd_ofs, rm_ofs, shift,
5695 vec_size, vec_size);
5700 /* Right shift comes here negative. */
5703 gen_gvec_ursra(size, rd_ofs, rm_ofs, shift,
5704 vec_size, vec_size);
5706 gen_gvec_srsra(size, rd_ofs, rm_ofs, shift,
5707 vec_size, vec_size);
5715 /* Right shift comes here negative. */
5717 gen_gvec_sri(size, rd_ofs, rm_ofs, shift,
5718 vec_size, vec_size);
5721 case 5: /* VSHL, VSLI */
5723 gen_gvec_sli(size, rd_ofs, rm_ofs, shift,
5724 vec_size, vec_size);
5726 tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift,
5727 vec_size, vec_size);
5738 /* To avoid excessive duplication of ops we implement shift
5739 * by immediate using the variable shift operations.
5741 imm = dup_const(size, shift);
5743 for (pass = 0; pass < count; pass++) {
5745 neon_load_reg64(cpu_V0, rm + pass);
5746 tcg_gen_movi_i64(cpu_V1, imm);
5748 case 6: /* VQSHLU */
5749 gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
5754 gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5757 gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5762 g_assert_not_reached();
5764 neon_store_reg64(cpu_V0, rd + pass);
5765 } else { /* size < 3 */
5766 /* Operands in T0 and T1. */
5767 tmp = neon_load_reg(rm, pass);
5768 tmp2 = tcg_temp_new_i32();
5769 tcg_gen_movi_i32(tmp2, imm);
5771 case 6: /* VQSHLU */
5774 gen_helper_neon_qshlu_s8(tmp, cpu_env,
5778 gen_helper_neon_qshlu_s16(tmp, cpu_env,
5782 gen_helper_neon_qshlu_s32(tmp, cpu_env,
5790 GEN_NEON_INTEGER_OP_ENV(qshl);
5793 g_assert_not_reached();
5795 tcg_temp_free_i32(tmp2);
5796 neon_store_reg(rd, pass, tmp);
5799 } else if (op < 10) {
5800 /* Shift by immediate and narrow:
5801 VSHRN, VRSHRN, VQSHRN, VQRSHRN. */
5802 int input_unsigned = (op == 8) ? !u : u;
5806 shift = shift - (1 << (size + 3));
5809 tmp64 = tcg_const_i64(shift);
5810 neon_load_reg64(cpu_V0, rm);
5811 neon_load_reg64(cpu_V1, rm + 1);
5812 for (pass = 0; pass < 2; pass++) {
5820 if (input_unsigned) {
5821 gen_helper_neon_rshl_u64(cpu_V0, in, tmp64);
5823 gen_helper_neon_rshl_s64(cpu_V0, in, tmp64);
5826 if (input_unsigned) {
5827 gen_ushl_i64(cpu_V0, in, tmp64);
5829 gen_sshl_i64(cpu_V0, in, tmp64);
5832 tmp = tcg_temp_new_i32();
5833 gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5834 neon_store_reg(rd, pass, tmp);
5836 tcg_temp_free_i64(tmp64);
5839 imm = (uint16_t)shift;
5843 imm = (uint32_t)shift;
5845 tmp2 = tcg_const_i32(imm);
5846 tmp4 = neon_load_reg(rm + 1, 0);
5847 tmp5 = neon_load_reg(rm + 1, 1);
5848 for (pass = 0; pass < 2; pass++) {
5850 tmp = neon_load_reg(rm, 0);
5854 gen_neon_shift_narrow(size, tmp, tmp2, q,
5857 tmp3 = neon_load_reg(rm, 1);
5861 gen_neon_shift_narrow(size, tmp3, tmp2, q,
5863 tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
5864 tcg_temp_free_i32(tmp);
5865 tcg_temp_free_i32(tmp3);
5866 tmp = tcg_temp_new_i32();
5867 gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5868 neon_store_reg(rd, pass, tmp);
5870 tcg_temp_free_i32(tmp2);
5872 } else if (op == 10) {
5874 if (q || (rd & 1)) {
5877 tmp = neon_load_reg(rm, 0);
5878 tmp2 = neon_load_reg(rm, 1);
5879 for (pass = 0; pass < 2; pass++) {
5883 gen_neon_widen(cpu_V0, tmp, size, u);
5886 /* The shift is less than the width of the source
5887 type, so we can just shift the whole register. */
5888 tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
5889 /* Widen the result of shift: we need to clear
5890 * the potential overflow bits resulting from
5891 * left bits of the narrow input appearing as
5892 * right bits of the left-neighbour narrow input. */
5894 if (size < 2 || !u) {
5897 imm = (0xffu >> (8 - shift));
5899 } else if (size == 1) {
5900 imm = 0xffff >> (16 - shift);
5903 imm = 0xffffffff >> (32 - shift);
5906 imm64 = imm | (((uint64_t)imm) << 32);
5910 tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
5913 neon_store_reg64(cpu_V0, rd + pass);
5915 } else if (op >= 14) {
5916 /* VCVT fixed-point. */
5919 VFPGenFixPointFn *fn;
5921 if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
5927 fn = gen_helper_vfp_ultos;
5929 fn = gen_helper_vfp_sltos;
5933 fn = gen_helper_vfp_touls_round_to_zero;
5935 fn = gen_helper_vfp_tosls_round_to_zero;
5939 /* We have already masked out the must-be-1 top bit of imm6,
5940 * hence this 32-shift where the ARM ARM has 64-imm6.
5943 fpst = get_fpstatus_ptr(1);
5944 shiftv = tcg_const_i32(shift);
5945 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5946 TCGv_i32 tmpf = neon_load_reg(rm, pass);
5947 fn(tmpf, tmpf, shiftv, fpst);
5948 neon_store_reg(rd, pass, tmpf);
5950 tcg_temp_free_ptr(fpst);
5951 tcg_temp_free_i32(shiftv);
5955 } else { /* (insn & 0x00380080) == 0 */
5956 int invert, reg_ofs, vec_size;
5958 if (q && (rd & 1)) {
5962 op = (insn >> 8) & 0xf;
5963 /* One register and immediate. */
5964 imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
5965 invert = (insn & (1 << 5)) != 0;
5966 /* Note that op = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.
5967 * We choose to not special-case this and will behave as if a
5968 * valid constant encoding of 0 had been given.
5987 imm = (imm << 8) | (imm << 24);
5990 imm = (imm << 8) | 0xff;
5993 imm = (imm << 16) | 0xffff;
5996 imm |= (imm << 8) | (imm << 16) | (imm << 24);
6005 imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
6006 | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
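/*
 * Worked example of the expansion above: the 8-bit encoding 0x70
 * becomes 0x3f800000, i.e. the single-precision constant 1.0.
 */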
6013 reg_ofs = neon_reg_offset(rd, 0);
6014 vec_size = q ? 16 : 8;
6016 if (op & 1 && op < 12) {
6018 /* The immediate value has already been inverted,
6019 * so BIC becomes AND.
6021 tcg_gen_gvec_andi(MO_32, reg_ofs, reg_ofs, imm,
6022 vec_size, vec_size);
6024 tcg_gen_gvec_ori(MO_32, reg_ofs, reg_ofs, imm,
6025 vec_size, vec_size);
6029 if (op == 14 && invert) {
6030 TCGv_i64 t64 = tcg_temp_new_i64();
6032 for (pass = 0; pass <= q; ++pass) {
6036 for (n = 0; n < 8; n++) {
6037 if (imm & (1 << (n + pass * 8))) {
6038 val |= 0xffull << (n * 8);
6041 tcg_gen_movi_i64(t64, val);
6042 neon_store_reg64(t64, rd + pass);
6044 tcg_temp_free_i64(t64);
6046 tcg_gen_gvec_dup_imm(MO_32, reg_ofs, vec_size,
6051 } else { /* (insn & 0x00800010 == 0x00800000) */
6053 op = (insn >> 8) & 0xf;
6054 if ((insn & (1 << 6)) == 0) {
6055 /* Three registers of different lengths. */
6059 /* undefreq: bit 0 : UNDEF if size == 0
6060 * bit 1 : UNDEF if size == 1
6061 * bit 2 : UNDEF if size == 2
6062 * bit 3 : UNDEF if U == 1
6063 * Note that [2:0] set implies 'always UNDEF'
6066 /* prewiden, src1_wide, src2_wide, undefreq */
6067 static const int neon_3reg_wide[16][4] = {
6068 {1, 0, 0, 0}, /* VADDL */
6069 {1, 1, 0, 0}, /* VADDW */
6070 {1, 0, 0, 0}, /* VSUBL */
6071 {1, 1, 0, 0}, /* VSUBW */
6072 {0, 1, 1, 0}, /* VADDHN */
6073 {0, 0, 0, 0}, /* VABAL */
6074 {0, 1, 1, 0}, /* VSUBHN */
6075 {0, 0, 0, 0}, /* VABDL */
6076 {0, 0, 0, 0}, /* VMLAL */
6077 {0, 0, 0, 9}, /* VQDMLAL */
6078 {0, 0, 0, 0}, /* VMLSL */
6079 {0, 0, 0, 9}, /* VQDMLSL */
6080 {0, 0, 0, 0}, /* Integer VMULL */
6081 {0, 0, 0, 9}, /* VQDMULL */
6082 {0, 0, 0, 0xa}, /* Polynomial VMULL */
6083 {0, 0, 0, 7}, /* Reserved: always UNDEF */
6086 prewiden = neon_3reg_wide[op][0];
6087 src1_wide = neon_3reg_wide[op][1];
6088 src2_wide = neon_3reg_wide[op][2];
6089 undefreq = neon_3reg_wide[op][3];
6091 if ((undefreq & (1 << size)) ||
6092 ((undefreq & 8) && u)) {
6095 if ((src1_wide && (rn & 1)) ||
6096 (src2_wide && (rm & 1)) ||
6097 (!src2_wide && (rd & 1))) {
6101 /* Handle polynomial VMULL in a single pass. */
6105 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
6106 0, gen_helper_neon_pmull_h);
6109 if (!dc_isar_feature(aa32_pmull, s)) {
6112 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
6113 0, gen_helper_gvec_pmull_q);
6118 /* Avoid overlapping operands. Wide source operands are
6119 always aligned so will never overlap with wide
6120 destinations in problematic ways. */
6121 if (rd == rm && !src2_wide) {
6122 tmp = neon_load_reg(rm, 1);
6123 neon_store_scratch(2, tmp);
6124 } else if (rd == rn && !src1_wide) {
6125 tmp = neon_load_reg(rn, 1);
6126 neon_store_scratch(2, tmp);
6129 for (pass = 0; pass < 2; pass++) {
6131 neon_load_reg64(cpu_V0, rn + pass);
6134 if (pass == 1 && rd == rn) {
6135 tmp = neon_load_scratch(2);
6137 tmp = neon_load_reg(rn, pass);
6140 gen_neon_widen(cpu_V0, tmp, size, u);
6144 neon_load_reg64(cpu_V1, rm + pass);
6147 if (pass == 1 && rd == rm) {
6148 tmp2 = neon_load_scratch(2);
6150 tmp2 = neon_load_reg(rm, pass);
6153 gen_neon_widen(cpu_V1, tmp2, size, u);
6157 case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
6158 gen_neon_addl(size);
6160 case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
6161 gen_neon_subl(size);
6163 case 5: case 7: /* VABAL, VABDL */
6164 switch ((size << 1) | u) {
6166 gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
6169 gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
6172 gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
6175 gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
6178 gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
6181 gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
6185 tcg_temp_free_i32(tmp2);
6186 tcg_temp_free_i32(tmp);
6188 case 8: case 9: case 10: case 11: case 12: case 13:
6189 /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
6190 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
6192 default: /* 15 is RESERVED: caught earlier */
6197 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6198 neon_store_reg64(cpu_V0, rd + pass);
6199 } else if (op == 5 || (op >= 8 && op <= 11)) {
6201 neon_load_reg64(cpu_V1, rd + pass);
6203 case 10: /* VMLSL */
6204 gen_neon_negl(cpu_V0, size);
6206 case 5: case 8: /* VABAL, VMLAL */
6207 gen_neon_addl(size);
6209 case 9: case 11: /* VQDMLAL, VQDMLSL */
6210 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6212 gen_neon_negl(cpu_V0, size);
6214 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
6219 neon_store_reg64(cpu_V0, rd + pass);
6220 } else if (op == 4 || op == 6) {
6221 /* Narrowing operation. */
6222 tmp = tcg_temp_new_i32();
6226 gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
6229 gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
6232 tcg_gen_extrh_i64_i32(tmp, cpu_V0);
6239 gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
6242 gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
6245 tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
6246 tcg_gen_extrh_i64_i32(tmp, cpu_V0);
6254 neon_store_reg(rd, 0, tmp3);
6255 neon_store_reg(rd, 1, tmp);
6258 /* Write back the result. */
6259 neon_store_reg64(cpu_V0, rd + pass);
6263 /* Two registers and a scalar. NB that for ops of this form
6264 * the ARM ARM labels bit 24 as Q, but it is in our variable 'u', not 'q'. */
6271 case 1: /* Float VMLA scalar */
6272 case 5: /* Floating point VMLS scalar */
6273 case 9: /* Floating point VMUL scalar */
6278 case 0: /* Integer VMLA scalar */
6279 case 4: /* Integer VMLS scalar */
6280 case 8: /* Integer VMUL scalar */
6281 case 12: /* VQDMULH scalar */
6282 case 13: /* VQRDMULH scalar */
6283 if (u && ((rd | rn) & 1)) {
6286 tmp = neon_get_scalar(size, rm);
6287 neon_store_scratch(0, tmp);
6288 for (pass = 0; pass < (u ? 4 : 2); pass++) {
6289 tmp = neon_load_scratch(0);
6290 tmp2 = neon_load_reg(rn, pass);
6293 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
6295 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
6297 } else if (op == 13) {
6299 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
6301 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
6303 } else if (op & 1) {
6304 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6305 gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
6306 tcg_temp_free_ptr(fpstatus);
6309 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
6310 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
6311 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
6315 tcg_temp_free_i32(tmp2);
6318 tmp2 = neon_load_reg(rd, pass);
6321 gen_neon_add(size, tmp, tmp2);
6325 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6326 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
6327 tcg_temp_free_ptr(fpstatus);
6331 gen_neon_rsb(size, tmp, tmp2);
6335 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6336 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
6337 tcg_temp_free_ptr(fpstatus);
6343 tcg_temp_free_i32(tmp2);
6345 neon_store_reg(rd, pass, tmp);
6348 case 3: /* VQDMLAL scalar */
6349 case 7: /* VQDMLSL scalar */
6350 case 11: /* VQDMULL scalar */
6355 case 2: /* VMLAL scalar */
6356 case 6: /* VMLSL scalar */
6357 case 10: /* VMULL scalar */
6361 tmp2 = neon_get_scalar(size, rm);
6362 /* We need a copy of tmp2 because gen_neon_mull
6363 * deletes it during pass 0. */
6364 tmp4 = tcg_temp_new_i32();
6365 tcg_gen_mov_i32(tmp4, tmp2);
6366 tmp3 = neon_load_reg(rn, 1);
6368 for (pass = 0; pass < 2; pass++) {
6370 tmp = neon_load_reg(rn, 0);
6375 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
6377 neon_load_reg64(cpu_V1, rd + pass);
6381 gen_neon_negl(cpu_V0, size);
6384 gen_neon_addl(size);
6387 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6389 gen_neon_negl(cpu_V0, size);
6391 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
6397 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6402 neon_store_reg64(cpu_V0, rd + pass);
6405 case 14: /* VQRDMLAH scalar */
6406 case 15: /* VQRDMLSH scalar */
6408 NeonGenThreeOpEnvFn *fn;
6410 if (!dc_isar_feature(aa32_rdm, s)) {
6413 if (u && ((rd | rn) & 1)) {
6418 fn = gen_helper_neon_qrdmlah_s16;
6420 fn = gen_helper_neon_qrdmlah_s32;
6424 fn = gen_helper_neon_qrdmlsh_s16;
6426 fn = gen_helper_neon_qrdmlsh_s32;
6430 tmp2 = neon_get_scalar(size, rm);
6431 for (pass = 0; pass < (u ? 4 : 2); pass++) {
6432 tmp = neon_load_reg(rn, pass);
6433 tmp3 = neon_load_reg(rd, pass);
6434 fn(tmp, cpu_env, tmp, tmp2, tmp3);
6435 tcg_temp_free_i32(tmp3);
6436 neon_store_reg(rd, pass, tmp);
6438 tcg_temp_free_i32(tmp2);
6442 g_assert_not_reached();
6445 } else { /* size == 3 */
6448 imm = (insn >> 8) & 0xf;
6453 if (q && ((rd | rn | rm) & 1)) {
6458 neon_load_reg64(cpu_V0, rn);
6460 neon_load_reg64(cpu_V1, rn + 1);
6462 } else if (imm == 8) {
6463 neon_load_reg64(cpu_V0, rn + 1);
6465 neon_load_reg64(cpu_V1, rm);
6468 tmp64 = tcg_temp_new_i64();
6470 neon_load_reg64(cpu_V0, rn);
6471 neon_load_reg64(tmp64, rn + 1);
6473 neon_load_reg64(cpu_V0, rn + 1);
6474 neon_load_reg64(tmp64, rm);
6476 tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
6477 tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
6478 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6480 neon_load_reg64(cpu_V1, rm);
6482 neon_load_reg64(cpu_V1, rm + 1);
6485 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6486 tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
6487 tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
6488 tcg_temp_free_i64(tmp64);
6491 neon_load_reg64(cpu_V0, rn);
6492 tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
6493 neon_load_reg64(cpu_V1, rm);
6494 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6495 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6497 neon_store_reg64(cpu_V0, rd);
6499 neon_store_reg64(cpu_V1, rd + 1);
6501 } else if ((insn & (1 << 11)) == 0) {
6502 /* Two register misc. */
6503 op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
6504 size = (insn >> 18) & 3;
6505 /* UNDEF for unknown op values and bad op-size combinations */
6506 if ((neon_2rm_sizes[op] & (1 << size)) == 0) {
6509 if (neon_2rm_is_v8_op(op) &&
6510 !arm_dc_feature(s, ARM_FEATURE_V8)) {
6513 if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) &&
6514 q && ((rm | rd) & 1)) {
6518 case NEON_2RM_VREV64:
6519 for (pass = 0; pass < (q ? 2 : 1); pass++) {
6520 tmp = neon_load_reg(rm, pass * 2);
6521 tmp2 = neon_load_reg(rm, pass * 2 + 1);
6523 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6524 case 1: gen_swap_half(tmp); break;
6525 case 2: /* no-op */ break;
6528 neon_store_reg(rd, pass * 2 + 1, tmp);
6530 neon_store_reg(rd, pass * 2, tmp2);
6533 case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break;
6534 case 1: gen_swap_half(tmp2); break;
6537 neon_store_reg(rd, pass * 2, tmp2);
6541 case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
6542 case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
6543 for (pass = 0; pass < q + 1; pass++) {
6544 tmp = neon_load_reg(rm, pass * 2);
6545 gen_neon_widen(cpu_V0, tmp, size, op & 1);
6546 tmp = neon_load_reg(rm, pass * 2 + 1);
6547 gen_neon_widen(cpu_V1, tmp, size, op & 1);
6549 case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
6550 case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
6551 case 2: tcg_gen_add_i64(CPU_V001); break;
6554 if (op >= NEON_2RM_VPADAL) {
6556 neon_load_reg64(cpu_V1, rd + pass);
6557 gen_neon_addl(size);
6559 neon_store_reg64(cpu_V0, rd + pass);
6565 for (n = 0; n < (q ? 4 : 2); n += 2) {
6566 tmp = neon_load_reg(rm, n);
6567 tmp2 = neon_load_reg(rd, n + 1);
6568 neon_store_reg(rm, n, tmp2);
6569 neon_store_reg(rd, n + 1, tmp);
6576 if (gen_neon_unzip(rd, rm, size, q)) {
6581 if (gen_neon_zip(rd, rm, size, q)) {
6585 case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
6586 /* also VQMOVUN; op field and mnemonics don't line up */
6591 for (pass = 0; pass < 2; pass++) {
6592 neon_load_reg64(cpu_V0, rm + pass);
6593 tmp = tcg_temp_new_i32();
6594 gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size,
6599 neon_store_reg(rd, 0, tmp2);
6600 neon_store_reg(rd, 1, tmp);
6604 case NEON_2RM_VSHLL:
6605 if (q || (rd & 1)) {
6608 tmp = neon_load_reg(rm, 0);
6609 tmp2 = neon_load_reg(rm, 1);
6610 for (pass = 0; pass < 2; pass++) {
6613 gen_neon_widen(cpu_V0, tmp, size, 1);
6614 tcg_gen_shli_i64(cpu_V0, cpu_V0, 8 << size);
6615 neon_store_reg64(cpu_V0, rd + pass);
6618 case NEON_2RM_VCVT_F16_F32:
6623 if (!dc_isar_feature(aa32_fp16_spconv, s) ||
6627 fpst = get_fpstatus_ptr(true);
6628 ahp = get_ahp_flag();
6629 tmp = neon_load_reg(rm, 0);
6630 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
6631 tmp2 = neon_load_reg(rm, 1);
6632 gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
6633 tcg_gen_shli_i32(tmp2, tmp2, 16);
6634 tcg_gen_or_i32(tmp2, tmp2, tmp);
6635 tcg_temp_free_i32(tmp);
6636 tmp = neon_load_reg(rm, 2);
6637 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
6638 tmp3 = neon_load_reg(rm, 3);
6639 neon_store_reg(rd, 0, tmp2);
6640 gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
6641 tcg_gen_shli_i32(tmp3, tmp3, 16);
6642 tcg_gen_or_i32(tmp3, tmp3, tmp);
6643 neon_store_reg(rd, 1, tmp3);
6644 tcg_temp_free_i32(tmp);
6645 tcg_temp_free_i32(ahp);
6646 tcg_temp_free_ptr(fpst);
6649 case NEON_2RM_VCVT_F32_F16:
6653 if (!dc_isar_feature(aa32_fp16_spconv, s) ||
6657 fpst = get_fpstatus_ptr(true);
6658 ahp = get_ahp_flag();
6659 tmp3 = tcg_temp_new_i32();
6660 tmp = neon_load_reg(rm, 0);
6661 tmp2 = neon_load_reg(rm, 1);
6662 tcg_gen_ext16u_i32(tmp3, tmp);
6663 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
6664 neon_store_reg(rd, 0, tmp3);
6665 tcg_gen_shri_i32(tmp, tmp, 16);
6666 gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
6667 neon_store_reg(rd, 1, tmp);
6668 tmp3 = tcg_temp_new_i32();
6669 tcg_gen_ext16u_i32(tmp3, tmp2);
6670 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
6671 neon_store_reg(rd, 2, tmp3);
6672 tcg_gen_shri_i32(tmp2, tmp2, 16);
6673 gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
6674 neon_store_reg(rd, 3, tmp2);
6675 tcg_temp_free_i32(ahp);
6676 tcg_temp_free_ptr(fpst);
6679 case NEON_2RM_AESE: case NEON_2RM_AESMC:
6680 if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) {
6683 ptr1 = vfp_reg_ptr(true, rd);
6684 ptr2 = vfp_reg_ptr(true, rm);
6686 /* Bit 6 is the lowest opcode bit; it distinguishes between
6687 * encryption (AESE/AESMC) and decryption (AESD/AESIMC)
6689 tmp3 = tcg_const_i32(extract32(insn, 6, 1));
6691 if (op == NEON_2RM_AESE) {
6692 gen_helper_crypto_aese(ptr1, ptr2, tmp3);
6694 gen_helper_crypto_aesmc(ptr1, ptr2, tmp3);
6696 tcg_temp_free_ptr(ptr1);
6697 tcg_temp_free_ptr(ptr2);
6698 tcg_temp_free_i32(tmp3);
6700 case NEON_2RM_SHA1H:
6701 if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) {
6704 ptr1 = vfp_reg_ptr(true, rd);
6705 ptr2 = vfp_reg_ptr(true, rm);
6707 gen_helper_crypto_sha1h(ptr1, ptr2);
6709 tcg_temp_free_ptr(ptr1);
6710 tcg_temp_free_ptr(ptr2);
6712 case NEON_2RM_SHA1SU1:
6713 if ((rm | rd) & 1) {
6716 /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
6718 if (!dc_isar_feature(aa32_sha2, s)) {
6721 } else if (!dc_isar_feature(aa32_sha1, s)) {
6724 ptr1 = vfp_reg_ptr(true, rd);
6725 ptr2 = vfp_reg_ptr(true, rm);
6727 gen_helper_crypto_sha256su0(ptr1, ptr2);
6729 gen_helper_crypto_sha1su1(ptr1, ptr2);
6731 tcg_temp_free_ptr(ptr1);
6732 tcg_temp_free_ptr(ptr2);
6736 tcg_gen_gvec_not(0, rd_ofs, rm_ofs, vec_size, vec_size);
6739 tcg_gen_gvec_neg(size, rd_ofs, rm_ofs, vec_size, vec_size);
6742 tcg_gen_gvec_abs(size, rd_ofs, rm_ofs, vec_size, vec_size);
6745 case NEON_2RM_VCEQ0:
6746 gen_gvec_ceq0(size, rd_ofs, rm_ofs, vec_size, vec_size);
6748 case NEON_2RM_VCGT0:
6749 gen_gvec_cgt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
6751 case NEON_2RM_VCLE0:
6752 gen_gvec_cle0(size, rd_ofs, rm_ofs, vec_size, vec_size);
6754 case NEON_2RM_VCGE0:
6755 gen_gvec_cge0(size, rd_ofs, rm_ofs, vec_size, vec_size);
6757 case NEON_2RM_VCLT0:
6758 gen_gvec_clt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
6763 for (pass = 0; pass < (q ? 4 : 2); pass++) {
6764 tmp = neon_load_reg(rm, pass);
6766 case NEON_2RM_VREV32:
6768 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6769 case 1: gen_swap_half(tmp); break;
6773 case NEON_2RM_VREV16:
6774 gen_rev16(tmp, tmp);
6778 case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
6779 case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
6780 case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
6786 case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
6787 case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
6788 case 2: tcg_gen_clzi_i32(tmp, tmp, 32); break;
6793 gen_helper_neon_cnt_u8(tmp, tmp);
6795 case NEON_2RM_VQABS:
6798 gen_helper_neon_qabs_s8(tmp, cpu_env, tmp);
6801 gen_helper_neon_qabs_s16(tmp, cpu_env, tmp);
6804 gen_helper_neon_qabs_s32(tmp, cpu_env, tmp);
6809 case NEON_2RM_VQNEG:
6812 gen_helper_neon_qneg_s8(tmp, cpu_env, tmp);
6815 gen_helper_neon_qneg_s16(tmp, cpu_env, tmp);
6818 gen_helper_neon_qneg_s32(tmp, cpu_env, tmp);
6823 case NEON_2RM_VCGT0_F:
6825 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6826 tmp2 = tcg_const_i32(0);
6827 gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
6828 tcg_temp_free_i32(tmp2);
6829 tcg_temp_free_ptr(fpstatus);
6832 case NEON_2RM_VCGE0_F:
6834 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6835 tmp2 = tcg_const_i32(0);
6836 gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
6837 tcg_temp_free_i32(tmp2);
6838 tcg_temp_free_ptr(fpstatus);
6841 case NEON_2RM_VCEQ0_F:
6843 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6844 tmp2 = tcg_const_i32(0);
6845 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
6846 tcg_temp_free_i32(tmp2);
6847 tcg_temp_free_ptr(fpstatus);
6850 case NEON_2RM_VCLE0_F:
6852 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6853 tmp2 = tcg_const_i32(0);
6854 gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus);
6855 tcg_temp_free_i32(tmp2);
6856 tcg_temp_free_ptr(fpstatus);
6859 case NEON_2RM_VCLT0_F:
6861 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6862 tmp2 = tcg_const_i32(0);
6863 gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus);
6864 tcg_temp_free_i32(tmp2);
6865 tcg_temp_free_ptr(fpstatus);
6868 case NEON_2RM_VABS_F:
6869 gen_helper_vfp_abss(tmp, tmp);
6871 case NEON_2RM_VNEG_F:
6872 gen_helper_vfp_negs(tmp, tmp);
6875 tmp2 = neon_load_reg(rd, pass);
6876 neon_store_reg(rm, pass, tmp2);
6879 tmp2 = neon_load_reg(rd, pass);
6881 case 0: gen_neon_trn_u8(tmp, tmp2); break;
6882 case 1: gen_neon_trn_u16(tmp, tmp2); break;
6885 neon_store_reg(rm, pass, tmp2);
6887 case NEON_2RM_VRINTN:
6888 case NEON_2RM_VRINTA:
6889 case NEON_2RM_VRINTM:
6890 case NEON_2RM_VRINTP:
6891 case NEON_2RM_VRINTZ:
6894 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6897 if (op == NEON_2RM_VRINTZ) {
6898 rmode = FPROUNDING_ZERO;
6900 rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1];
6903 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6904 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6906 gen_helper_rints(tmp, tmp, fpstatus);
6907 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6909 tcg_temp_free_ptr(fpstatus);
6910 tcg_temp_free_i32(tcg_rmode);
6913 case NEON_2RM_VRINTX:
6915 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6916 gen_helper_rints_exact(tmp, tmp, fpstatus);
6917 tcg_temp_free_ptr(fpstatus);
6920 case NEON_2RM_VCVTAU:
6921 case NEON_2RM_VCVTAS:
6922 case NEON_2RM_VCVTNU:
6923 case NEON_2RM_VCVTNS:
6924 case NEON_2RM_VCVTPU:
6925 case NEON_2RM_VCVTPS:
6926 case NEON_2RM_VCVTMU:
6927 case NEON_2RM_VCVTMS:
6929 bool is_signed = !extract32(insn, 7, 1);
6930 TCGv_ptr fpst = get_fpstatus_ptr(1);
6931 TCGv_i32 tcg_rmode, tcg_shift;
6932 int rmode = fp_decode_rm[extract32(insn, 8, 2)];
6934 tcg_shift = tcg_const_i32(0);
6935 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6936 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6940 gen_helper_vfp_tosls(tmp, tmp,
6943 gen_helper_vfp_touls(tmp, tmp,
6947 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6949 tcg_temp_free_i32(tcg_rmode);
6950 tcg_temp_free_i32(tcg_shift);
6951 tcg_temp_free_ptr(fpst);
6954 case NEON_2RM_VRECPE:
6955 gen_helper_recpe_u32(tmp, tmp);
6957 case NEON_2RM_VRSQRTE:
6958 gen_helper_rsqrte_u32(tmp, tmp);
6960 case NEON_2RM_VRECPE_F:
6962 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6963 gen_helper_recpe_f32(tmp, tmp, fpstatus);
6964 tcg_temp_free_ptr(fpstatus);
6967 case NEON_2RM_VRSQRTE_F:
6969 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6970 gen_helper_rsqrte_f32(tmp, tmp, fpstatus);
6971 tcg_temp_free_ptr(fpstatus);
6974 case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
6976 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6977 gen_helper_vfp_sitos(tmp, tmp, fpstatus);
6978 tcg_temp_free_ptr(fpstatus);
6981 case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
6983 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6984 gen_helper_vfp_uitos(tmp, tmp, fpstatus);
6985 tcg_temp_free_ptr(fpstatus);
6988 case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
6990 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6991 gen_helper_vfp_tosizs(tmp, tmp, fpstatus);
6992 tcg_temp_free_ptr(fpstatus);
6995 case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
6997 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6998 gen_helper_vfp_touizs(tmp, tmp, fpstatus);
6999 tcg_temp_free_ptr(fpstatus);
7003 /* Reserved op values were caught by the
7004 * neon_2rm_sizes[] check earlier.
7008 neon_store_reg(rd, pass, tmp);
7012 } else if ((insn & (1 << 10)) == 0) {
7014 int n = ((insn >> 8) & 3) + 1;
7015 if ((rn + n) > 32) {
7016 /* This is UNPREDICTABLE; we choose to UNDEF to avoid the
7017 * helper function running off the end of the register file.
7022 if (insn & (1 << 6)) {
7023 tmp = neon_load_reg(rd, 0);
7025 tmp = tcg_temp_new_i32();
7026 tcg_gen_movi_i32(tmp, 0);
7028 tmp2 = neon_load_reg(rm, 0);
7029 ptr1 = vfp_reg_ptr(true, rn);
7030 tmp5 = tcg_const_i32(n);
7031 gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp5);
7032 tcg_temp_free_i32(tmp);
7033 if (insn & (1 << 6)) {
7034 tmp = neon_load_reg(rd, 1);
7036 tmp = tcg_temp_new_i32();
7037 tcg_gen_movi_i32(tmp, 0);
7039 tmp3 = neon_load_reg(rm, 1);
7040 gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp5);
7041 tcg_temp_free_i32(tmp5);
7042 tcg_temp_free_ptr(ptr1);
7043 neon_store_reg(rd, 0, tmp2);
7044 neon_store_reg(rd, 1, tmp3);
7045 tcg_temp_free_i32(tmp);
7046 } else if ((insn & 0x380) == 0) {
7051 if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
7054 if (insn & (1 << 16)) {
7056 element = (insn >> 17) & 7;
7057 } else if (insn & (1 << 17)) {
7059 element = (insn >> 18) & 3;
7062 element = (insn >> 19) & 1;
7064 tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, 0),
7065 neon_element_offset(rm, element, size),
7066 q ? 16 : 8, q ? 16 : 8);
7075 static int disas_coproc_insn(DisasContext *s, uint32_t insn)
7077 int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
7078 const ARMCPRegInfo *ri;
7080 cpnum = (insn >> 8) & 0xf;
7082 /* First check for coprocessor space used for XScale/iwMMXt insns */
7083 if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cpnum < 2)) {
7084 if (extract32(s->c15_cpar, cpnum, 1) == 0) {
7087 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
7088 return disas_iwmmxt_insn(s, insn);
7089 } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
7090 return disas_dsp_insn(s, insn);
7095 /* Otherwise treat as a generic register access */
7096 is64 = (insn & (1 << 25)) == 0;
7097 if (!is64 && ((insn & (1 << 4)) == 0)) {
7105 opc1 = (insn >> 4) & 0xf;
7107 rt2 = (insn >> 16) & 0xf;
7109 crn = (insn >> 16) & 0xf;
7110 opc1 = (insn >> 21) & 7;
7111 opc2 = (insn >> 5) & 7;
7114 isread = (insn >> 20) & 1;
7115 rt = (insn >> 12) & 0xf;
7117 ri = get_arm_cp_reginfo(s->cp_regs,
7118 ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
7122 /* Check access permissions */
7123 if (!cp_access_ok(s->current_el, ri, isread)) {
7127 if (s->hstr_active || ri->accessfn ||
7128 (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
7129 /* Emit code to perform further access permissions checks at
7130 * runtime; this may result in an exception.
7131 * Note that on XScale all cp0..cp13 registers do an access check
7132 * call in order to handle c15_cpar. */
7135 TCGv_i32 tcg_syn, tcg_isread;
7138 /* Note that since we are an implementation which takes an
7139 * exception on a trapped conditional instruction only if the
7140 * instruction passes its condition code check, we can take
7141 * advantage of the clause in the ARM ARM that allows us to set
7142 * the COND field in the instruction to 0xE in all cases.
7143 * We could fish the actual condition out of the insn (ARM)
7144 * or the condexec bits (Thumb) but it isn't necessary. */
7149 syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
7152 syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
7158 syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
7161 syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
7166 /* ARMv8 defines that only coprocessors 14 and 15 exist,
7167 * so this can only happen if this is an ARMv7 or earlier CPU,
7168 * in which case the syndrome information won't actually be used, as it isn't needed. */
7171 assert(!arm_dc_feature(s, ARM_FEATURE_V8));
7172 syndrome = syn_uncategorized();
7176 gen_set_condexec(s);
7177 gen_set_pc_im(s, s->pc_curr);
7178 tmpptr = tcg_const_ptr(ri);
7179 tcg_syn = tcg_const_i32(syndrome);
7180 tcg_isread = tcg_const_i32(isread);
7181 gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn,
7183 tcg_temp_free_ptr(tmpptr);
7184 tcg_temp_free_i32(tcg_syn);
7185 tcg_temp_free_i32(tcg_isread);
7186 } else if (ri->type & ARM_CP_RAISES_EXC) {
7188 /* The readfn or writefn might raise an exception;
7189 * synchronize the CPU state in case it does. */
7191 gen_set_condexec(s);
7192 gen_set_pc_im(s, s->pc_curr);
7195 /* Handle special cases first */
7196 switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
7203 gen_set_pc_im(s, s->base.pc_next);
7204 s->base.is_jmp = DISAS_WFI;
7210 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
7219 if (ri->type & ARM_CP_CONST) {
7220 tmp64 = tcg_const_i64(ri->resetvalue);
7221 } else if (ri->readfn) {
7223 tmp64 = tcg_temp_new_i64();
7224 tmpptr = tcg_const_ptr(ri);
7225 gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
7226 tcg_temp_free_ptr(tmpptr);
7228 tmp64 = tcg_temp_new_i64();
7229 tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
7231 tmp = tcg_temp_new_i32();
7232 tcg_gen_extrl_i64_i32(tmp, tmp64);
7233 store_reg(s, rt, tmp);
7234 tmp = tcg_temp_new_i32();
7235 tcg_gen_extrh_i64_i32(tmp, tmp64);
7236 tcg_temp_free_i64(tmp64);
7237 store_reg(s, rt2, tmp);
7240 if (ri->type & ARM_CP_CONST) {
7241 tmp = tcg_const_i32(ri->resetvalue);
7242 } else if (ri->readfn) {
7244 tmp = tcg_temp_new_i32();
7245 tmpptr = tcg_const_ptr(ri);
7246 gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
7247 tcg_temp_free_ptr(tmpptr);
7249 tmp = load_cpu_offset(ri->fieldoffset);
7252 /* A destination register of r15 for 32-bit loads sets
7253 * the condition codes from the high 4 bits of the value. */
7256 tcg_temp_free_i32(tmp);
7258 store_reg(s, rt, tmp);
7263 if (ri->type & ARM_CP_CONST) {
7264 /* If not forbidden by access permissions, treat as WI */
7269 TCGv_i32 tmplo, tmphi;
7270 TCGv_i64 tmp64 = tcg_temp_new_i64();
7271 tmplo = load_reg(s, rt);
7272 tmphi = load_reg(s, rt2);
7273 tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
7274 tcg_temp_free_i32(tmplo);
7275 tcg_temp_free_i32(tmphi);
7277 TCGv_ptr tmpptr = tcg_const_ptr(ri);
7278 gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
7279 tcg_temp_free_ptr(tmpptr);
7281 tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
7283 tcg_temp_free_i64(tmp64);
7288 tmp = load_reg(s, rt);
7289 tmpptr = tcg_const_ptr(ri);
7290 gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
7291 tcg_temp_free_ptr(tmpptr);
7292 tcg_temp_free_i32(tmp);
7294 TCGv_i32 tmp = load_reg(s, rt);
7295 store_cpu_offset(tmp, ri->fieldoffset);
7300 /* I/O operations must end the TB here (whether read or write) */
7301 need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
7302 (ri->type & ARM_CP_IO));
7304 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
7306 /* A write to any coprocessor register that ends a TB
7307 * must rebuild the hflags for the next TB. */
7309 TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
7310 if (arm_dc_feature(s, ARM_FEATURE_M)) {
7311 gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
7313 if (ri->type & ARM_CP_NEWEL) {
7314 gen_helper_rebuild_hflags_a32_newel(cpu_env);
7316 gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
7319 tcg_temp_free_i32(tcg_el);
7321 /* We default to ending the TB on a coprocessor register write,
7322 * but allow this to be suppressed by the register definition
7323 * (usually only necessary to work around guest bugs). */
7325 need_exit_tb = true;
7334 /* Unknown register; this might be a guest error or a QEMU
7335 * unimplemented feature. */
7338 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7339 "64 bit system register cp:%d opc1: %d crm:%d "
7341 isread ? "read" : "write", cpnum, opc1, crm,
7342 s->ns ? "non-secure" : "secure");
7344 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7345 "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
7347 isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
7348 s->ns ? "non-secure" : "secure");
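/*
 * Illustrative example (added commentary, not part of the original
 * source): for the A32 encoding "mrc p15, 0, r0, c1, c0, 0" (a read of
 * the cp15 SCTLR), the field extraction above yields cpnum = 15,
 * is64 = 0, crn = 1, crm = 0, opc1 = 0, opc2 = 0, rt = 0 and
 * isread = 1, and the register is looked up via
 * ENCODE_CP_REG(15, 0, s->ns, 1, 0, 0, 0).
 */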
7355 /* Store a 64-bit value to a register pair. Clobbers val. */
7356 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
7359 tmp = tcg_temp_new_i32();
7360 tcg_gen_extrl_i64_i32(tmp, val);
7361 store_reg(s, rlow, tmp);
7362 tmp = tcg_temp_new_i32();
7363 tcg_gen_extrh_i64_i32(tmp, val);
7364 store_reg(s, rhigh, tmp);
7367 /* load and add a 64-bit value from a register pair. */
7368 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
7374 /* Load the 64-bit value rhigh:rlow. */
7375 tmpl = load_reg(s, rlow);
7376 tmph = load_reg(s, rhigh);
7377 tmp = tcg_temp_new_i64();
7378 tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
7379 tcg_temp_free_i32(tmpl);
7380 tcg_temp_free_i32(tmph);
7381 tcg_gen_add_i64(val, val, tmp);
7382 tcg_temp_free_i64(tmp);
7385 /* Set N and Z flags from hi|lo. */
7386 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
7388 tcg_gen_mov_i32(cpu_NF, hi);
7389 tcg_gen_or_i32(cpu_ZF, lo, hi);
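/*
 * Added note (not from the original source): QEMU keeps the flags in a
 * decomposed form, where cpu_NF holds a value whose bit 31 is the N
 * flag and cpu_ZF holds a value that is zero exactly when Z is set.
 * Copying the high word into NF and OR-ing both words into ZF therefore
 * gives the N and Z flags of the full 64-bit result.
 */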
7392 /* Load/Store exclusive instructions are implemented by remembering
7393 the value/address loaded, and seeing if these are the same
7394 when the store is performed. This should be sufficient to implement
7395 the architecturally mandated semantics, and avoids having to monitor
7396 regular stores. The compare vs the remembered value is done during
7397 the cmpxchg operation, but we must compare the addresses manually. */
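/*
 * Illustrative example (added commentary, not from the original
 * source): for a guest atomic-increment loop such as
 *
 *   retry:  ldrex   r1, [r0]        @ gen_load_exclusive() records
 *                                   @   exclusive_addr = r0,
 *                                   @   exclusive_val  = [r0]
 *           add     r1, r1, #1
 *           strex   r2, r1, [r0]    @ gen_store_exclusive() checks the
 *                                   @   address manually and the value
 *                                   @   via cmpxchg; r2 = 0 on success
 *           cmp     r2, #0
 *           bne     retry
 *
 * the remembered address/value pair is all the state that needs to be
 * tracked between the two halves of the exclusive pair.
 */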
7398 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
7399 TCGv_i32 addr, int size)
7401 TCGv_i32 tmp = tcg_temp_new_i32();
7402 MemOp opc = size | MO_ALIGN | s->be_data;
7407 TCGv_i32 tmp2 = tcg_temp_new_i32();
7408 TCGv_i64 t64 = tcg_temp_new_i64();
7410 /* For AArch32, architecturally the 32-bit word at the lowest
7411 * address is always Rt and the one at addr+4 is Rt2, even if
7412 * the CPU is big-endian. That means we don't want to do a
7413 * gen_aa32_ld_i64(), which invokes gen_aa32_frob64() as if
7414 * for an architecturally 64-bit access, but instead do a
7415 * 64-bit access using MO_BE if appropriate and then split the two halves.
7417 * This only makes a difference for BE32 user-mode, where
7418 * frob64() must not flip the two halves of the 64-bit data
7419 * but this code must treat BE32 user-mode like BE32 system. */
7421 TCGv taddr = gen_aa32_addr(s, addr, opc);
7423 tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
7424 tcg_temp_free(taddr);
7425 tcg_gen_mov_i64(cpu_exclusive_val, t64);
7426 if (s->be_data == MO_BE) {
7427 tcg_gen_extr_i64_i32(tmp2, tmp, t64);
7429 tcg_gen_extr_i64_i32(tmp, tmp2, t64);
7431 tcg_temp_free_i64(t64);
7433 store_reg(s, rt2, tmp2);
7435 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
7436 tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
7439 store_reg(s, rt, tmp);
7440 tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
7443 static void gen_clrex(DisasContext *s)
7445 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
7448 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
7449 TCGv_i32 addr, int size)
7451 TCGv_i32 t0, t1, t2;
7454 TCGLabel *done_label;
7455 TCGLabel *fail_label;
7456 MemOp opc = size | MO_ALIGN | s->be_data;
7458 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
         [addr] = {Rt};
         {Rd} = 0;
     } else {
         {Rd} = 1;
     } */
7464 fail_label = gen_new_label();
7465 done_label = gen_new_label();
7466 extaddr = tcg_temp_new_i64();
7467 tcg_gen_extu_i32_i64(extaddr, addr);
7468 tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
7469 tcg_temp_free_i64(extaddr);
7471 taddr = gen_aa32_addr(s, addr, opc);
7472 t0 = tcg_temp_new_i32();
7473 t1 = load_reg(s, rt);
7475 TCGv_i64 o64 = tcg_temp_new_i64();
7476 TCGv_i64 n64 = tcg_temp_new_i64();
7478 t2 = load_reg(s, rt2);
7479 /* For AArch32, architecturally the 32-bit word at the lowest
7480 * address is always Rt and the one at addr+4 is Rt2, even if
7481 * the CPU is big-endian. Since we're going to treat this as a
7482 * single 64-bit BE store, we need to put the two halves in the
7483 * opposite order for BE to LE, so that they end up in the right
7485 * We don't want gen_aa32_frob64() because that does the wrong
7486 * thing for BE32 usermode. */
7488 if (s->be_data == MO_BE) {
7489 tcg_gen_concat_i32_i64(n64, t2, t1);
7491 tcg_gen_concat_i32_i64(n64, t1, t2);
7493 tcg_temp_free_i32(t2);
7495 tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
7496 get_mem_index(s), opc);
7497 tcg_temp_free_i64(n64);
7499 tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
7500 tcg_gen_extrl_i64_i32(t0, o64);
7502 tcg_temp_free_i64(o64);
7504 t2 = tcg_temp_new_i32();
7505 tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
7506 tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
7507 tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
7508 tcg_temp_free_i32(t2);
7510 tcg_temp_free_i32(t1);
7511 tcg_temp_free(taddr);
7512 tcg_gen_mov_i32(cpu_R[rd], t0);
7513 tcg_temp_free_i32(t0);
7514 tcg_gen_br(done_label);
7516 gen_set_label(fail_label);
7517 tcg_gen_movi_i32(cpu_R[rd], 1);
7518 gen_set_label(done_label);
7519 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
7525 /* @mode: mode field from insn (which stack to store to)
7526 * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
7527 * @writeback: true if writeback bit set
7529 * Generate code for the SRS (Store Return State) insn. */
7531 static void gen_srs(DisasContext *s,
7532 uint32_t mode, uint32_t amode, bool writeback)
/* SRS is:
7539 * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
7540 * and specified mode is monitor mode
7541 * - UNDEFINED in Hyp mode
7542 * - UNPREDICTABLE in User or System mode
7543 * - UNPREDICTABLE if the specified mode is:
7544 * -- not implemented
7545 * -- not a valid mode number
7546 * -- a mode that's at a higher exception level
7547 * -- Monitor, if we are Non-secure
7548 * For the UNPREDICTABLE cases we choose to UNDEF. */
7550 if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
7551 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(), 3);
7555 if (s->current_el == 0 || s->current_el == 2) {
7560 case ARM_CPU_MODE_USR:
7561 case ARM_CPU_MODE_FIQ:
7562 case ARM_CPU_MODE_IRQ:
7563 case ARM_CPU_MODE_SVC:
7564 case ARM_CPU_MODE_ABT:
7565 case ARM_CPU_MODE_UND:
7566 case ARM_CPU_MODE_SYS:
7568 case ARM_CPU_MODE_HYP:
7569 if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
7573 case ARM_CPU_MODE_MON:
7574 /* No need to check specifically for "are we non-secure" because
7575 * we've already made EL0 UNDEF and handled the trap for S-EL1;
7576 * so if this isn't EL3 then we must be non-secure. */
7578 if (s->current_el != 3) {
7587 unallocated_encoding(s);
7591 addr = tcg_temp_new_i32();
7592 tmp = tcg_const_i32(mode);
7593 /* get_r13_banked() will raise an exception if called from System mode */
7594 gen_set_condexec(s);
7595 gen_set_pc_im(s, s->pc_curr);
7596 gen_helper_get_r13_banked(addr, cpu_env, tmp);
7597 tcg_temp_free_i32(tmp);
7614 tcg_gen_addi_i32(addr, addr, offset);
7615 tmp = load_reg(s, 14);
7616 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
7617 tcg_temp_free_i32(tmp);
7618 tmp = load_cpu_field(spsr);
7619 tcg_gen_addi_i32(addr, addr, 4);
7620 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
7621 tcg_temp_free_i32(tmp);
7639 tcg_gen_addi_i32(addr, addr, offset);
7640 tmp = tcg_const_i32(mode);
7641 gen_helper_set_r13_banked(cpu_env, tmp, addr);
7642 tcg_temp_free_i32(tmp);
7644 tcg_temp_free_i32(addr);
7645 s->base.is_jmp = DISAS_UPDATE;
7648 /* Generate a label used for skipping this instruction */
7649 static void arm_gen_condlabel(DisasContext *s)
7652 s->condlabel = gen_new_label();
7657 /* Skip this instruction if the ARM condition is false */
7658 static void arm_skip_unless(DisasContext *s, uint32_t cond)
7660 arm_gen_condlabel(s);
7661 arm_gen_test_cc(cond ^ 1, s->condlabel);
7666 /* Constant expanders for the decoders. */
7669 static int negate(DisasContext *s, int x)
7674 static int plus_2(DisasContext *s, int x)
7679 static int times_2(DisasContext *s, int x)
7684 static int times_4(DisasContext *s, int x)
7689 /* Return only the rotation part of T32ExpandImm. */
7690 static int t32_expandimm_rot(DisasContext *s, int x)
7692 return x & 0xc00 ? extract32(x, 7, 5) : 0;
7695 /* Return the unrotated immediate from T32ExpandImm. */
7696 static int t32_expandimm_imm(DisasContext *s, int x)
7698 int imm = extract32(x, 0, 8);
7700 switch (extract32(x, 8, 4)) {
7702 /* Nothing to do. */
7704 case 1: /* 00XY00XY */
7707 case 2: /* XY00XY00 */
7710 case 3: /* XYXYXYXY */
7714 /* Rotated constant. */
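/*
 * Worked example (added commentary, not from the original source):
 * for imm12 = 0x1ab, bits [11:10] are zero and bits [9:8] are 1, so
 * t32_expandimm_rot() returns 0 and t32_expandimm_imm() returns the
 * 00XY00XY pattern 0x00ab00ab. For imm12 = 0x7d6, bits [11:10] are
 * non-zero, so the rotation is bits [11:7] = 15 and the unrotated
 * value is 0x80 | bits [6:0] = 0xd6; ror32(0xd6, 15) = 0x01ac0000 is
 * the final constant.
 */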
7721 static int t32_branch24(DisasContext *s, int x)
7723 /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S. */
7724 x ^= !(x < 0) * (3 << 21);
7725 /* Append the final zero. */
7729 static int t16_setflags(DisasContext *s)
7731 return s->condexec_mask == 0;
7734 static int t16_push_list(DisasContext *s, int x)
7736 return (x & 0xff) | (x & 0x100) << (14 - 8);
7739 static int t16_pop_list(DisasContext *s, int x)
7741 return (x & 0xff) | (x & 0x100) << (15 - 8);
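/*
 * Added note (not from the original source): in the T16 PUSH/POP
 * encodings bit 8 of the register list is the extra "M"/"P" bit, so the
 * expanders above move it to bit 14 (lr) for PUSH and bit 15 (pc) for
 * POP; e.g. a PUSH list of 0x103 (r0, r1 plus M) becomes 0x4003.
 */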
7745 /* Include the generated decoders. */
7748 #include "decode-a32.inc.c"
7749 #include "decode-a32-uncond.inc.c"
7750 #include "decode-t32.inc.c"
7751 #include "decode-t16.inc.c"
7753 /* Helpers to swap operands for reverse-subtract. */
7754 static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
7756 tcg_gen_sub_i32(dst, b, a);
7759 static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
7761 gen_sub_CC(dst, b, a);
7764 static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
7766 gen_sub_carry(dest, b, a);
7769 static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
7771 gen_sbc_CC(dest, b, a);
7775 /* Helpers for the data processing routines.
7777 * After the computation store the results back.
7778 * This may be suppressed altogether (STREG_NONE), require a runtime
7779 * check against the stack limits (STREG_SP_CHECK), or generate an
7780 * exception return. Oh, or store into a register.
7782 * Always return true, indicating success for a trans_* function. */
7791 static bool store_reg_kind(DisasContext *s, int rd,
7792 TCGv_i32 val, StoreRegKind kind)
7796 tcg_temp_free_i32(val);
7799 /* See ALUWritePC: Interworking only from a32 mode. */
7801 store_reg(s, rd, val);
7803 store_reg_bx(s, rd, val);
7806 case STREG_SP_CHECK:
7807 store_sp_checked(s, val);
7810 gen_exception_return(s, val);
7813 g_assert_not_reached();
7817 /* Data Processing (register)
7819 * Operate, with set flags, one register source,
7820 * one immediate shifted register source, and a destination. */
7822 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
7823 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7824 int logic_cc, StoreRegKind kind)
7826 TCGv_i32 tmp1, tmp2;
7828 tmp2 = load_reg(s, a->rm);
7829 gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
7830 tmp1 = load_reg(s, a->rn);
7832 gen(tmp1, tmp1, tmp2);
7833 tcg_temp_free_i32(tmp2);
7838 return store_reg_kind(s, a->rd, tmp1, kind);
7841 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
7842 void (*gen)(TCGv_i32, TCGv_i32),
7843 int logic_cc, StoreRegKind kind)
7847 tmp = load_reg(s, a->rm);
7848 gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
7854 return store_reg_kind(s, a->rd, tmp, kind);
7858 /* Data-processing (register-shifted register)
7860 * Operate, with set flags, one register source,
7861 * one register shifted register source, and a destination. */
7863 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
7864 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7865 int logic_cc, StoreRegKind kind)
7867 TCGv_i32 tmp1, tmp2;
7869 tmp1 = load_reg(s, a->rs);
7870 tmp2 = load_reg(s, a->rm);
7871 gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
7872 tmp1 = load_reg(s, a->rn);
7874 gen(tmp1, tmp1, tmp2);
7875 tcg_temp_free_i32(tmp2);
7880 return store_reg_kind(s, a->rd, tmp1, kind);
7883 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
7884 void (*gen)(TCGv_i32, TCGv_i32),
7885 int logic_cc, StoreRegKind kind)
7887 TCGv_i32 tmp1, tmp2;
7889 tmp1 = load_reg(s, a->rs);
7890 tmp2 = load_reg(s, a->rm);
7891 gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
7897 return store_reg_kind(s, a->rd, tmp2, kind);
7901 /* Data-processing (immediate)
7903 * Operate, with set flags, one register source,
7904 * one rotated immediate, and a destination.
7906 * Note that logic_cc && a->rot setting CF based on the msb of the
7907 * immediate is the reason why we must pass in the unrotated form of the immediate. */
7910 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
7911 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7912 int logic_cc, StoreRegKind kind)
7914 TCGv_i32 tmp1, tmp2;
7917 imm = ror32(a->imm, a->rot);
7918 if (logic_cc && a->rot) {
7919 tcg_gen_movi_i32(cpu_CF, imm >> 31);
7921 tmp2 = tcg_const_i32(imm);
7922 tmp1 = load_reg(s, a->rn);
7924 gen(tmp1, tmp1, tmp2);
7925 tcg_temp_free_i32(tmp2);
7930 return store_reg_kind(s, a->rd, tmp1, kind);
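/*
 * Illustrative example (added commentary, not from the original
 * source): for "ANDS r0, r1, #0xff000000" the immediate is encoded as
 * 0xff rotated right by 8, so a->rot is non-zero and the C flag is set
 * from bit 31 of the rotated value, i.e. CF = 1, matching the carry-out
 * of the architectural immediate expansion.
 */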
7933 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
7934 void (*gen)(TCGv_i32, TCGv_i32),
7935 int logic_cc, StoreRegKind kind)
7940 imm = ror32(a->imm, a->rot);
7941 if (logic_cc && a->rot) {
7942 tcg_gen_movi_i32(cpu_CF, imm >> 31);
7944 tmp = tcg_const_i32(imm);
7950 return store_reg_kind(s, a->rd, tmp, kind);
7953 #define DO_ANY3(NAME, OP, L, K) \
7954 static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a) \
7955 { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); } \
7956 static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a) \
7957 { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); } \
7958 static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a) \
7959 { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
7961 #define DO_ANY2(NAME, OP, L, K) \
7962 static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a) \
7963 { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); } \
7964 static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a) \
7965 { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); } \
7966 static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a) \
7967 { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
7969 #define DO_CMP2(NAME, OP, L) \
7970 static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a) \
7971 { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); } \
7972 static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a) \
7973 { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); } \
7974 static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a) \
7975 { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
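/*
 * Illustrative expansion (added commentary, not from the original
 * source): DO_CMP2(TST, tcg_gen_and_i32, true) below produces
 *
 *   static bool trans_TST_xrri(DisasContext *s, arg_s_rrr_shi *a)
 *   { return op_s_rrr_shi(s, a, tcg_gen_and_i32, true, STREG_NONE); }
 *
 * plus the _xrrr and _xri variants; these trans_* functions are what
 * the generated decoders call for the register, register-shifted-
 * register and immediate forms of each insn.
 */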
7977 DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
7978 DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
7979 DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
7980 DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
7982 DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
7983 DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
7984 DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
7985 DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
7987 DO_CMP2(TST, tcg_gen_and_i32, true)
7988 DO_CMP2(TEQ, tcg_gen_xor_i32, true)
7989 DO_CMP2(CMN, gen_add_CC, false)
7990 DO_CMP2(CMP, gen_sub_CC, false)
7992 DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
7993 a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
7996 /* Note for the computation of StoreRegKind we return out of the
7997 * middle of the functions that are expanded by DO_ANY3, and that
7998 * we modify a->s via that parameter before it is used by OP. */
8000 DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
8002 StoreRegKind ret = STREG_NORMAL;
8003 if (a->rd == 15 && a->s) {
8005 /* See ALUExceptionReturn:
8006 * In User mode, UNPREDICTABLE; we choose UNDEF.
8007 * In Hyp mode, UNDEFINED. */
8009 if (IS_USER(s) || s->current_el == 2) {
8010 unallocated_encoding(s);
8013 /* There is no writeback of nzcv to PSTATE. */
8015 ret = STREG_EXC_RET;
8016 } else if (a->rd == 13 && a->rn == 13) {
8017 ret = STREG_SP_CHECK;
8022 DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
8024 StoreRegKind ret = STREG_NORMAL;
8025 if (a->rd == 15 && a->s) {
8027 /* See ALUExceptionReturn:
8028 * In User mode, UNPREDICTABLE; we choose UNDEF.
8029 * In Hyp mode, UNDEFINED. */
8031 if (IS_USER(s) || s->current_el == 2) {
8032 unallocated_encoding(s);
8035 /* There is no writeback of nzcv to PSTATE. */
8037 ret = STREG_EXC_RET;
8038 } else if (a->rd == 13) {
8039 ret = STREG_SP_CHECK;
8044 DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
8047 /* ORN is only available with T32, so there is no register-shifted-register
8048 * form of the insn. Using the DO_ANY3 macro would create an unused function. */
8050 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
8052 return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
8055 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
8057 return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
8064 static bool trans_ADR(DisasContext *s, arg_ri *a)
8066 store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
8070 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
8074 if (!ENABLE_ARCH_6T2) {
8078 tmp = tcg_const_i32(a->imm);
8079 store_reg(s, a->rd, tmp);
8083 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
8087 if (!ENABLE_ARCH_6T2) {
8091 tmp = load_reg(s, a->rd);
8092 tcg_gen_ext16u_i32(tmp, tmp);
8093 tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
8094 store_reg(s, a->rd, tmp);
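/*
 * Usage note (added commentary, not from the original source): MOVT
 * only replaces the top halfword of Rd, so a 32-bit constant is
 * normally built with a MOVW/MOVT pair, e.g.
 *
 *   movw r0, #0x5678    @ r0 = 0x00005678
 *   movt r0, #0x1234    @ r0 = 0x12345678
 *
 * which is exactly the ext16u + ori sequence generated above.
 */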
8099 /* Multiply and multiply accumulate */
8102 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
8106 t1 = load_reg(s, a->rn);
8107 t2 = load_reg(s, a->rm);
8108 tcg_gen_mul_i32(t1, t1, t2);
8109 tcg_temp_free_i32(t2);
8111 t2 = load_reg(s, a->ra);
8112 tcg_gen_add_i32(t1, t1, t2);
8113 tcg_temp_free_i32(t2);
8118 store_reg(s, a->rd, t1);
8122 static bool trans_MUL(DisasContext *s, arg_MUL *a)
8124 return op_mla(s, a, false);
8127 static bool trans_MLA(DisasContext *s, arg_MLA *a)
8129 return op_mla(s, a, true);
8132 static bool trans_MLS(DisasContext *s, arg_MLS *a)
8136 if (!ENABLE_ARCH_6T2) {
8139 t1 = load_reg(s, a->rn);
8140 t2 = load_reg(s, a->rm);
8141 tcg_gen_mul_i32(t1, t1, t2);
8142 tcg_temp_free_i32(t2);
8143 t2 = load_reg(s, a->ra);
8144 tcg_gen_sub_i32(t1, t2, t1);
8145 tcg_temp_free_i32(t2);
8146 store_reg(s, a->rd, t1);
8150 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
8152 TCGv_i32 t0, t1, t2, t3;
8154 t0 = load_reg(s, a->rm);
8155 t1 = load_reg(s, a->rn);
8157 tcg_gen_mulu2_i32(t0, t1, t0, t1);
8159 tcg_gen_muls2_i32(t0, t1, t0, t1);
8162 t2 = load_reg(s, a->ra);
8163 t3 = load_reg(s, a->rd);
8164 tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
8165 tcg_temp_free_i32(t2);
8166 tcg_temp_free_i32(t3);
8169 gen_logicq_cc(t0, t1);
8171 store_reg(s, a->ra, t0);
8172 store_reg(s, a->rd, t1);
8176 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
8178 return op_mlal(s, a, true, false);
8181 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
8183 return op_mlal(s, a, false, false);
8186 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
8188 return op_mlal(s, a, true, true);
8191 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
8193 return op_mlal(s, a, false, true);
8196 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
8198 TCGv_i32 t0, t1, t2, zero;
8201 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8206 t0 = load_reg(s, a->rm);
8207 t1 = load_reg(s, a->rn);
8208 tcg_gen_mulu2_i32(t0, t1, t0, t1);
8209 zero = tcg_const_i32(0);
8210 t2 = load_reg(s, a->ra);
8211 tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
8212 tcg_temp_free_i32(t2);
8213 t2 = load_reg(s, a->rd);
8214 tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
8215 tcg_temp_free_i32(t2);
8216 tcg_temp_free_i32(zero);
8217 store_reg(s, a->ra, t0);
8218 store_reg(s, a->rd, t1);
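/*
 * Added note (not from the original source): UMAAL computes
 * Rd:Ra = Rn * Rm + Ra + Rd. The largest possible value is
 * (2^32 - 1)^2 + 2 * (2^32 - 1) = 2^64 - 1, so the two add2 steps above
 * with a zero high word can never overflow the 64-bit result.
 */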
8223 /* Saturating addition and subtraction */
8226 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
8231 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8232 : !ENABLE_ARCH_5TE) {
8236 t0 = load_reg(s, a->rm);
8237 t1 = load_reg(s, a->rn);
8239 gen_helper_add_saturate(t1, cpu_env, t1, t1);
8242 gen_helper_add_saturate(t0, cpu_env, t0, t1);
8244 gen_helper_sub_saturate(t0, cpu_env, t0, t1);
8246 tcg_temp_free_i32(t1);
8247 store_reg(s, a->rd, t0);
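/*
 * Illustrative example (added commentary, not from the original
 * source): QDADD doubles the second source with saturation before the
 * saturating add, so "qdadd r0, r1, r2" computes
 * r0 = SAT(r1 + SAT(2 * r2)), setting Q if either step saturates;
 * QDSUB does the same with a final subtract.
 */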
8251 #define DO_QADDSUB(NAME, ADD, DOUB) \
8252 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
8254 return op_qaddsub(s, a, ADD, DOUB); \
8257 DO_QADDSUB(QADD, true, false)
8258 DO_QADDSUB(QSUB, false, false)
8259 DO_QADDSUB(QDADD, true, true)
8260 DO_QADDSUB(QDSUB, false, true)
8265 /* Halfword multiply and multiply accumulate */
8268 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
8269 int add_long, bool nt, bool mt)
8271 TCGv_i32 t0, t1, tl, th;
8274 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8275 : !ENABLE_ARCH_5TE) {
8279 t0 = load_reg(s, a->rn);
8280 t1 = load_reg(s, a->rm);
8281 gen_mulxy(t0, t1, nt, mt);
8282 tcg_temp_free_i32(t1);
8286 store_reg(s, a->rd, t0);
8289 t1 = load_reg(s, a->ra);
8290 gen_helper_add_setq(t0, cpu_env, t0, t1);
8291 tcg_temp_free_i32(t1);
8292 store_reg(s, a->rd, t0);
8295 tl = load_reg(s, a->ra);
8296 th = load_reg(s, a->rd);
8297 /* Sign-extend the 32-bit product to 64 bits. */
8298 t1 = tcg_temp_new_i32();
8299 tcg_gen_sari_i32(t1, t0, 31);
8300 tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
8301 tcg_temp_free_i32(t0);
8302 tcg_temp_free_i32(t1);
8303 store_reg(s, a->ra, tl);
8304 store_reg(s, a->rd, th);
8307 g_assert_not_reached();
8312 #define DO_SMLAX(NAME, add, nt, mt) \
8313 static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
8315 return op_smlaxxx(s, a, add, nt, mt); \
8318 DO_SMLAX(SMULBB, 0, 0, 0)
8319 DO_SMLAX(SMULBT, 0, 0, 1)
8320 DO_SMLAX(SMULTB, 0, 1, 0)
8321 DO_SMLAX(SMULTT, 0, 1, 1)
8323 DO_SMLAX(SMLABB, 1, 0, 0)
8324 DO_SMLAX(SMLABT, 1, 0, 1)
8325 DO_SMLAX(SMLATB, 1, 1, 0)
8326 DO_SMLAX(SMLATT, 1, 1, 1)
8328 DO_SMLAX(SMLALBB, 2, 0, 0)
8329 DO_SMLAX(SMLALBT, 2, 0, 1)
8330 DO_SMLAX(SMLALTB, 2, 1, 0)
8331 DO_SMLAX(SMLALTT, 2, 1, 1)
8335 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
8339 if (!ENABLE_ARCH_5TE) {
8343 t0 = load_reg(s, a->rn);
8344 t1 = load_reg(s, a->rm);
8346 /* Since the nominal result is product<47:16>, shift the 16-bit
8347 * input up by 16 bits, so that the result is at product<63:32>. */
8350 tcg_gen_andi_i32(t1, t1, 0xffff0000);
8352 tcg_gen_shli_i32(t1, t1, 16);
8354 tcg_gen_muls2_i32(t0, t1, t0, t1);
8355 tcg_temp_free_i32(t0);
8357 t0 = load_reg(s, a->ra);
8358 gen_helper_add_setq(t1, cpu_env, t1, t0);
8359 tcg_temp_free_i32(t0);
8361 store_reg(s, a->rd, t1);
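/*
 * Worked example (added commentary, not from the original source): for
 * SMULWB with Rn = 0x00123456 and Rm<15:0> = 0x0100, the architectural
 * result is (Rn * 0x100) >> 16 = 0x1234. After shifting the halfword up
 * to 0x01000000, the signed 64-bit product is 0x123456000000 and its
 * high 32 bits are 0x1234, which is why only the high half of the
 * muls2 above is kept.
 */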
8365 #define DO_SMLAWX(NAME, add, mt) \
8366 static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
8368 return op_smlawx(s, a, add, mt); \
8371 DO_SMLAWX(SMULWB, 0, 0)
8372 DO_SMLAWX(SMULWT, 0, 1)
8373 DO_SMLAWX(SMLAWB, 1, 0)
8374 DO_SMLAWX(SMLAWT, 1, 1)
8379 /* MSR (immediate) and hints */
8382 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
8385 /* When running single-threaded TCG code, use the helper to ensure that
8386 * the next round-robin scheduled vCPU gets a crack. When running in
8387 * MTTCG we don't generate jumps to the helper as it won't affect the
8388 * scheduling of other vCPUs. */
8390 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
8391 gen_set_pc_im(s, s->base.pc_next);
8392 s->base.is_jmp = DISAS_YIELD;
8397 static bool trans_WFE(DisasContext *s, arg_WFE *a)
8400 /* When running single-threaded TCG code, use the helper to ensure that
8401 * the next round-robin scheduled vCPU gets a crack. In MTTCG mode we
8402 * just skip this instruction. Currently the SEV/SEVL instructions,
8403 * which are *one* of many ways to wake the CPU from WFE, are not
8404 * implemented so we can't sleep like WFI does. */
8406 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
8407 gen_set_pc_im(s, s->base.pc_next);
8408 s->base.is_jmp = DISAS_WFE;
8413 static bool trans_WFI(DisasContext *s, arg_WFI *a)
8415 /* For WFI, halt the vCPU until an IRQ. */
8416 gen_set_pc_im(s, s->base.pc_next);
8417 s->base.is_jmp = DISAS_WFI;
8421 static bool trans_NOP(DisasContext *s, arg_NOP *a)
8426 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
8428 uint32_t val = ror32(a->imm, a->rot * 2);
8429 uint32_t mask = msr_mask(s, a->mask, a->r);
8431 if (gen_set_psr_im(s, mask, a->r, val)) {
8432 unallocated_encoding(s);
8438 /* Cyclic Redundancy Check */
8441 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
8443 TCGv_i32 t1, t2, t3;
8445 if (!dc_isar_feature(aa32_crc32, s)) {
8449 t1 = load_reg(s, a->rn);
8450 t2 = load_reg(s, a->rm);
8461 g_assert_not_reached();
8463 t3 = tcg_const_i32(1 << sz);
8465 gen_helper_crc32c(t1, t1, t2, t3);
8467 gen_helper_crc32(t1, t1, t2, t3);
8469 tcg_temp_free_i32(t2);
8470 tcg_temp_free_i32(t3);
8471 store_reg(s, a->rd, t1);
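/*
 * Added note (not from the original source): t3 = 1 << sz is the
 * number of bytes being folded in, so the byte forms pass 1, the
 * halfword forms pass 2 and the word forms pass 4 to the crc32/crc32c
 * helpers.
 */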
8475 #define DO_CRC32(NAME, c, sz) \
8476 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
8477 { return op_crc32(s, a, c, sz); }
8479 DO_CRC32(CRC32B, false, MO_8)
8480 DO_CRC32(CRC32H, false, MO_16)
8481 DO_CRC32(CRC32W, false, MO_32)
8482 DO_CRC32(CRC32CB, true, MO_8)
8483 DO_CRC32(CRC32CH, true, MO_16)
8484 DO_CRC32(CRC32CW, true, MO_32)
8489 /* Miscellaneous instructions */
8492 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
8494 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8497 gen_mrs_banked(s, a->r, a->sysm, a->rd);
8501 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
8503 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8506 gen_msr_banked(s, a->r, a->sysm, a->rn);
8510 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
8514 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8519 unallocated_encoding(s);
8522 tmp = load_cpu_field(spsr);
8524 tmp = tcg_temp_new_i32();
8525 gen_helper_cpsr_read(tmp, cpu_env);
8527 store_reg(s, a->rd, tmp);
8531 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
8534 uint32_t mask = msr_mask(s, a->mask, a->r);
8536 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8539 tmp = load_reg(s, a->rn);
8540 if (gen_set_psr(s, mask, a->r, tmp)) {
8541 unallocated_encoding(s);
8546 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
8550 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8553 tmp = tcg_const_i32(a->sysm);
8554 gen_helper_v7m_mrs(tmp, cpu_env, tmp);
8555 store_reg(s, a->rd, tmp);
8559 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
8563 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8566 addr = tcg_const_i32((a->mask << 10) | a->sysm);
8567 reg = load_reg(s, a->rn);
8568 gen_helper_v7m_msr(cpu_env, addr, reg);
8569 tcg_temp_free_i32(addr);
8570 tcg_temp_free_i32(reg);
8571 /* If we wrote to CONTROL, the EL might have changed */
8572 gen_helper_rebuild_hflags_m32_newel(cpu_env);
8577 static bool trans_BX(DisasContext *s, arg_BX *a)
8579 if (!ENABLE_ARCH_4T) {
8582 gen_bx_excret(s, load_reg(s, a->rm));
8586 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
8588 if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
8591 /* Trivial implementation equivalent to bx. */
8592 gen_bx(s, load_reg(s, a->rm));
8596 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
8600 if (!ENABLE_ARCH_5) {
8603 tmp = load_reg(s, a->rm);
8604 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
8610 /* BXNS/BLXNS: only exist for v8M with the security extensions,
8611 * and always UNDEF if NonSecure. We don't implement these in
8612 * the user-only mode either (in theory you can use them from
8613 * Secure User mode but they are too tied in to system emulation). */
8615 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
8617 if (!s->v8m_secure || IS_USER_ONLY) {
8618 unallocated_encoding(s);
8625 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
8627 if (!s->v8m_secure || IS_USER_ONLY) {
8628 unallocated_encoding(s);
8630 gen_blxns(s, a->rm);
8635 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
8639 if (!ENABLE_ARCH_5) {
8642 tmp = load_reg(s, a->rm);
8643 tcg_gen_clzi_i32(tmp, tmp, 32);
8644 store_reg(s, a->rd, tmp);
8648 static bool trans_ERET(DisasContext *s, arg_ERET *a)
8652 if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
8656 unallocated_encoding(s);
8659 if (s->current_el == 2) {
8660 /* ERET from Hyp uses ELR_Hyp, not LR */
8661 tmp = load_cpu_field(elr_el[2]);
8663 tmp = load_reg(s, 14);
8665 gen_exception_return(s, tmp);
8669 static bool trans_HLT(DisasContext *s, arg_HLT *a)
8675 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
8677 if (!ENABLE_ARCH_5) {
8680 if (arm_dc_feature(s, ARM_FEATURE_M) &&
8681 semihosting_enabled() &&
8682 #ifndef CONFIG_USER_ONLY
8686 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
8688 gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
8693 static bool trans_HVC(DisasContext *s, arg_HVC *a)
8695 if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
8699 unallocated_encoding(s);
8706 static bool trans_SMC(DisasContext *s, arg_SMC *a)
8708 if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
8712 unallocated_encoding(s);
8719 static bool trans_SG(DisasContext *s, arg_SG *a)
8721 if (!arm_dc_feature(s, ARM_FEATURE_M) ||
8722 !arm_dc_feature(s, ARM_FEATURE_V8)) {
8727 /* The bulk of the behaviour for this instruction is implemented
8728 * in v7m_handle_execute_nsc(), which deals with the insn when
8729 * it is executed by a CPU in non-secure state from memory
8730 * which is Secure & NonSecure-Callable.
8731 * Here we only need to handle the remaining cases:
8732 * * in NS memory (including the "security extension not
8733 * implemented" case) : NOP
8734 * * in S memory but CPU already secure (clear IT bits)
8735 * We know that the attribute for the memory this insn is
8736 * in must match the current CPU state, because otherwise
8737 * get_phys_addr_pmsav8 would have generated an exception. */
8739 if (s->v8m_secure) {
8740 /* Like the IT insn, we don't need to generate any code */
8741 s->condexec_cond = 0;
8742 s->condexec_mask = 0;
8747 static bool trans_TT(DisasContext *s, arg_TT *a)
8751 if (!arm_dc_feature(s, ARM_FEATURE_M) ||
8752 !arm_dc_feature(s, ARM_FEATURE_V8)) {
8755 if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
8756 /* We UNDEF for these UNPREDICTABLE cases */
8757 unallocated_encoding(s);
8760 if (a->A && !s->v8m_secure) {
8761 /* This case is UNDEFINED. */
8762 unallocated_encoding(s);
8766 addr = load_reg(s, a->rn);
8767 tmp = tcg_const_i32((a->A << 1) | a->T);
8768 gen_helper_v7m_tt(tmp, cpu_env, addr, tmp);
8769 tcg_temp_free_i32(addr);
8770 store_reg(s, a->rd, tmp);
8775 /* Load/store register index */
8778 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
8782 /* ISS not valid if writeback */
8785 if (s->base.pc_next - s->pc_curr == 2) {
8794 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
8796 TCGv_i32 addr = load_reg(s, a->rn);
8798 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8799 gen_helper_v8m_stackcheck(cpu_env, addr);
8803 TCGv_i32 ofs = load_reg(s, a->rm);
8804 gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
8806 tcg_gen_add_i32(addr, addr, ofs);
8808 tcg_gen_sub_i32(addr, addr, ofs);
8810 tcg_temp_free_i32(ofs);
8815 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
8816 TCGv_i32 addr, int address_offset)
8819 TCGv_i32 ofs = load_reg(s, a->rm);
8820 gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
8822 tcg_gen_add_i32(addr, addr, ofs);
8824 tcg_gen_sub_i32(addr, addr, ofs);
8826 tcg_temp_free_i32(ofs);
8828 tcg_temp_free_i32(addr);
8831 tcg_gen_addi_i32(addr, addr, address_offset);
8832 store_reg(s, a->rn, addr);
8835 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
8836 MemOp mop, int mem_idx)
8838 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
8841 addr = op_addr_rr_pre(s, a);
8843 tmp = tcg_temp_new_i32();
8844 gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8845 disas_set_da_iss(s, mop, issinfo);
8848 /* Perform base writeback before the loaded value to
8849 * ensure correct behavior with overlapping index registers. */
8851 op_addr_rr_post(s, a, addr, 0);
8852 store_reg_from_load(s, a->rt, tmp);
8856 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
8857 MemOp mop, int mem_idx)
8859 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
8862 addr = op_addr_rr_pre(s, a);
8864 tmp = load_reg(s, a->rt);
8865 gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8866 disas_set_da_iss(s, mop, issinfo);
8867 tcg_temp_free_i32(tmp);
8869 op_addr_rr_post(s, a, addr, 0);
8873 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
8875 int mem_idx = get_mem_index(s);
8878 if (!ENABLE_ARCH_5TE) {
8882 unallocated_encoding(s);
8885 addr = op_addr_rr_pre(s, a);
8887 tmp = tcg_temp_new_i32();
8888 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8889 store_reg(s, a->rt, tmp);
8891 tcg_gen_addi_i32(addr, addr, 4);
8893 tmp = tcg_temp_new_i32();
8894 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8895 store_reg(s, a->rt + 1, tmp);
8897 /* LDRD w/ base writeback is undefined if the registers overlap. */
8898 op_addr_rr_post(s, a, addr, -4);
8902 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
8904 int mem_idx = get_mem_index(s);
8907 if (!ENABLE_ARCH_5TE) {
8911 unallocated_encoding(s);
8914 addr = op_addr_rr_pre(s, a);
8916 tmp = load_reg(s, a->rt);
8917 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8918 tcg_temp_free_i32(tmp);
8920 tcg_gen_addi_i32(addr, addr, 4);
8922 tmp = load_reg(s, a->rt + 1);
8923 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8924 tcg_temp_free_i32(tmp);
8926 op_addr_rr_post(s, a, addr, -4);
8931 /* Load/store immediate index */
8934 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
8942 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8944 /* Stackcheck. Here we know 'addr' is the current SP;
8945 * U is set if we're moving SP up, else down. It is
8946 * UNKNOWN whether the limit check triggers when SP starts
8947 * below the limit and ends up above it; we chose to do so. */
8950 TCGv_i32 newsp = tcg_temp_new_i32();
8951 tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
8952 gen_helper_v8m_stackcheck(cpu_env, newsp);
8953 tcg_temp_free_i32(newsp);
8955 gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
8959 return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
8962 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
8963 TCGv_i32 addr, int address_offset)
8967 address_offset += a->imm;
8969 address_offset -= a->imm;
8972 tcg_temp_free_i32(addr);
8975 tcg_gen_addi_i32(addr, addr, address_offset);
8976 store_reg(s, a->rn, addr);
8979 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
8980 MemOp mop, int mem_idx)
8982 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
8985 addr = op_addr_ri_pre(s, a);
8987 tmp = tcg_temp_new_i32();
8988 gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8989 disas_set_da_iss(s, mop, issinfo);
8992 /* Perform base writeback before the loaded value to
8993 * ensure correct behavior with overlapping index registers. */
8995 op_addr_ri_post(s, a, addr, 0);
8996 store_reg_from_load(s, a->rt, tmp);
9000 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
9001 MemOp mop, int mem_idx)
9003 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
9006 addr = op_addr_ri_pre(s, a);
9008 tmp = load_reg(s, a->rt);
9009 gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
9010 disas_set_da_iss(s, mop, issinfo);
9011 tcg_temp_free_i32(tmp);
9013 op_addr_ri_post(s, a, addr, 0);
9017 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
9019 int mem_idx = get_mem_index(s);
9022 addr = op_addr_ri_pre(s, a);
9024 tmp = tcg_temp_new_i32();
9025 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
9026 store_reg(s, a->rt, tmp);
9028 tcg_gen_addi_i32(addr, addr, 4);
9030 tmp = tcg_temp_new_i32();
9031 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
9032 store_reg(s, rt2, tmp);
9034 /* LDRD w/ base writeback is undefined if the registers overlap. */
9035 op_addr_ri_post(s, a, addr, -4);
9039 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
9041 if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
9044 return op_ldrd_ri(s, a, a->rt + 1);
9047 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
9050 .u = a->u, .w = a->w, .p = a->p,
9051 .rn = a->rn, .rt = a->rt, .imm = a->imm
9053 return op_ldrd_ri(s, &b, a->rt2);
9056 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
9058 int mem_idx = get_mem_index(s);
9061 addr = op_addr_ri_pre(s, a);
9063 tmp = load_reg(s, a->rt);
9064 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
9065 tcg_temp_free_i32(tmp);
9067 tcg_gen_addi_i32(addr, addr, 4);
9069 tmp = load_reg(s, rt2);
9070 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
9071 tcg_temp_free_i32(tmp);
9073 op_addr_ri_post(s, a, addr, -4);
9077 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
9079 if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
9082 return op_strd_ri(s, a, a->rt + 1);
9085 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
9088 .u = a->u, .w = a->w, .p = a->p,
9089 .rn = a->rn, .rt = a->rt, .imm = a->imm
9091 return op_strd_ri(s, &b, a->rt2);
9094 #define DO_LDST(NAME, WHICH, MEMOP) \
9095 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a) \
9097 return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s)); \
9099 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a) \
9101 return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s)); \
9103 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a) \
9105 return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s)); \
9107 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a) \
9109 return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s)); \
9112 DO_LDST(LDR, load, MO_UL)
9113 DO_LDST(LDRB, load, MO_UB)
9114 DO_LDST(LDRH, load, MO_UW)
9115 DO_LDST(LDRSB, load, MO_SB)
9116 DO_LDST(LDRSH, load, MO_SW)
9118 DO_LDST(STR, store, MO_UL)
9119 DO_LDST(STRB, store, MO_UB)
9120 DO_LDST(STRH, store, MO_UW)
9125 /* Synchronization primitives */
9128 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
9134 addr = load_reg(s, a->rn);
9135 taddr = gen_aa32_addr(s, addr, opc);
9136 tcg_temp_free_i32(addr);
9138 tmp = load_reg(s, a->rt2);
9139 tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
9140 tcg_temp_free(taddr);
9142 store_reg(s, a->rt, tmp);
9146 static bool trans_SWP(DisasContext *s, arg_SWP *a)
9148 return op_swp(s, a, MO_UL | MO_ALIGN);
9151 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
9153 return op_swp(s, a, MO_UB);
9157 /* Load/Store Exclusive and Load-Acquire/Store-Release */
9160 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
9163 /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
9164 bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
9166 /* We UNDEF for these UNPREDICTABLE cases. */
9167 if (a->rd == 15 || a->rn == 15 || a->rt == 15
9168 || a->rd == a->rn || a->rd == a->rt
9169 || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
9173 || (!v8a && s->thumb && a->rt2 == 13)))) {
9174 unallocated_encoding(s);
9179 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
9182 addr = tcg_temp_local_new_i32();
9183 load_reg_var(s, addr, a->rn);
9184 tcg_gen_addi_i32(addr, addr, a->imm);
9186 gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
9187 tcg_temp_free_i32(addr);
9191 static bool trans_STREX(DisasContext *s, arg_STREX *a)
9193 if (!ENABLE_ARCH_6) {
9196 return op_strex(s, a, MO_32, false);
9199 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
9201 if (!ENABLE_ARCH_6K) {
9204 /* We UNDEF for these UNPREDICTABLE cases. */
9206 unallocated_encoding(s);
9210 return op_strex(s, a, MO_64, false);
9213 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
9215 return op_strex(s, a, MO_64, false);
9218 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
9220 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9223 return op_strex(s, a, MO_8, false);
9226 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
9228 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9231 return op_strex(s, a, MO_16, false);
9234 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
9236 if (!ENABLE_ARCH_8) {
9239 return op_strex(s, a, MO_32, true);
9242 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
9244 if (!ENABLE_ARCH_8) {
9247 /* We UNDEF for these UNPREDICTABLE cases. */
9249 unallocated_encoding(s);
9253 return op_strex(s, a, MO_64, true);
9256 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
9258 if (!ENABLE_ARCH_8) {
9261 return op_strex(s, a, MO_64, true);
9264 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
9266 if (!ENABLE_ARCH_8) {
9269 return op_strex(s, a, MO_8, true);
9272 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
9274 if (!ENABLE_ARCH_8) {
9277 return op_strex(s, a, MO_16, true);
9280 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
9284 if (!ENABLE_ARCH_8) {
9287 /* We UNDEF for these UNPREDICTABLE cases. */
9288 if (a->rn == 15 || a->rt == 15) {
9289 unallocated_encoding(s);
9293 addr = load_reg(s, a->rn);
9294 tmp = load_reg(s, a->rt);
9295 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
9296 gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
9297 disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
9299 tcg_temp_free_i32(tmp);
9300 tcg_temp_free_i32(addr);
9304 static bool trans_STL(DisasContext *s, arg_STL *a)
9306 return op_stl(s, a, MO_UL);
9309 static bool trans_STLB(DisasContext *s, arg_STL *a)
9311 return op_stl(s, a, MO_UB);
9314 static bool trans_STLH(DisasContext *s, arg_STL *a)
9316 return op_stl(s, a, MO_UW);
9319 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
9322 /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
9323 bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
9325 /* We UNDEF for these UNPREDICTABLE cases. */
9326 if (a->rn == 15 || a->rt == 15
9327 || (!v8a && s->thumb && a->rt == 13)
9329 && (a->rt2 == 15 || a->rt == a->rt2
9330 || (!v8a && s->thumb && a->rt2 == 13)))) {
9331 unallocated_encoding(s);
9335 addr = tcg_temp_local_new_i32();
9336 load_reg_var(s, addr, a->rn);
9337 tcg_gen_addi_i32(addr, addr, a->imm);
9339 gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
9340 tcg_temp_free_i32(addr);
9343 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
9348 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
9350 if (!ENABLE_ARCH_6) {
9353 return op_ldrex(s, a, MO_32, false);
9356 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
9358 if (!ENABLE_ARCH_6K) {
9361 /* We UNDEF for these UNPREDICTABLE cases. */
9363 unallocated_encoding(s);
9367 return op_ldrex(s, a, MO_64, false);
9370 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
9372 return op_ldrex(s, a, MO_64, false);
9375 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
9377 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9380 return op_ldrex(s, a, MO_8, false);
9383 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
9385 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9388 return op_ldrex(s, a, MO_16, false);
9391 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
9393 if (!ENABLE_ARCH_8) {
9396 return op_ldrex(s, a, MO_32, true);
9399 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
9401 if (!ENABLE_ARCH_8) {
9404 /* We UNDEF for these UNPREDICTABLE cases. */
9406 unallocated_encoding(s);
9410 return op_ldrex(s, a, MO_64, true);
9413 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
9415 if (!ENABLE_ARCH_8) {
9418 return op_ldrex(s, a, MO_64, true);
9421 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
9423 if (!ENABLE_ARCH_8) {
9426 return op_ldrex(s, a, MO_8, true);
9429 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
9431 if (!ENABLE_ARCH_8) {
9434 return op_ldrex(s, a, MO_16, true);
9437 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
9441 if (!ENABLE_ARCH_8) {
9444 /* We UNDEF for these UNPREDICTABLE cases. */
9445 if (a->rn == 15 || a->rt == 15) {
9446 unallocated_encoding(s);
9450 addr = load_reg(s, a->rn);
9451 tmp = tcg_temp_new_i32();
9452 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
9453 disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
9454 tcg_temp_free_i32(addr);
9456 store_reg(s, a->rt, tmp);
9457 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
9461 static bool trans_LDA(DisasContext *s, arg_LDA *a)
9463 return op_lda(s, a, MO_UL);
9466 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
9468 return op_lda(s, a, MO_UB);
9471 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
9473 return op_lda(s, a, MO_UW);
9477 /* Media instructions */
9480 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
9484 if (!ENABLE_ARCH_6) {
9488 t1 = load_reg(s, a->rn);
9489 t2 = load_reg(s, a->rm);
9490 gen_helper_usad8(t1, t1, t2);
9491 tcg_temp_free_i32(t2);
9493 t2 = load_reg(s, a->ra);
9494 tcg_gen_add_i32(t1, t1, t2);
9495 tcg_temp_free_i32(t2);
9497 store_reg(s, a->rd, t1);
9501 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
9504 int width = a->widthm1 + 1;
9507 if (!ENABLE_ARCH_6T2) {
9510 if (shift + width > 32) {
9511 /* UNPREDICTABLE; we choose to UNDEF */
9512 unallocated_encoding(s);
9516 tmp = load_reg(s, a->rn);
9518 tcg_gen_extract_i32(tmp, tmp, shift, width);
9520 tcg_gen_sextract_i32(tmp, tmp, shift, width);
9522 store_reg(s, a->rd, tmp);
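/*
 * Illustrative example (added commentary, not from the original
 * source): "ubfx r0, r1, #4, #8" has lsb = 4 and widthm1 = 7, so the
 * extract above yields r1<11:4> zero-extended into r0; SBFX extracts
 * the same field sign-extended via sextract.
 */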
9526 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
9528 return op_bfx(s, a, false);
9531 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
9533 return op_bfx(s, a, true);
9536 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
9539 int msb = a->msb, lsb = a->lsb;
9542 if (!ENABLE_ARCH_6T2) {
9546 /* UNPREDICTABLE; we choose to UNDEF */
9547 unallocated_encoding(s);
9551 width = msb + 1 - lsb;
9554 tmp = tcg_const_i32(0);
9557 tmp = load_reg(s, a->rn);
9560 TCGv_i32 tmp2 = load_reg(s, a->rd);
9561 tcg_gen_deposit_i32(tmp, tmp2, tmp, lsb, width);
9562 tcg_temp_free_i32(tmp2);
9564 store_reg(s, a->rd, tmp);
9568 static bool trans_UDF(DisasContext *s, arg_UDF *a)
9570 unallocated_encoding(s);
9575 /* Parallel addition and subtraction */
9578 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
9579 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
9584 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9589 t0 = load_reg(s, a->rn);
9590 t1 = load_reg(s, a->rm);
9594 tcg_temp_free_i32(t1);
9595 store_reg(s, a->rd, t0);
9599 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
9600 void (*gen)(TCGv_i32, TCGv_i32,
9601 TCGv_i32, TCGv_ptr))
9607 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9612 t0 = load_reg(s, a->rn);
9613 t1 = load_reg(s, a->rm);
9615 ge = tcg_temp_new_ptr();
9616 tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
9617 gen(t0, t0, t1, ge);
9619 tcg_temp_free_ptr(ge);
9620 tcg_temp_free_i32(t1);
9621 store_reg(s, a->rd, t0);
9625 #define DO_PAR_ADDSUB(NAME, helper) \
9626 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
9628 return op_par_addsub(s, a, helper); \
9631 #define DO_PAR_ADDSUB_GE(NAME, helper) \
9632 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
9634 return op_par_addsub_ge(s, a, helper); \
9637 DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
9638 DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
9639 DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
9640 DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
9641 DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
9642 DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
9644 DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
9645 DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
9646 DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
9647 DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
9648 DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
9649 DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
9651 DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
9652 DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
9653 DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
9654 DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
9655 DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
9656 DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
9658 DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
9659 DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
9660 DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
9661 DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
9662 DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
9663 DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
9665 DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
9666 DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
9667 DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
9668 DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
9669 DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
9670 DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
9672 DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
9673 DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
9674 DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
9675 DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
9676 DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
9677 DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
9679 #undef DO_PAR_ADDSUB
9680 #undef DO_PAR_ADDSUB_GE
9683 /* Packing, unpacking, saturation, and reversal */
9686 static bool trans_PKH(DisasContext *s, arg_PKH *a)
9692 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9697 tn = load_reg(s, a->rn);
9698 tm = load_reg(s, a->rm);
9704 tcg_gen_sari_i32(tm, tm, shift);
9705 tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
9708 tcg_gen_shli_i32(tm, tm, shift);
9709 tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
9711 tcg_temp_free_i32(tm);
9712 store_reg(s, a->rd, tn);
9716 static bool op_sat(DisasContext *s, arg_sat *a,
9717 void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
9719 TCGv_i32 tmp, satimm;
9722 if (!ENABLE_ARCH_6) {
9726 tmp = load_reg(s, a->rn);
9728 tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
9730 tcg_gen_shli_i32(tmp, tmp, shift);
9733 satimm = tcg_const_i32(a->satimm);
9734 gen(tmp, cpu_env, tmp, satimm);
9735 tcg_temp_free_i32(satimm);
9737 store_reg(s, a->rd, tmp);
9741 static bool trans_SSAT(DisasContext *s, arg_sat *a)
9743 return op_sat(s, a, gen_helper_ssat);
9746 static bool trans_USAT(DisasContext *s, arg_sat *a)
9748 return op_sat(s, a, gen_helper_usat);
9751 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
9753 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9756 return op_sat(s, a, gen_helper_ssat16);
9759 static bool trans_USAT16(DisasContext *s, arg_sat *a)
9761 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9764 return op_sat(s, a, gen_helper_usat16);
9767 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
9768 void (*gen_extract)(TCGv_i32, TCGv_i32),
9769 void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
9773 if (!ENABLE_ARCH_6) {
9777 tmp = load_reg(s, a->rm);
9779 /* TODO: In many cases we could do a shift instead of a rotate.
9780 * Combined with a simple extend, that becomes an extract. */
9782 tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
9783 gen_extract(tmp, tmp);
9786 TCGv_i32 tmp2 = load_reg(s, a->rn);
9787 gen_add(tmp, tmp, tmp2);
9788 tcg_temp_free_i32(tmp2);
9790 store_reg(s, a->rd, tmp);
9794 static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
9796 return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
9799 static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
9801 return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
9804 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
9806 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9809 return op_xta(s, a, gen_helper_sxtb16, gen_add16);
9812 static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
9814 return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
9817 static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
9819 return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
9822 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
9824 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9827 return op_xta(s, a, gen_helper_uxtb16, gen_add16);
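/*
 * All of the (S/U)XTA* forms above share op_xta: rotate Rm right by
 * rot * 8, extend (byte, halfword, or the paired-byte *16 forms via
 * helpers), then add Rn for the accumulating variants. For example,
 * UXTAH r0, r1, r2, ROR #16 computes
 * r0 = r1 + zero_extend(r2[31:16]).
 */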
9830 static bool trans_SEL(DisasContext *s, arg_rrr *a)
9832 TCGv_i32 t1, t2, t3;
9835 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9840 t1 = load_reg(s, a->rn);
9841 t2 = load_reg(s, a->rm);
9842 t3 = tcg_temp_new_i32();
9843 tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
9844 gen_helper_sel_flags(t1, t3, t1, t2);
9845 tcg_temp_free_i32(t3);
9846 tcg_temp_free_i32(t2);
9847 store_reg(s, a->rd, t1);
9851 static bool op_rr(DisasContext *s, arg_rr *a,
9852 void (*gen)(TCGv_i32, TCGv_i32))
9856 tmp = load_reg(s, a->rm);
9858 store_reg(s, a->rd, tmp);
9862 static bool trans_REV(DisasContext *s, arg_rr *a)
9864 if (!ENABLE_ARCH_6) {
9867 return op_rr(s, a, tcg_gen_bswap32_i32);
9870 static bool trans_REV16(DisasContext *s, arg_rr *a)
9872 if (!ENABLE_ARCH_6) {
9875 return op_rr(s, a, gen_rev16);
9878 static bool trans_REVSH(DisasContext *s, arg_rr *a)
9880 if (!ENABLE_ARCH_6) {
9883 return op_rr(s, a, gen_revsh);
9886 static bool trans_RBIT(DisasContext *s, arg_rr *a)
9888 if (!ENABLE_ARCH_6T2) {
9891 return op_rr(s, a, gen_helper_rbit);
9895 * Signed multiply, signed and unsigned divide
9898 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
9902 if (!ENABLE_ARCH_6) {
9906 t1 = load_reg(s, a->rn);
9907 t2 = load_reg(s, a->rm);
9911 gen_smul_dual(t1, t2);
9914 /* This subtraction cannot overflow. */
9915 tcg_gen_sub_i32(t1, t1, t2);
9918 * This addition cannot overflow 32 bits; however it may
9919 * overflow when considered as a signed operation, in which case
9920 * we must set the Q flag.
9922 gen_helper_add_setq(t1, cpu_env, t1, t2);
9924 tcg_temp_free_i32(t2);
9927 t2 = load_reg(s, a->ra);
9928 gen_helper_add_setq(t1, cpu_env, t1, t2);
9929 tcg_temp_free_i32(t2);
9931 store_reg(s, a->rd, t1);
9935 static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
9937 return op_smlad(s, a, false, false);
9940 static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
9942 return op_smlad(s, a, true, false);
9945 static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
9947 return op_smlad(s, a, false, true);
9950 static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
9952 return op_smlad(s, a, true, true);
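/*
 * Worked example for the dual multiplies above (architectural
 * semantics): SMLAD r0, r1, r2, r3 computes
 *   r0 = r3 + (int16)r1[15:0]  * (int16)r2[15:0]
 *           + (int16)r1[31:16] * (int16)r2[31:16]
 * setting Q if either addition overflows as a signed 32-bit operation.
 * The 'X' variants swap the halfwords of Rm first, and the SMLSD forms
 * subtract the top-halfword product from the bottom-halfword one.
 */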
9955 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
9960 if (!ENABLE_ARCH_6) {
9964 t1 = load_reg(s, a->rn);
9965 t2 = load_reg(s, a->rm);
9969 gen_smul_dual(t1, t2);
9971 l1 = tcg_temp_new_i64();
9972 l2 = tcg_temp_new_i64();
9973 tcg_gen_ext_i32_i64(l1, t1);
9974 tcg_gen_ext_i32_i64(l2, t2);
9975 tcg_temp_free_i32(t1);
9976 tcg_temp_free_i32(t2);
9979 tcg_gen_sub_i64(l1, l1, l2);
9981 tcg_gen_add_i64(l1, l1, l2);
9983 tcg_temp_free_i64(l2);
9985 gen_addq(s, l1, a->ra, a->rd);
9986 gen_storeq_reg(s, a->ra, a->rd, l1);
9987 tcg_temp_free_i64(l1);
9991 static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
9993 return op_smlald(s, a, false, false);
9996 static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
9998 return op_smlald(s, a, true, false);
10001 static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
10003 return op_smlald(s, a, false, true);
10006 static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
10008 return op_smlald(s, a, true, true);
10011 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
10016 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
10017 : !ENABLE_ARCH_6) {
10021 t1 = load_reg(s, a->rn);
10022 t2 = load_reg(s, a->rm);
10023 tcg_gen_muls2_i32(t2, t1, t1, t2);
10026 TCGv_i32 t3 = load_reg(s, a->ra);
10029 * For SMMLS, we need a 64-bit subtract: the borrow caused by
10030 * a non-zero multiplicand lowpart must propagate into the high word,
10031 * and we also need the correct result lowpart for the rounding step.
10033 TCGv_i32 zero = tcg_const_i32(0);
10034 tcg_gen_sub2_i32(t2, t1, zero, t3, t2, t1);
10035 tcg_temp_free_i32(zero);
10037 tcg_gen_add_i32(t1, t1, t3);
10039 tcg_temp_free_i32(t3);
10043 * Adding 0x80000000 to the 64-bit quantity means that we have
10044 * carry in to the high word when the low word has the msb set.
10046 tcg_gen_shri_i32(t2, t2, 31);
10047 tcg_gen_add_i32(t1, t1, t2);
10049 tcg_temp_free_i32(t2);
10050 store_reg(s, a->rd, t1);
10054 static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
10056 return op_smmla(s, a, false, false);
10059 static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
10061 return op_smmla(s, a, true, false);
10064 static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
10066 return op_smmla(s, a, false, true);
10069 static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
10071 return op_smmla(s, a, true, true);
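/*
 * The rounding step in op_smmla above relies on only the
 * most-significant word of the 64-bit result being kept: adding
 * 0x80000000 to the full 64-bit value is equivalent to adding bit 31
 * of the low word into the high word. For instance, if the value is
 * 0x00000001_80000000, adding 0x80000000 carries and the high word
 * becomes 2, i.e. the result is rounded to nearest.
 */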
10074 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
10079 ? !dc_isar_feature(aa32_thumb_div, s)
10080 : !dc_isar_feature(aa32_arm_div, s)) {
10084 t1 = load_reg(s, a->rn);
10085 t2 = load_reg(s, a->rm);
10087 gen_helper_udiv(t1, t1, t2);
10089 gen_helper_sdiv(t1, t1, t2);
10091 tcg_temp_free_i32(t2);
10092 store_reg(s, a->rd, t1);
10096 static bool trans_SDIV(DisasContext *s, arg_rrr *a)
10098 return op_div(s, a, false);
10101 static bool trans_UDIV(DisasContext *s, arg_rrr *a)
10103 return op_div(s, a, true);
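/*
 * Note that op_div checks separate ID fields for the T32 and A32
 * divide instructions, since SDIV/UDIV may be implemented for one
 * instruction set and not the other. Architecturally a division by
 * zero returns zero rather than trapping (M-profile cores can instead
 * fault when CCR.DIV_0_TRP is set); the udiv/sdiv helpers used here
 * follow the return-zero behaviour.
 */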
10107 * Block data transfer
10110 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
10112 TCGv_i32 addr = load_reg(s, a->rn);
10116 /* pre increment */
10117 tcg_gen_addi_i32(addr, addr, 4);
10119 /* pre decrement */
10120 tcg_gen_addi_i32(addr, addr, -(n * 4));
10122 } else if (!a->i && n != 1) {
10123 /* post decrement */
10124 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
10127 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
10129 * If the writeback is incrementing SP rather than
10130 * decrementing it, and the initial SP is below the
10131 * stack limit but the final written-back SP would
10132 * be above, then we must not perform any memory
10133 * accesses, but it is IMPDEF whether we generate
10134 * an exception. We choose to do so in this case.
10135 * At this point 'addr' is the lowest address, so
10136 * either the original SP (if incrementing) or our
10137 * final SP (if decrementing), so that's what we check.
10139 gen_helper_v8m_stackcheck(cpu_env, addr);
10145 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
10146 TCGv_i32 addr, int n)
10152 /* post increment */
10153 tcg_gen_addi_i32(addr, addr, 4);
10155 /* post decrement */
10156 tcg_gen_addi_i32(addr, addr, -(n * 4));
10158 } else if (!a->i && n != 1) {
10159 /* pre decrement */
10160 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
10162 store_reg(s, a->rn, addr);
10164 tcg_temp_free_i32(addr);
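/*
 * Worked example of the block-transfer addressing above: for a
 * four-register STMDB with writeback (e.g. STMDB sp!, {r4-r7}),
 * op_addr_block_pre lowers the address to Rn - 16, the individual
 * transfers then step upwards in 4-byte increments, and when writeback
 * is requested op_addr_block_post adjusts the address back down so
 * that Rn - 16 is what ends up in the base register.
 */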
10168 static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
10170 int i, j, n, list, mem_idx;
10172 TCGv_i32 addr, tmp, tmp2;
10177 /* Only usable in supervisor mode. */
10178 unallocated_encoding(s);
10185 if (n < min_n || a->rn == 15) {
10186 unallocated_encoding(s);
10190 addr = op_addr_block_pre(s, a, n);
10191 mem_idx = get_mem_index(s);
10193 for (i = j = 0; i < 16; i++) {
10194 if (!(list & (1 << i))) {
10198 if (user && i != 15) {
10199 tmp = tcg_temp_new_i32();
10200 tmp2 = tcg_const_i32(i);
10201 gen_helper_get_user_reg(tmp, cpu_env, tmp2);
10202 tcg_temp_free_i32(tmp2);
10204 tmp = load_reg(s, i);
10206 gen_aa32_st32(s, tmp, addr, mem_idx);
10207 tcg_temp_free_i32(tmp);
10209 /* No need to add after the last transfer. */
10211 tcg_gen_addi_i32(addr, addr, 4);
10215 op_addr_block_post(s, a, addr, n);
10219 static bool trans_STM(DisasContext *s, arg_ldst_block *a)
10221 /* BitCount(list) < 1 is UNPREDICTABLE */
10222 return op_stm(s, a, 1);
10225 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
10227 /* Writeback register in register list is UNPREDICTABLE for T32. */
10228 if (a->w && (a->list & (1 << a->rn))) {
10229 unallocated_encoding(s);
10232 /* BitCount(list) < 2 is UNPREDICTABLE */
10233 return op_stm(s, a, 2);
10236 static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
10238 int i, j, n, list, mem_idx;
10241 bool exc_return = false;
10242 TCGv_i32 addr, tmp, tmp2, loaded_var;
10245 /* LDM (user), LDM (exception return) */
10247 /* Only usable in supervisor mode. */
10248 unallocated_encoding(s);
10251 if (extract32(a->list, 15, 1)) {
10255 /* LDM (user) does not allow writeback. */
10257 unallocated_encoding(s);
10265 if (n < min_n || a->rn == 15) {
10266 unallocated_encoding(s);
10270 addr = op_addr_block_pre(s, a, n);
10271 mem_idx = get_mem_index(s);
10272 loaded_base = false;
10275 for (i = j = 0; i < 16; i++) {
10276 if (!(list & (1 << i))) {
10280 tmp = tcg_temp_new_i32();
10281 gen_aa32_ld32u(s, tmp, addr, mem_idx);
10283 tmp2 = tcg_const_i32(i);
10284 gen_helper_set_user_reg(cpu_env, tmp2, tmp);
10285 tcg_temp_free_i32(tmp2);
10286 tcg_temp_free_i32(tmp);
10287 } else if (i == a->rn) {
10289 loaded_base = true;
10290 } else if (i == 15 && exc_return) {
10291 store_pc_exc_ret(s, tmp);
10293 store_reg_from_load(s, i, tmp);
10296 /* No need to add after the last transfer. */
10298 tcg_gen_addi_i32(addr, addr, 4);
10302 op_addr_block_post(s, a, addr, n);
10305 /* Note that we reject base == pc above. */
10306 store_reg(s, a->rn, loaded_var);
10310 /* Restore CPSR from SPSR. */
10311 tmp = load_cpu_field(spsr);
10312 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
10315 gen_helper_cpsr_write_eret(cpu_env, tmp);
10316 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
10319 tcg_temp_free_i32(tmp);
10320 /* Must exit loop to check un-masked IRQs */
10321 s->base.is_jmp = DISAS_EXIT;
10326 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
10329 * Writeback register in register list is UNPREDICTABLE
10330 * for ArchVersion() >= 7. Prior to v7, A32 would write
10331 * an UNKNOWN value to the base register.
10333 if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
10334 unallocated_encoding(s);
10337 /* BitCount(list) < 1 is UNPREDICTABLE */
10338 return do_ldm(s, a, 1);
10341 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
10343 /* Writeback register in register list is UNPREDICTABLE for T32. */
10344 if (a->w && (a->list & (1 << a->rn))) {
10345 unallocated_encoding(s);
10348 /* BitCount(list) < 2 is UNPREDICTABLE */
10349 return do_ldm(s, a, 2);
10352 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
10354 /* Writeback is conditional on the base register not being loaded. */
10355 a->w = !(a->list & (1 << a->rn));
10356 /* BitCount(list) < 1 is UNPREDICTABLE */
10357 return do_ldm(s, a, 1);
10361 * Branch, branch with link
10364 static bool trans_B(DisasContext *s, arg_i *a)
10366 gen_jmp(s, read_pc(s) + a->imm);
10370 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
10372 /* This insn takes its condition from the encoding and is required to be outside an IT block. */
10373 if (a->cond >= 0xe) {
10376 if (s->condexec_mask) {
10377 unallocated_encoding(s);
10380 arm_skip_unless(s, a->cond);
10381 gen_jmp(s, read_pc(s) + a->imm);
10385 static bool trans_BL(DisasContext *s, arg_i *a)
10387 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
10388 gen_jmp(s, read_pc(s) + a->imm);
10392 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
10396 /* For A32, ARCH(5) is checked near the start of the uncond block. */
10397 if (s->thumb && (a->imm & 2)) {
10400 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
10401 tmp = tcg_const_i32(!s->thumb);
10402 store_cpu_field(tmp, thumb);
10403 gen_jmp(s, (read_pc(s) & ~3) + a->imm);
10407 static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
10409 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10410 tcg_gen_movi_i32(cpu_R[14], read_pc(s) + (a->imm << 12));
10414 static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
10416 TCGv_i32 tmp = tcg_temp_new_i32();
10418 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10419 tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
10420 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
10425 static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
10429 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10430 if (!ENABLE_ARCH_5) {
10433 tmp = tcg_temp_new_i32();
10434 tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
10435 tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
10436 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
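/*
 * The three functions above implement the Thumb-1 split BL/BLX pair:
 * the prefix half stashes PC + (imm << 12) in LR, and the suffix half
 * then forms the branch target from LR + (imm << 1) -- with bit 0 set
 * to stay in Thumb state for BL, or word-aligned for BLX, which
 * branches to ARM state -- while LR is rewritten with the return
 * address, bit 0 set to indicate Thumb.
 */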
10441 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
10443 TCGv_i32 addr, tmp;
10445 tmp = load_reg(s, a->rm);
10447 tcg_gen_add_i32(tmp, tmp, tmp);
10449 addr = load_reg(s, a->rn);
10450 tcg_gen_add_i32(addr, addr, tmp);
10452 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
10453 half ? MO_UW | s->be_data : MO_UB);
10454 tcg_temp_free_i32(addr);
10456 tcg_gen_add_i32(tmp, tmp, tmp);
10457 tcg_gen_addi_i32(tmp, tmp, read_pc(s));
10458 store_reg(s, 15, tmp);
10462 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
10464 return op_tbranch(s, a, false);
10467 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
10469 return op_tbranch(s, a, true);
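/*
 * Table branch illustration: TBB loads an unsigned byte from Rn + Rm
 * and TBH a halfword from Rn + 2*Rm; the loaded value is doubled and
 * added to the PC, so the table encodes forward offsets of up to 510
 * bytes (TBB) or 131070 bytes (TBH), always in units of two.
 */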
10472 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
10474 TCGv_i32 tmp = load_reg(s, a->rn);
10476 arm_gen_condlabel(s);
10477 tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
10478 tmp, 0, s->condlabel);
10479 tcg_temp_free_i32(tmp);
10480 gen_jmp(s, read_pc(s) + a->imm);
10485 * Supervisor call - both T32 & A32 come here so we need to check
10486 * which mode we are in when checking for semihosting.
10489 static bool trans_SVC(DisasContext *s, arg_SVC *a)
10491 const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
10493 if (!arm_dc_feature(s, ARM_FEATURE_M) && semihosting_enabled() &&
10494 #ifndef CONFIG_USER_ONLY
10497 (a->imm == semihost_imm)) {
10498 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
10500 gen_set_pc_im(s, s->base.pc_next);
10501 s->svc_imm = a->imm;
10502 s->base.is_jmp = DISAS_SWI;
10508 * Unconditional system instructions
10511 static bool trans_RFE(DisasContext *s, arg_RFE *a)
10513 static const int8_t pre_offset[4] = {
10514 /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
10516 static const int8_t post_offset[4] = {
10517 /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
10519 TCGv_i32 addr, t1, t2;
10521 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10525 unallocated_encoding(s);
10529 addr = load_reg(s, a->rn);
10530 tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
10532 /* Load PC into t1 and CPSR into t2. */
10533 t1 = tcg_temp_new_i32();
10534 gen_aa32_ld32u(s, t1, addr, get_mem_index(s));
10535 tcg_gen_addi_i32(addr, addr, 4);
10536 t2 = tcg_temp_new_i32();
10537 gen_aa32_ld32u(s, t2, addr, get_mem_index(s));
10540 /* Base writeback. */
10541 tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
10542 store_reg(s, a->rn, addr);
10544 tcg_temp_free_i32(addr);
10546 gen_rfe(s, t1, t2);
10550 static bool trans_SRS(DisasContext *s, arg_SRS *a)
10552 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10555 gen_srs(s, a->mode, a->pu, a->w);
10559 static bool trans_CPS(DisasContext *s, arg_CPS *a)
10561 uint32_t mask, val;
10563 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10567 /* Implemented as NOP in user mode. */
10570 /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
10592 gen_set_psr_im(s, mask, 0, val);
10597 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
10599 TCGv_i32 tmp, addr, el;
10601 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
10605 /* Implemented as NOP in user mode. */
10609 tmp = tcg_const_i32(a->im);
10612 addr = tcg_const_i32(19);
10613 gen_helper_v7m_msr(cpu_env, addr, tmp);
10614 tcg_temp_free_i32(addr);
10618 addr = tcg_const_i32(16);
10619 gen_helper_v7m_msr(cpu_env, addr, tmp);
10620 tcg_temp_free_i32(addr);
10622 el = tcg_const_i32(s->current_el);
10623 gen_helper_rebuild_hflags_m32(cpu_env, el);
10624 tcg_temp_free_i32(el);
10625 tcg_temp_free_i32(tmp);
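/*
 * The constants passed to gen_helper_v7m_msr above are v7-M SYSm
 * register numbers: 19 selects FAULTMASK and 16 selects PRIMASK, so
 * CPSIE/CPSID on M profile is implemented as an MSR to the relevant
 * mask register, followed by a rebuild of the cached hflags.
 */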
10631 * Clear-Exclusive, Barriers
10634 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
10637 ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
10638 : !ENABLE_ARCH_6K) {
10645 static bool trans_DSB(DisasContext *s, arg_DSB *a)
10647 if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
10650 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
10654 static bool trans_DMB(DisasContext *s, arg_DMB *a)
10656 return trans_DSB(s, NULL);
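/*
 * DMB simply reuses the DSB translation above: TCG does not model the
 * different barrier strengths, so both become a full memory barrier.
 */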
10659 static bool trans_ISB(DisasContext *s, arg_ISB *a)
10661 if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
10665 * We need to break the TB after this insn to execute
10666 * self-modifying code correctly and also to take
10667 * any pending interrupts immediately.
10669 gen_goto_tb(s, 0, s->base.pc_next);
10673 static bool trans_SB(DisasContext *s, arg_SB *a)
10675 if (!dc_isar_feature(aa32_sb, s)) {
10679 * TODO: There is no speculation barrier opcode
10680 * for TCG; MB and end the TB instead.
10682 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
10683 gen_goto_tb(s, 0, s->base.pc_next);
10687 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
10689 if (!ENABLE_ARCH_6) {
10692 if (a->E != (s->be_data == MO_BE)) {
10693 gen_helper_setend(cpu_env);
10694 s->base.is_jmp = DISAS_UPDATE;
10700 * Preload instructions
10701 * All are nops, contingent on the appropriate arch level.
10704 static bool trans_PLD(DisasContext *s, arg_PLD *a)
10706 return ENABLE_ARCH_5TE;
10709 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
10711 return arm_dc_feature(s, ARM_FEATURE_V7MP);
10714 static bool trans_PLI(DisasContext *s, arg_PLD *a)
10716 return ENABLE_ARCH_7;
10723 static bool trans_IT(DisasContext *s, arg_IT *a)
10725 int cond_mask = a->cond_mask;
10728 * No actual code generated for this insn, just setup state.
10730 * Combinations of firstcond and mask which set up an 0b1111
10731 * condition are UNPREDICTABLE; we take the CONSTRAINED
10732 * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
10733 * i.e. both meaning "execute always".
10735 s->condexec_cond = (cond_mask >> 4) & 0xe;
10736 s->condexec_mask = cond_mask & 0x1f;
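/*
 * The split here is deliberate: condexec_cond holds bits [3:1] of
 * firstcond, while condexec_mask carries firstcond[0] followed by the
 * 4-bit mask. The per-insn advance in thumb_tr_translate_insn shifts
 * the next bit out of condexec_mask into the low bit of condexec_cond,
 * so each instruction in the block sees its full 4-bit condition.
 * As a sketch: ITE EQ encodes cond_mask = 0x0c, giving cond 0x0 and
 * mask 0x0c here; after the IT insn itself the advance yields cond 0x0
 * (EQ) with mask 0x18, and after the first conditional insn cond 0x1
 * (NE) with mask 0x10.
 */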
10744 static void disas_arm_insn(DisasContext *s, unsigned int insn)
10746 unsigned int cond = insn >> 28;
10748 /* M variants do not implement ARM mode; this must raise the INVSTATE
10749 * UsageFault exception.
10751 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10752 gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
10753 default_exception_el(s));
10758 /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
10759 * choose to UNDEF. In ARMv5 and above the space is used
10760 * for miscellaneous unconditional instructions.
10764 /* Unconditional instructions. */
10765 /* TODO: Perhaps merge these into one decodetree output file. */
10766 if (disas_a32_uncond(s, insn) ||
10767 disas_vfp_uncond(s, insn) ||
10768 disas_neon_dp(s, insn) ||
10769 disas_neon_ls(s, insn) ||
10770 disas_neon_shared(s, insn)) {
10773 /* fall back to legacy decoder */
10775 if (((insn >> 25) & 7) == 1) {
10776 /* NEON Data processing. */
10777 if (disas_neon_data_insn(s, insn)) {
10782 if ((insn & 0x0e000f00) == 0x0c000100) {
10783 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
10784 /* iWMMXt register transfer. */
10785 if (extract32(s->c15_cpar, 1, 1)) {
10786 if (!disas_iwmmxt_insn(s, insn)) {
10795 /* If the condition is not "always execute", generate a conditional
10796 jump to the next instruction. */
10797 arm_skip_unless(s, cond);
10800 /* TODO: Perhaps merge these into one decodetree output file. */
10801 if (disas_a32(s, insn) ||
10802 disas_vfp(s, insn)) {
10805 /* fall back to legacy decoder */
10807 switch ((insn >> 24) & 0xf) {
10811 if (((insn >> 8) & 0xe) == 10) {
10812 /* VFP, but failed disas_vfp. */
10815 if (disas_coproc_insn(s, insn)) {
10822 unallocated_encoding(s);
10827 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
10830 * Return true if this is a 16 bit instruction. We must be precise
10831 * about this (matching the decode).
10833 if ((insn >> 11) < 0x1d) {
10834 /* Definitely a 16-bit instruction */
10838 /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
10839 * first half of a 32-bit Thumb insn. Thumb-1 cores might
10840 * end up actually treating this as two 16-bit insns, though,
10841 * if it's half of a bl/blx pair that might span a page boundary.
10843 if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
10844 arm_dc_feature(s, ARM_FEATURE_M)) {
10845 /* Thumb2 cores (including all M profile ones) always treat
10846 * 32-bit insns as 32-bit.
10851 if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
10852 /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
10853 * is not on the next page; we merge this into a 32-bit
10858 /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
10859 * 0b1111_1xxx_xxxx_xxxx : BL suffix;
10860 * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
10861 * -- handle as single 16 bit insn
10866 /* Translate a 32-bit thumb instruction. */
10867 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
10870 * ARMv6-M supports a limited subset of Thumb2 instructions.
10871 * Other Thumb1 architectures allow only 32-bit
10872 * combined BL/BLX prefix and suffix.
10874 if (arm_dc_feature(s, ARM_FEATURE_M) &&
10875 !arm_dc_feature(s, ARM_FEATURE_V7)) {
10877 bool found = false;
10878 static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
10879 0xf3b08040 /* dsb */,
10880 0xf3b08050 /* dmb */,
10881 0xf3b08060 /* isb */,
10882 0xf3e08000 /* mrs */,
10883 0xf000d000 /* bl */};
10884 static const uint32_t armv6m_mask[] = {0xffe0d000,
10891 for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
10892 if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
10900 } else if ((insn & 0xf800e800) != 0xf000e800) {
10904 if ((insn & 0xef000000) == 0xef000000) {
10906 * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
10908 * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
10910 uint32_t a32_insn = (insn & 0xe2ffffff) |
10911 ((insn & (1 << 28)) >> 4) | (1 << 28);
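/*
 * Concretely: the 0xe2ffffff mask clears T32 bits 28, 27:26 and 24,
 * the (insn & (1 << 28)) >> 4 term moves the 'p' bit from T32 bit 28
 * down to A32 bit 24, and the final 1 << 28 supplies the constant
 * condition field, turning 0b111p_1111_... into 0b1111_001p_...
 */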
10913 if (disas_neon_dp(s, a32_insn)) {
10918 if ((insn & 0xff100000) == 0xf9000000) {
10920 * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
10922 * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
10924 uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
10926 if (disas_neon_ls(s, a32_insn)) {
10932 * TODO: Perhaps merge these into one decodetree output file.
10933 * Note disas_vfp is written for a32 with cond field in the
10934 * top nibble. The t32 encoding requires 0xe in the top nibble.
10936 if (disas_t32(s, insn) ||
10937 disas_vfp_uncond(s, insn) ||
10938 disas_neon_shared(s, insn) ||
10939 ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
10942 /* fall back to legacy decoder */
10944 switch ((insn >> 25) & 0xf) {
10945 case 0: case 1: case 2: case 3:
10946 /* 16-bit instructions. Should never happen. */
10948 case 6: case 7: case 14: case 15:
10950 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10951 /* 0b111x_11xx_xxxx_xxxx_xxxx_xxxx_xxxx_xxxx */
10952 if (extract32(insn, 24, 2) == 3) {
10953 goto illegal_op; /* op0 = 0b11 : unallocated */
10956 if (((insn >> 8) & 0xe) == 10 &&
10957 dc_isar_feature(aa32_fpsp_v2, s)) {
10958 /* FP, and the CPU supports it */
10961 /* All other insns: NOCP */
10962 gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
10963 syn_uncategorized(),
10964 default_exception_el(s));
10968 if (((insn >> 24) & 3) == 3) {
10969 /* Translate into the equivalent ARM encoding. */
10970 insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
10971 if (disas_neon_data_insn(s, insn)) {
10974 } else if (((insn >> 8) & 0xe) == 10) {
10975 /* VFP, but failed disas_vfp. */
10978 if (insn & (1 << 28))
10980 if (disas_coproc_insn(s, insn)) {
10989 unallocated_encoding(s);
10993 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
10995 if (!disas_t16(s, insn)) {
10996 unallocated_encoding(s);
11000 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
11002 /* Return true if the insn at dc->base.pc_next might cross a page boundary.
11003 * (False positives are OK, false negatives are not.)
11004 * We know this is a Thumb insn, and our caller ensures we are
11005 * only called if dc->base.pc_next is less than 4 bytes from the page
11006 * boundary, so we cross the page if the first 16 bits indicate
11007 * that this is a 32 bit insn.
11009 uint16_t insn = arm_lduw_code(env, s->base.pc_next, s->sctlr_b);
11011 return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
11014 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
11016 DisasContext *dc = container_of(dcbase, DisasContext, base);
11017 CPUARMState *env = cs->env_ptr;
11018 ARMCPU *cpu = env_archcpu(env);
11019 uint32_t tb_flags = dc->base.tb->flags;
11020 uint32_t condexec, core_mmu_idx;
11022 dc->isar = &cpu->isar;
11026 /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
11027 * there is no secure EL1, so we route exceptions to EL3.
11029 dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
11030 !arm_el_is_aa64(env, 3);
11031 dc->thumb = FIELD_EX32(tb_flags, TBFLAG_AM32, THUMB);
11032 dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
11033 condexec = FIELD_EX32(tb_flags, TBFLAG_AM32, CONDEXEC);
11034 dc->condexec_mask = (condexec & 0xf) << 1;
11035 dc->condexec_cond = condexec >> 4;
11037 core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX);
11038 dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
11039 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
11040 #if !defined(CONFIG_USER_ONLY)
11041 dc->user = (dc->current_el == 0);
11043 dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL);
11045 if (arm_feature(env, ARM_FEATURE_M)) {
11046 dc->vfp_enabled = 1;
11047 dc->be_data = MO_TE;
11048 dc->v7m_handler_mode = FIELD_EX32(tb_flags, TBFLAG_M32, HANDLER);
11049 dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
11050 regime_is_secure(env, dc->mmu_idx);
11051 dc->v8m_stackcheck = FIELD_EX32(tb_flags, TBFLAG_M32, STACKCHECK);
11052 dc->v8m_fpccr_s_wrong =
11053 FIELD_EX32(tb_flags, TBFLAG_M32, FPCCR_S_WRONG);
11054 dc->v7m_new_fp_ctxt_needed =
11055 FIELD_EX32(tb_flags, TBFLAG_M32, NEW_FP_CTXT_NEEDED);
11056 dc->v7m_lspact = FIELD_EX32(tb_flags, TBFLAG_M32, LSPACT);
11059 FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
11060 dc->debug_target_el =
11061 FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL);
11062 dc->sctlr_b = FIELD_EX32(tb_flags, TBFLAG_A32, SCTLR_B);
11063 dc->hstr_active = FIELD_EX32(tb_flags, TBFLAG_A32, HSTR_ACTIVE);
11064 dc->ns = FIELD_EX32(tb_flags, TBFLAG_A32, NS);
11065 dc->vfp_enabled = FIELD_EX32(tb_flags, TBFLAG_A32, VFPEN);
11066 if (arm_feature(env, ARM_FEATURE_XSCALE)) {
11067 dc->c15_cpar = FIELD_EX32(tb_flags, TBFLAG_A32, XSCALE_CPAR);
11069 dc->vec_len = FIELD_EX32(tb_flags, TBFLAG_A32, VECLEN);
11070 dc->vec_stride = FIELD_EX32(tb_flags, TBFLAG_A32, VECSTRIDE);
11073 dc->cp_regs = cpu->cp_regs;
11074 dc->features = env->features;
11076 /* Single step state. The code-generation logic here is:
11078 * generate code with no special handling for single-stepping (except
11079 * that anything that can make us go to SS_ACTIVE == 1 must end the TB;
11080 * this happens anyway because those changes are all system register or PSTATE writes).
11082 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
11083 * emit code for one insn
11084 * emit code to clear PSTATE.SS
11085 * emit code to generate software step exception for completed step
11086 * end TB (as usual for having generated an exception)
11087 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
11088 * emit code to generate a software step exception
11091 dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE);
11092 dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS);
11093 dc->is_ldex = false;
11095 dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
11097 /* If architectural single step active, limit to 1. */
11098 if (is_singlestepping(dc)) {
11099 dc->base.max_insns = 1;
11102 /* ARM is a fixed-length ISA. Bound the number of insns to execute
11103 to those left on the page. */
11105 int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
11106 dc->base.max_insns = MIN(dc->base.max_insns, bound);
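/*
 * For example, with 4K pages (TARGET_PAGE_SIZE == 0x1000) and a
 * pc_first whose page offset is 0xff8, -(pc_first | TARGET_PAGE_MASK)
 * is 8, so bound is 2 and at most the two word-sized A32 insns left
 * on the page are translated into this TB.
 */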
11109 cpu_V0 = tcg_temp_new_i64();
11110 cpu_V1 = tcg_temp_new_i64();
11111 /* FIXME: cpu_M0 can probably be the same as cpu_V0. */
11112 cpu_M0 = tcg_temp_new_i64();
11115 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
11117 DisasContext *dc = container_of(dcbase, DisasContext, base);
11119 /* A note on handling of the condexec (IT) bits:
11121 * We want to avoid the overhead of having to write the updated condexec
11122 * bits back to the CPUARMState for every instruction in an IT block. So:
11123 * (1) if the condexec bits are not already zero then we write
11124 * zero back into the CPUARMState now. This avoids complications trying
11125 * to do it at the end of the block. (For example if we don't do this
11126 * it's hard to identify whether we can safely skip writing condexec
11127 * at the end of the TB, which we definitely want to do for the case
11128 * where a TB doesn't do anything with the IT state at all.)
11129 * (2) if we are going to leave the TB then we call gen_set_condexec()
11130 * which will write the correct value into CPUARMState if zero is wrong.
11131 * This is done both for leaving the TB at the end, and for leaving
11132 * it because of an exception we know will happen, which is done in
11133 * gen_exception_insn(). The latter is necessary because we need to
11134 * leave the TB with the PC/IT state just prior to execution of the
11135 * instruction which caused the exception.
11136 * (3) if we leave the TB unexpectedly (eg a data abort on a load)
11137 * then the CPUARMState will be wrong and we need to reset it.
11138 * This is handled in the same way as restoration of the
11139 * PC in these situations; we save the value of the condexec bits
11140 * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
11141 * then uses this to restore them after an exception.
11143 * Note that there are no instructions which can read the condexec
11144 * bits, and none which can write non-static values to them, so
11145 * we don't need to care about whether CPUARMState is correct in the middle of a TB.
11149 /* Reset the conditional execution bits immediately. This avoids
11150 complications trying to do it at the end of the block. */
11151 if (dc->condexec_mask || dc->condexec_cond) {
11152 TCGv_i32 tmp = tcg_temp_new_i32();
11153 tcg_gen_movi_i32(tmp, 0);
11154 store_cpu_field(tmp, condexec_bits);
11158 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
11160 DisasContext *dc = container_of(dcbase, DisasContext, base);
11162 tcg_gen_insn_start(dc->base.pc_next,
11163 (dc->condexec_cond << 4) | (dc->condexec_mask >> 1),
11165 dc->insn_start = tcg_last_op();
11168 static bool arm_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
11169 const CPUBreakpoint *bp)
11171 DisasContext *dc = container_of(dcbase, DisasContext, base);
11173 if (bp->flags & BP_CPU) {
11174 gen_set_condexec(dc);
11175 gen_set_pc_im(dc, dc->base.pc_next);
11176 gen_helper_check_breakpoints(cpu_env);
11177 /* End the TB early; it's likely not going to be executed */
11178 dc->base.is_jmp = DISAS_TOO_MANY;
11180 gen_exception_internal_insn(dc, dc->base.pc_next, EXCP_DEBUG);
11181 /* The address covered by the breakpoint must be
11182 included in [tb->pc, tb->pc + tb->size) in order
11183 for it to be properly cleared -- thus we
11184 increment the PC here so that the logic setting
11185 tb->size below does the right thing. */
11186 /* TODO: Advance PC by correct instruction length to
11187 * avoid disassembler error messages */
11188 dc->base.pc_next += 2;
11189 dc->base.is_jmp = DISAS_NORETURN;
11195 static bool arm_pre_translate_insn(DisasContext *dc)
11197 #ifdef CONFIG_USER_ONLY
11198 /* Intercept jump to the magic kernel page. */
11199 if (dc->base.pc_next >= 0xffff0000) {
11200 /* We always get here via a jump, so we know we are not in a
11201 conditional execution block. */
11202 gen_exception_internal(EXCP_KERNEL_TRAP);
11203 dc->base.is_jmp = DISAS_NORETURN;
11208 if (dc->ss_active && !dc->pstate_ss) {
11209 /* Singlestep state is Active-pending.
11210 * If we're in this state at the start of a TB then either
11211 * a) we just took an exception to an EL which is being debugged
11212 * and this is the first insn in the exception handler
11213 * b) debug exceptions were masked and we just unmasked them
11214 * without changing EL (eg by clearing PSTATE.D)
11215 * In either case we're going to take a swstep exception in the
11216 * "did not step an insn" case, and so the syndrome ISV and EX
11217 * bits should be zero.
11219 assert(dc->base.num_insns == 1);
11220 gen_swstep_exception(dc, 0, 0);
11221 dc->base.is_jmp = DISAS_NORETURN;
11228 static void arm_post_translate_insn(DisasContext *dc)
11230 if (dc->condjmp && !dc->base.is_jmp) {
11231 gen_set_label(dc->condlabel);
11234 translator_loop_temp_check(&dc->base);
11237 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
11239 DisasContext *dc = container_of(dcbase, DisasContext, base);
11240 CPUARMState *env = cpu->env_ptr;
11243 if (arm_pre_translate_insn(dc)) {
11247 dc->pc_curr = dc->base.pc_next;
11248 insn = arm_ldl_code(env, dc->base.pc_next, dc->sctlr_b);
11250 dc->base.pc_next += 4;
11251 disas_arm_insn(dc, insn);
11253 arm_post_translate_insn(dc);
11255 /* ARM is a fixed-length ISA. We performed the cross-page check
11256 in init_disas_context by adjusting max_insns. */
11259 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
11261 /* Return true if this Thumb insn is always unconditional,
11262 * even inside an IT block. This is true of only a very few
11263 * instructions: BKPT, HLT, and SG.
11265 * A larger class of instructions are UNPREDICTABLE if used
11266 * inside an IT block; we do not need to detect those here, because
11267 * what we do by default (perform the cc check and update the IT
11268 * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
11269 * choice for those situations.
11271 * insn is either a 16-bit or a 32-bit instruction; the two are
11272 * distinguishable because for the 16-bit case the top 16 bits
11273 * are zeroes, and that isn't a valid 32-bit encoding.
11275 if ((insn & 0xffffff00) == 0xbe00) {
11280 if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
11281 !arm_dc_feature(s, ARM_FEATURE_M)) {
11282 /* HLT: v8A only. This is unconditional even when it is going to
11283 * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
11284 * For v7 cores this was a plain old undefined encoding and so
11285 * honours its cc check. (We might be using the encoding as
11286 * a semihosting trap, but we don't change the cc check behaviour
11287 * on that account, because a debugger connected to a real v7A
11288 * core and emulating semihosting traps by catching the UNDEF
11289 * exception would also only see cases where the cc check passed.
11290 * No guest code should be trying to do a HLT semihosting trap
11291 * in an IT block anyway.
11296 if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
11297 arm_dc_feature(s, ARM_FEATURE_M)) {
11305 static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
11307 DisasContext *dc = container_of(dcbase, DisasContext, base);
11308 CPUARMState *env = cpu->env_ptr;
11312 if (arm_pre_translate_insn(dc)) {
11316 dc->pc_curr = dc->base.pc_next;
11317 insn = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
11318 is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
11319 dc->base.pc_next += 2;
11321 uint32_t insn2 = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
11323 insn = insn << 16 | insn2;
11324 dc->base.pc_next += 2;
11328 if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
11329 uint32_t cond = dc->condexec_cond;
11332 * Conditionally skip the insn. Note that both 0xe and 0xf mean
11333 * "always"; 0xf is not "never".
11336 arm_skip_unless(dc, cond);
11341 disas_thumb_insn(dc, insn);
11343 disas_thumb2_insn(dc, insn);
11346 /* Advance the Thumb condexec condition. */
11347 if (dc->condexec_mask) {
11348 dc->condexec_cond = ((dc->condexec_cond & 0xe) |
11349 ((dc->condexec_mask >> 4) & 1));
11350 dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
11351 if (dc->condexec_mask == 0) {
11352 dc->condexec_cond = 0;
11356 arm_post_translate_insn(dc);
11358 /* Thumb is a variable-length ISA. Stop translation when the next insn
11359 * will touch a new page. This ensures that prefetch aborts occur at the right place.
11362 * We want to stop the TB if the next insn starts in a new page,
11363 * or if it spans between this page and the next. This means that
11364 * if we're looking at the last halfword in the page we need to
11365 * see if it's a 16-bit Thumb insn (which will fit in this TB)
11366 * or a 32-bit Thumb insn (which won't).
11367 * This is to avoid generating a silly TB with a single 16-bit insn
11368 * in it at the end of this page (which would execute correctly
11369 * but isn't very efficient).
11371 if (dc->base.is_jmp == DISAS_NEXT
11372 && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
11373 || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
11374 && insn_crosses_page(env, dc)))) {
11375 dc->base.is_jmp = DISAS_TOO_MANY;
11379 static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
11381 DisasContext *dc = container_of(dcbase, DisasContext, base);
11383 if (tb_cflags(dc->base.tb) & CF_LAST_IO && dc->condjmp) {
11384 /* FIXME: This can theoretically happen with self-modifying code. */
11385 cpu_abort(cpu, "IO on conditional branch instruction");
11388 /* At this stage dc->condjmp will only be set when the skipped
11389 instruction was a conditional branch or trap, and the PC has
11390 already been written. */
11391 gen_set_condexec(dc);
11392 if (dc->base.is_jmp == DISAS_BX_EXCRET) {
11393 /* Exception return branches need some special case code at the
11394 * end of the TB, which is complex enough that it has to
11395 * handle the single-step vs not and the condition-failed
11396 * insn codepath itself.
11398 gen_bx_excret_final_code(dc);
11399 } else if (unlikely(is_singlestepping(dc))) {
11400 /* Unconditional and "condition passed" instruction codepath. */
11401 switch (dc->base.is_jmp) {
11403 gen_ss_advance(dc);
11404 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
11405 default_exception_el(dc));
11408 gen_ss_advance(dc);
11409 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
11412 gen_ss_advance(dc);
11413 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
11416 case DISAS_TOO_MANY:
11418 gen_set_pc_im(dc, dc->base.pc_next);
11421 /* FIXME: Single stepping a WFI insn will not halt the CPU. */
11422 gen_singlestep_exception(dc);
11424 case DISAS_NORETURN:
11428 /* While branches must always occur at the end of an IT block,
11429 there are a few other things that can cause us to terminate
11430 the TB in the middle of an IT block:
11431 - Exception generating instructions (bkpt, swi, undefined).
11433 - Hardware watchpoints.
11434 Hardware breakpoints have already been handled and skip this code.
11436 switch(dc->base.is_jmp) {
11438 case DISAS_TOO_MANY:
11439 gen_goto_tb(dc, 1, dc->base.pc_next);
11445 gen_set_pc_im(dc, dc->base.pc_next);
11448 /* indicate that the hash table must be used to find the next TB */
11449 tcg_gen_exit_tb(NULL, 0);
11451 case DISAS_NORETURN:
11452 /* nothing more to generate */
11456 TCGv_i32 tmp = tcg_const_i32((dc->thumb &&
11457 !(dc->insn & (1U << 31))) ? 2 : 4);
11459 gen_helper_wfi(cpu_env, tmp);
11460 tcg_temp_free_i32(tmp);
11461 /* The helper doesn't necessarily throw an exception, but we
11462 * must go back to the main loop to check for interrupts anyway.
11464 tcg_gen_exit_tb(NULL, 0);
11468 gen_helper_wfe(cpu_env);
11471 gen_helper_yield(cpu_env);
11474 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
11475 default_exception_el(dc));
11478 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
11481 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
11487 /* "Condition failed" instruction codepath for the branch/trap insn */
11488 gen_set_label(dc->condlabel);
11489 gen_set_condexec(dc);
11490 if (unlikely(is_singlestepping(dc))) {
11491 gen_set_pc_im(dc, dc->base.pc_next);
11492 gen_singlestep_exception(dc);
11494 gen_goto_tb(dc, 1, dc->base.pc_next);
11499 static void arm_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu)
11501 DisasContext *dc = container_of(dcbase, DisasContext, base);
11503 qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
11504 log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
11507 static const TranslatorOps arm_translator_ops = {
11508 .init_disas_context = arm_tr_init_disas_context,
11509 .tb_start = arm_tr_tb_start,
11510 .insn_start = arm_tr_insn_start,
11511 .breakpoint_check = arm_tr_breakpoint_check,
11512 .translate_insn = arm_tr_translate_insn,
11513 .tb_stop = arm_tr_tb_stop,
11514 .disas_log = arm_tr_disas_log,
11517 static const TranslatorOps thumb_translator_ops = {
11518 .init_disas_context = arm_tr_init_disas_context,
11519 .tb_start = arm_tr_tb_start,
11520 .insn_start = arm_tr_insn_start,
11521 .breakpoint_check = arm_tr_breakpoint_check,
11522 .translate_insn = thumb_tr_translate_insn,
11523 .tb_stop = arm_tr_tb_stop,
11524 .disas_log = arm_tr_disas_log,
11527 /* generate intermediate code for basic block 'tb'. */
11528 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
11530 DisasContext dc = { };
11531 const TranslatorOps *ops = &arm_translator_ops;
11533 if (FIELD_EX32(tb->flags, TBFLAG_AM32, THUMB)) {
11534 ops = &thumb_translator_ops;
11536 #ifdef TARGET_AARCH64
11537 if (FIELD_EX32(tb->flags, TBFLAG_ANY, AARCH64_STATE)) {
11538 ops = &aarch64_translator_ops;
11542 translator_loop(ops, &dc.base, cpu, tb, max_insns);
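/*
 * restore_state_to_opc below consumes the three words emitted per
 * guest instruction by arm_tr_insn_start: data[0] is the PC, data[1]
 * the packed condexec (IT) bits, and data[2] the condensed syndrome
 * information, shifted back into place on restore. The condexec word
 * is only meaningful for AArch32; the AArch64 path forces
 * condexec_bits to zero.
 */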
11545 void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
11546 target_ulong *data)
11550 env->condexec_bits = 0;
11551 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
11553 env->regs[15] = data[0];
11554 env->condexec_bits = data[1];
11555 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;