4 * Copyright (c) 2003 Fabrice Bellard
5 * Copyright (c) 2005-2007 CodeSourcery
6 * Copyright (c) 2007 OpenedHand, Ltd.
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21 #include "qemu/osdep.h"
24 #include "internals.h"
25 #include "disas/disas.h"
26 #include "exec/exec-all.h"
28 #include "tcg-op-gvec.h"
30 #include "qemu/bitops.h"
32 #include "hw/semihosting/semihost.h"
34 #include "exec/helper-proto.h"
35 #include "exec/helper-gen.h"
37 #include "trace-tcg.h"
41 #define ENABLE_ARCH_4T arm_dc_feature(s, ARM_FEATURE_V4T)
42 #define ENABLE_ARCH_5 arm_dc_feature(s, ARM_FEATURE_V5)
43 /* currently all emulated v5 cores are also v5TE, so don't bother */
44 #define ENABLE_ARCH_5TE arm_dc_feature(s, ARM_FEATURE_V5)
45 #define ENABLE_ARCH_5J dc_isar_feature(jazelle, s)
46 #define ENABLE_ARCH_6 arm_dc_feature(s, ARM_FEATURE_V6)
47 #define ENABLE_ARCH_6K arm_dc_feature(s, ARM_FEATURE_V6K)
48 #define ENABLE_ARCH_6T2 arm_dc_feature(s, ARM_FEATURE_THUMB2)
49 #define ENABLE_ARCH_7 arm_dc_feature(s, ARM_FEATURE_V7)
50 #define ENABLE_ARCH_8 arm_dc_feature(s, ARM_FEATURE_V8)
52 #define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
54 #include "translate.h"
#if defined(CONFIG_USER_ONLY)
#define IS_USER(s) 1
#else
#define IS_USER(s) (s->user)
#endif
62 /* We reuse the same 64-bit temporaries for efficiency. */
63 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
64 static TCGv_i32 cpu_R[16];
65 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
66 TCGv_i64 cpu_exclusive_addr;
67 TCGv_i64 cpu_exclusive_val;
69 #include "exec/gen-icount.h"
71 static const char * const regnames[] =
72 { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
73 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
75 /* Function prototypes for gen_ functions calling Neon helpers. */
typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
                                 TCGv_i32, TCGv_i32);
/* Function prototypes for gen_ functions for fixed point conversions */
79 typedef void VFPGenFixPointFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
81 /* initialize TCG globals. */
void arm_translate_init(void)
{
    int i;

    for (i = 0; i < 16; i++) {
        cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
                                          offsetof(CPUARMState, regs[i]),
                                          regnames[i]);
    }
91 cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
92 cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
93 cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
94 cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
96 cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
97 offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
98 cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
99 offsetof(CPUARMState, exclusive_val), "exclusive_val");
101 a64_translate_init();
/* Flags for the disas_set_da_iss info argument:
 * lower bits hold the Rt register number, higher bits are flags.
 */
typedef enum ISSInfo {
    ISSNone = 0,
    ISSRegMask = 0x1f,
    ISSInvalid = (1 << 5),
    ISSIsAcqRel = (1 << 6),
    ISSIsWrite = (1 << 7),
    ISSIs16Bit = (1 << 8),
} ISSInfo;
116 /* Save the syndrome information for a Data Abort */
static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
{
    uint32_t syn;
120 int sas = memop & MO_SIZE;
121 bool sse = memop & MO_SIGN;
122 bool is_acqrel = issinfo & ISSIsAcqRel;
123 bool is_write = issinfo & ISSIsWrite;
124 bool is_16bit = issinfo & ISSIs16Bit;
125 int srt = issinfo & ISSRegMask;
127 if (issinfo & ISSInvalid) {
        /* Some callsites want to conditionally provide ISS info,
         * eg "only if this was not a writeback"
         */
        return;
    }
    /* For AArch32, insns where the src/dest is R15 never generate
     * ISS information. Catching that here saves checking at all
     * the call sites.
     */
    if (srt == 15) {
        return;
    }
142 syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
143 0, 0, 0, is_write, 0, is_16bit);
144 disas_set_insn_syndrome(s, syn);
147 static inline int get_a32_user_mem_index(DisasContext *s)
    /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
     * insns:
     *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
     *  otherwise, access as if at PL0.
     */
154 switch (s->mmu_idx) {
155 case ARMMMUIdx_S1E2: /* this one is UNPREDICTABLE */
156 case ARMMMUIdx_S12NSE0:
157 case ARMMMUIdx_S12NSE1:
158 return arm_to_core_mmu_idx(ARMMMUIdx_S12NSE0);
160 case ARMMMUIdx_S1SE0:
161 case ARMMMUIdx_S1SE1:
162 return arm_to_core_mmu_idx(ARMMMUIdx_S1SE0);
163 case ARMMMUIdx_MUser:
164 case ARMMMUIdx_MPriv:
165 return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
166 case ARMMMUIdx_MUserNegPri:
167 case ARMMMUIdx_MPrivNegPri:
168 return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
169 case ARMMMUIdx_MSUser:
170 case ARMMMUIdx_MSPriv:
171 return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
172 case ARMMMUIdx_MSUserNegPri:
173 case ARMMMUIdx_MSPrivNegPri:
174 return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
177 g_assert_not_reached();
181 static inline TCGv_i32 load_cpu_offset(int offset)
183 TCGv_i32 tmp = tcg_temp_new_i32();
184 tcg_gen_ld_i32(tmp, cpu_env, offset);
188 #define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))
190 static inline void store_cpu_offset(TCGv_i32 var, int offset)
192 tcg_gen_st_i32(var, cpu_env, offset);
193 tcg_temp_free_i32(var);
196 #define store_cpu_field(var, name) \
197 store_cpu_offset(var, offsetof(CPUARMState, name))
199 /* The architectural value of PC. */
200 static uint32_t read_pc(DisasContext *s)
202 return s->pc_curr + (s->thumb ? 4 : 8);
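/*
 * Added worked example (editor's illustration, not upstream text): for an
 * A32 insn at 0x1000, read_pc() returns 0x1008; for a T32 insn at 0x1000
 * it returns 0x1004. This matches the architectural rule that reads of the
 * PC see the current insn address plus 8 (ARM) or plus 4 (Thumb).
 */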
205 /* Set a variable to the value of a CPU register. */
static void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
{
    if (reg == 15) {
        tcg_gen_movi_i32(var, read_pc(s));
    } else {
        tcg_gen_mov_i32(var, cpu_R[reg]);
    }
}
215 /* Create a new temporary and set it to the value of a CPU register. */
216 static inline TCGv_i32 load_reg(DisasContext *s, int reg)
218 TCGv_i32 tmp = tcg_temp_new_i32();
219 load_reg_var(s, tmp, reg);
/*
 * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
 * This is used for load/store for which use of PC implies (literal),
 * or ADD that implies ADR.
 */
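/*
 * Added example (editor's note): for a Thumb LDR (literal) with
 * pc_curr = 0x1002, read_pc() gives 0x1006, which is aligned down to
 * 0x1004 before the literal offset is added, as the architecture's
 * Align(PC, 4) rule for PC-relative addressing requires.
 */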
static TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
{
    TCGv_i32 tmp = tcg_temp_new_i32();

    if (reg == 15) {
        tcg_gen_movi_i32(tmp, (read_pc(s) & ~3) + ofs);
    } else {
        tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
    }
    return tmp;
}
/* Set a CPU register.  The source must be a temporary and will be
   marked as dead. */
242 static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
245 /* In Thumb mode, we must ignore bit 0.
246 * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
247 * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
248 * We choose to ignore [1:0] in ARM mode for all architecture versions.
250 tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
251 s->base.is_jmp = DISAS_JUMP;
253 tcg_gen_mov_i32(cpu_R[reg], var);
254 tcg_temp_free_i32(var);
258 * Variant of store_reg which applies v8M stack-limit checks before updating
259 * SP. If the check fails this will result in an exception being taken.
260 * We disable the stack checks for CONFIG_USER_ONLY because we have
261 * no idea what the stack limits should be in that case.
262 * If stack checking is not being done this just acts like store_reg().
264 static void store_sp_checked(DisasContext *s, TCGv_i32 var)
266 #ifndef CONFIG_USER_ONLY
267 if (s->v8m_stackcheck) {
        gen_helper_v8m_stackcheck(cpu_env, var);
    }
#endif
    store_reg(s, 13, var);
274 /* Value extensions. */
275 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
276 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
277 #define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
278 #define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
280 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
281 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
284 static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
286 TCGv_i32 tmp_mask = tcg_const_i32(mask);
287 gen_helper_cpsr_write(cpu_env, var, tmp_mask);
288 tcg_temp_free_i32(tmp_mask);
290 /* Set NZCV flags from the high 4 bits of var. */
291 #define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
293 static void gen_exception_internal(int excp)
295 TCGv_i32 tcg_excp = tcg_const_i32(excp);
297 assert(excp_is_internal(excp));
298 gen_helper_exception_internal(cpu_env, tcg_excp);
299 tcg_temp_free_i32(tcg_excp);
302 static void gen_step_complete_exception(DisasContext *s)
/* We just completed a step of an insn. Move from Active-not-pending
305 * to Active-pending, and then also take the swstep exception.
306 * This corresponds to making the (IMPDEF) choice to prioritize
307 * swstep exceptions over asynchronous exceptions taken to an exception
308 * level where debug is disabled. This choice has the advantage that
309 * we do not need to maintain internal state corresponding to the
310 * ISV/EX syndrome bits between completion of the step and generation
311 * of the exception, and our syndrome information is always correct.
314 gen_swstep_exception(s, 1, s->is_ldex);
315 s->base.is_jmp = DISAS_NORETURN;
318 static void gen_singlestep_exception(DisasContext *s)
320 /* Generate the right kind of exception for singlestep, which is
321 * either the architectural singlestep or EXCP_DEBUG for QEMU's
     * gdb singlestepping.
     */
    if (s->ss_active) {
        gen_step_complete_exception(s);
    } else {
        gen_exception_internal(EXCP_DEBUG);
    }
331 static inline bool is_singlestepping(DisasContext *s)
333 /* Return true if we are singlestepping either because of
334 * architectural singlestep or QEMU gdbstub singlestep. This does
335 * not include the command line '-singlestep' mode which is rather
336 * misnamed as it only means "one instruction per TB" and doesn't
337 * affect the code we generate.
339 return s->base.singlestep_enabled || s->ss_active;
342 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
344 TCGv_i32 tmp1 = tcg_temp_new_i32();
345 TCGv_i32 tmp2 = tcg_temp_new_i32();
346 tcg_gen_ext16s_i32(tmp1, a);
347 tcg_gen_ext16s_i32(tmp2, b);
348 tcg_gen_mul_i32(tmp1, tmp1, tmp2);
349 tcg_temp_free_i32(tmp2);
350 tcg_gen_sari_i32(a, a, 16);
351 tcg_gen_sari_i32(b, b, 16);
352 tcg_gen_mul_i32(b, b, a);
353 tcg_gen_mov_i32(a, tmp1);
354 tcg_temp_free_i32(tmp1);
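/*
 * Added note: on return, 'a' holds the signed product of the two low
 * halfwords and 'b' holds the signed product of the two high halfwords,
 * the pair that the dual-multiply insns (SMUAD and friends) then combine.
 */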
357 /* Byteswap each halfword. */
358 static void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
360 TCGv_i32 tmp = tcg_temp_new_i32();
361 TCGv_i32 mask = tcg_const_i32(0x00ff00ff);
362 tcg_gen_shri_i32(tmp, var, 8);
363 tcg_gen_and_i32(tmp, tmp, mask);
364 tcg_gen_and_i32(var, var, mask);
365 tcg_gen_shli_i32(var, var, 8);
366 tcg_gen_or_i32(dest, var, tmp);
367 tcg_temp_free_i32(mask);
368 tcg_temp_free_i32(tmp);
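/*
 * Added worked example: 0x11223344 -> 0x22114433, i.e. the two bytes
 * within each 16-bit halfword are swapped, as REV16 requires.
 */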
371 /* Byteswap low halfword and sign extend. */
372 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
374 tcg_gen_ext16u_i32(var, var);
375 tcg_gen_bswap16_i32(var, var);
376 tcg_gen_ext16s_i32(dest, var);
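/*
 * Added worked example: for 0x1234abcd the low halfword 0xabcd is
 * byte-swapped to 0xcdab and then sign-extended to 0xffffcdab,
 * matching REVSH.
 */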
379 /* 32x32->64 multiply. Marks inputs as dead. */
380 static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
382 TCGv_i32 lo = tcg_temp_new_i32();
    TCGv_i32 hi = tcg_temp_new_i32();
    TCGv_i64 ret;
386 tcg_gen_mulu2_i32(lo, hi, a, b);
387 tcg_temp_free_i32(a);
388 tcg_temp_free_i32(b);
390 ret = tcg_temp_new_i64();
391 tcg_gen_concat_i32_i64(ret, lo, hi);
392 tcg_temp_free_i32(lo);
393 tcg_temp_free_i32(hi);
398 static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
400 TCGv_i32 lo = tcg_temp_new_i32();
    TCGv_i32 hi = tcg_temp_new_i32();
    TCGv_i64 ret;
404 tcg_gen_muls2_i32(lo, hi, a, b);
405 tcg_temp_free_i32(a);
406 tcg_temp_free_i32(b);
408 ret = tcg_temp_new_i64();
409 tcg_gen_concat_i32_i64(ret, lo, hi);
410 tcg_temp_free_i32(lo);
411 tcg_temp_free_i32(hi);
416 /* Swap low and high halfwords. */
417 static void gen_swap_half(TCGv_i32 var)
419 tcg_gen_rotri_i32(var, var, 16);
/* Dual 16-bit add.  Result placed in dest; t0 and t1 are clobbered.
 *  tmp = (t0 ^ t1) & 0x8000;
 *  t0 &= ~0x8000;
 *  t1 &= ~0x8000;
 *  dest = (t0 + t1) ^ tmp;
 */
429 static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
431 TCGv_i32 tmp = tcg_temp_new_i32();
432 tcg_gen_xor_i32(tmp, t0, t1);
433 tcg_gen_andi_i32(tmp, tmp, 0x8000);
434 tcg_gen_andi_i32(t0, t0, ~0x8000);
435 tcg_gen_andi_i32(t1, t1, ~0x8000);
436 tcg_gen_add_i32(t0, t0, t1);
437 tcg_gen_xor_i32(dest, t0, tmp);
438 tcg_temp_free_i32(tmp);
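/*
 * Added example: t0 = 0x0001ffff, t1 = 0x00000001 yields 0x00010000.
 * Masking bit 15 out of both operands and restoring it via the XOR stops
 * any carry out of the low halfword from reaching the high halfword.
 */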
441 /* Set N and Z flags from var. */
442 static inline void gen_logic_CC(TCGv_i32 var)
444 tcg_gen_mov_i32(cpu_NF, var);
445 tcg_gen_mov_i32(cpu_ZF, var);
448 /* dest = T0 + T1 + CF. */
449 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
451 tcg_gen_add_i32(dest, t0, t1);
452 tcg_gen_add_i32(dest, dest, cpu_CF);
455 /* dest = T0 - T1 + CF - 1. */
456 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
458 tcg_gen_sub_i32(dest, t0, t1);
459 tcg_gen_add_i32(dest, dest, cpu_CF);
460 tcg_gen_subi_i32(dest, dest, 1);
463 /* dest = T0 + T1. Compute C, N, V and Z flags */
464 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
466 TCGv_i32 tmp = tcg_temp_new_i32();
467 tcg_gen_movi_i32(tmp, 0);
468 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
469 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
470 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
471 tcg_gen_xor_i32(tmp, t0, t1);
472 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
473 tcg_temp_free_i32(tmp);
474 tcg_gen_mov_i32(dest, cpu_NF);
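/*
 * Added note: V is computed as (result ^ t0) & ~(t0 ^ t1), i.e. signed
 * overflow occurred iff the operands had the same sign and the result's
 * sign differs from them.
 */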
477 /* dest = T0 + T1 + CF. Compute C, N, V and Z flags */
478 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
480 TCGv_i32 tmp = tcg_temp_new_i32();
481 if (TCG_TARGET_HAS_add2_i32) {
482 tcg_gen_movi_i32(tmp, 0);
483 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
484 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
    } else {
        TCGv_i64 q0 = tcg_temp_new_i64();
487 TCGv_i64 q1 = tcg_temp_new_i64();
488 tcg_gen_extu_i32_i64(q0, t0);
489 tcg_gen_extu_i32_i64(q1, t1);
490 tcg_gen_add_i64(q0, q0, q1);
491 tcg_gen_extu_i32_i64(q1, cpu_CF);
492 tcg_gen_add_i64(q0, q0, q1);
493 tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
494 tcg_temp_free_i64(q0);
        tcg_temp_free_i64(q1);
    }
497 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
498 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
499 tcg_gen_xor_i32(tmp, t0, t1);
500 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
501 tcg_temp_free_i32(tmp);
502 tcg_gen_mov_i32(dest, cpu_NF);
505 /* dest = T0 - T1. Compute C, N, V and Z flags */
static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp;
    tcg_gen_sub_i32(cpu_NF, t0, t1);
510 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
511 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
512 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
513 tmp = tcg_temp_new_i32();
514 tcg_gen_xor_i32(tmp, t0, t1);
515 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
516 tcg_temp_free_i32(tmp);
517 tcg_gen_mov_i32(dest, cpu_NF);
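/*
 * Added note: TCG_COND_GEU sets C = 1 when t0 >= t1 (no borrow), which is
 * the Arm convention that C is the inverse of borrow for SUBS/CMP.
 */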
520 /* dest = T0 + ~T1 + CF. Compute C, N, V and Z flags */
521 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
523 TCGv_i32 tmp = tcg_temp_new_i32();
524 tcg_gen_not_i32(tmp, t1);
525 gen_adc_CC(dest, t0, tmp);
526 tcg_temp_free_i32(tmp);
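/*
 * Added note: this relies on the identity t0 - t1 - (1 - C) == t0 + ~t1 + C,
 * so SBC can reuse the ADC flag computation on the complemented operand.
 */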
529 #define GEN_SHIFT(name) \
530 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) \
532 TCGv_i32 tmp1, tmp2, tmp3; \
533 tmp1 = tcg_temp_new_i32(); \
534 tcg_gen_andi_i32(tmp1, t1, 0xff); \
535 tmp2 = tcg_const_i32(0); \
536 tmp3 = tcg_const_i32(0x1f); \
537 tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0); \
538 tcg_temp_free_i32(tmp3); \
539 tcg_gen_andi_i32(tmp1, tmp1, 0x1f); \
540 tcg_gen_##name##_i32(dest, tmp2, tmp1); \
541 tcg_temp_free_i32(tmp2); \
542 tcg_temp_free_i32(tmp1); \
static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
    TCGv_i32 tmp1, tmp2;
    tmp1 = tcg_temp_new_i32();
552 tcg_gen_andi_i32(tmp1, t1, 0xff);
553 tmp2 = tcg_const_i32(0x1f);
554 tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
555 tcg_temp_free_i32(tmp2);
556 tcg_gen_sar_i32(dest, t0, tmp1);
557 tcg_temp_free_i32(tmp1);
560 static void shifter_out_im(TCGv_i32 var, int shift)
562 tcg_gen_extract_i32(cpu_CF, var, shift, 1);
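/*
 * Added note: this copies bit 'shift' of var into CF, i.e. the last bit
 * shifted out by the immediate-shift helpers below.
 */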
565 /* Shift by immediate. Includes special handling for shift == 0. */
566 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
567 int shift, int flags)
573 shifter_out_im(var, 32 - shift);
574 tcg_gen_shli_i32(var, var, shift);
580 tcg_gen_shri_i32(cpu_CF, var, 31);
582 tcg_gen_movi_i32(var, 0);
585 shifter_out_im(var, shift - 1);
586 tcg_gen_shri_i32(var, var, shift);
593 shifter_out_im(var, shift - 1);
596 tcg_gen_sari_i32(var, var, shift);
598 case 3: /* ROR/RRX */
601 shifter_out_im(var, shift - 1);
602 tcg_gen_rotri_i32(var, var, shift); break;
604 TCGv_i32 tmp = tcg_temp_new_i32();
605 tcg_gen_shli_i32(tmp, cpu_CF, 31);
607 shifter_out_im(var, 0);
608 tcg_gen_shri_i32(var, var, 1);
609 tcg_gen_or_i32(var, var, tmp);
610 tcg_temp_free_i32(tmp);
615 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
616 TCGv_i32 shift, int flags)
620 case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
621 case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
622 case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
623 case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
628 gen_shl(var, var, shift);
631 gen_shr(var, var, shift);
634 gen_sar(var, var, shift);
636 case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
637 tcg_gen_rotr_i32(var, var, shift); break;
640 tcg_temp_free_i32(shift);
/*
 * Generate a conditional based on ARM condition code cc.
 * This is common between ARM and AArch64 targets.
 */
647 void arm_test_cc(DisasCompare *cmp, int cc)
678 case 8: /* hi: C && !Z */
679 case 9: /* ls: !C || Z -> !(C && !Z) */
681 value = tcg_temp_new_i32();
683 /* CF is 1 for C, so -CF is an all-bits-set mask for C;
684 ZF is non-zero for !Z; so AND the two subexpressions. */
685 tcg_gen_neg_i32(value, cpu_CF);
686 tcg_gen_and_i32(value, value, cpu_ZF);
689 case 10: /* ge: N == V -> N ^ V == 0 */
690 case 11: /* lt: N != V -> N ^ V != 0 */
691 /* Since we're only interested in the sign bit, == 0 is >= 0. */
693 value = tcg_temp_new_i32();
695 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
698 case 12: /* gt: !Z && N == V */
699 case 13: /* le: Z || N != V */
701 value = tcg_temp_new_i32();
703 /* (N == V) is equal to the sign bit of ~(NF ^ VF). Propagate
704 * the sign bit then AND with ZF to yield the result. */
705 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
706 tcg_gen_sari_i32(value, value, 31);
707 tcg_gen_andc_i32(value, cpu_ZF, value);
710 case 14: /* always */
711 case 15: /* always */
712 /* Use the ALWAYS condition, which will fold early.
713 * It doesn't matter what we use for the value. */
714 cond = TCG_COND_ALWAYS;
719 fprintf(stderr, "Bad condition code 0x%x\n", cc);
724 cond = tcg_invert_cond(cond);
730 cmp->value_global = global;
733 void arm_free_cc(DisasCompare *cmp)
735 if (!cmp->value_global) {
736 tcg_temp_free_i32(cmp->value);
740 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
742 tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
745 void arm_gen_test_cc(int cc, TCGLabel *label)
748 arm_test_cc(&cmp, cc);
749 arm_jump_cc(&cmp, label);
753 static inline void gen_set_condexec(DisasContext *s)
755 if (s->condexec_mask) {
756 uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
757 TCGv_i32 tmp = tcg_temp_new_i32();
758 tcg_gen_movi_i32(tmp, val);
759 store_cpu_field(tmp, condexec_bits);
763 static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
765 tcg_gen_movi_i32(cpu_R[15], val);
768 /* Set PC and Thumb state from an immediate address. */
769 static inline void gen_bx_im(DisasContext *s, uint32_t addr)
773 s->base.is_jmp = DISAS_JUMP;
774 if (s->thumb != (addr & 1)) {
775 tmp = tcg_temp_new_i32();
776 tcg_gen_movi_i32(tmp, addr & 1);
777 tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUARMState, thumb));
778 tcg_temp_free_i32(tmp);
780 tcg_gen_movi_i32(cpu_R[15], addr & ~1);
783 /* Set PC and Thumb state from var. var is marked as dead. */
784 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
786 s->base.is_jmp = DISAS_JUMP;
787 tcg_gen_andi_i32(cpu_R[15], var, ~1);
788 tcg_gen_andi_i32(var, var, 1);
789 store_cpu_field(var, thumb);
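/*
 * Added note: per the BX/BLX interworking rule, bit 0 of the target selects
 * the Thumb state and bits [31:1] form the new PC (bit 0 of the PC itself
 * is always written as 0).
 */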
793 * Set PC and Thumb state from var. var is marked as dead.
794 * For M-profile CPUs, include logic to detect exception-return
795 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
796 * and BX reg, and no others, and happens only for code in Handler mode.
797 * The Security Extension also requires us to check for the FNC_RETURN
798 * which signals a function return from non-secure state; this can happen
799 * in both Handler and Thread mode.
800 * To avoid having to do multiple comparisons in inline generated code,
801 * we make the check we do here loose, so it will match for EXC_RETURN
802 * in Thread mode. For system emulation do_v7m_exception_exit() checks
803 * for these spurious cases and returns without doing anything (giving
804 * the same behaviour as for a branch to a non-magic address).
806 * In linux-user mode it is unclear what the right behaviour for an
807 * attempted FNC_RETURN should be, because in real hardware this will go
808 * directly to Secure code (ie not the Linux kernel) which will then treat
809 * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
810 * attempt behave the way it would on a CPU without the security extension,
811 * which is to say "like a normal branch". That means we can simply treat
812 * all branches as normal with no magic address behaviour.
814 static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
816 /* Generate the same code here as for a simple bx, but flag via
817 * s->base.is_jmp that we need to do the rest of the work later.
820 #ifndef CONFIG_USER_ONLY
821 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
822 (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
        s->base.is_jmp = DISAS_BX_EXCRET;
    }
#endif
828 static inline void gen_bx_excret_final_code(DisasContext *s)
830 /* Generate the code to finish possible exception return and end the TB */
831 TCGLabel *excret_label = gen_new_label();
834 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
835 /* Covers FNC_RETURN and EXC_RETURN magic */
836 min_magic = FNC_RETURN_MIN_MAGIC;
838 /* EXC_RETURN magic only */
839 min_magic = EXC_RETURN_MIN_MAGIC;
842 /* Is the new PC value in the magic range indicating exception return? */
843 tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label);
844 /* No: end the TB as we would for a DISAS_JMP */
845 if (is_singlestepping(s)) {
846 gen_singlestep_exception(s);
848 tcg_gen_exit_tb(NULL, 0);
850 gen_set_label(excret_label);
851 /* Yes: this is an exception return.
852 * At this point in runtime env->regs[15] and env->thumb will hold
853 * the exception-return magic number, which do_v7m_exception_exit()
854 * will read. Nothing else will be able to see those values because
855 * the cpu-exec main loop guarantees that we will always go straight
856 * from raising the exception to the exception-handling code.
858 * gen_ss_advance(s) does nothing on M profile currently but
859 * calling it is conceptually the right thing as we have executed
860 * this instruction (compare SWI, HVC, SMC handling).
863 gen_exception_internal(EXCP_EXCEPTION_EXIT);
866 static inline void gen_bxns(DisasContext *s, int rm)
868 TCGv_i32 var = load_reg(s, rm);
870 /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
871 * we need to sync state before calling it, but:
872 * - we don't need to do gen_set_pc_im() because the bxns helper will
873 * always set the PC itself
874 * - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
875 * unless it's outside an IT block or the last insn in an IT block,
876 * so we know that condexec == 0 (already set at the top of the TB)
877 * is correct in the non-UNPREDICTABLE cases, and we can choose
878 * "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
880 gen_helper_v7m_bxns(cpu_env, var);
881 tcg_temp_free_i32(var);
882 s->base.is_jmp = DISAS_EXIT;
885 static inline void gen_blxns(DisasContext *s, int rm)
887 TCGv_i32 var = load_reg(s, rm);
889 /* We don't need to sync condexec state, for the same reason as bxns.
890 * We do however need to set the PC, because the blxns helper reads it.
891 * The blxns helper may throw an exception.
893 gen_set_pc_im(s, s->base.pc_next);
894 gen_helper_v7m_blxns(cpu_env, var);
895 tcg_temp_free_i32(var);
896 s->base.is_jmp = DISAS_EXIT;
899 /* Variant of store_reg which uses branch&exchange logic when storing
900 to r15 in ARM architecture v7 and above. The source must be a temporary
901 and will be marked as dead. */
902 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
904 if (reg == 15 && ENABLE_ARCH_7) {
907 store_reg(s, reg, var);
911 /* Variant of store_reg which uses branch&exchange logic when storing
912 * to r15 in ARM architecture v5T and above. This is used for storing
913 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
914 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
915 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
917 if (reg == 15 && ENABLE_ARCH_5) {
918 gen_bx_excret(s, var);
920 store_reg(s, reg, var);
924 #ifdef CONFIG_USER_ONLY
#define IS_USER_ONLY 1
#else
#define IS_USER_ONLY 0
#endif
930 /* Abstractions of "generate code to do a guest load/store for
931 * AArch32", where a vaddr is always 32 bits (and is zero
932 * extended if we're a 64 bit core) and data is also
933 * 32 bits unless specifically doing a 64 bit access.
934 * These functions work like tcg_gen_qemu_{ld,st}* except
935 * that the address argument is TCGv_i32 rather than TCGv.
938 static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
940 TCGv addr = tcg_temp_new();
941 tcg_gen_extu_i32_tl(addr, a32);
943 /* Not needed for user-mode BE32, where we use MO_BE instead. */
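    /*
     * Added example: with SCTLR.B set (BE32), a byte access XORs the address
     * with 3 and a halfword access XORs it with 2, so e.g. a byte load of
     * address 0 actually touches byte 3 of its word -- the usual
     * word-invariant big-endian address munging.
     */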
944 if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
945 tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
950 static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
951 int index, MemOp opc)
955 if (arm_dc_feature(s, ARM_FEATURE_M) &&
956 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
960 addr = gen_aa32_addr(s, a32, opc);
961 tcg_gen_qemu_ld_i32(val, addr, index, opc);
965 static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
966 int index, MemOp opc)
970 if (arm_dc_feature(s, ARM_FEATURE_M) &&
971 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
975 addr = gen_aa32_addr(s, a32, opc);
976 tcg_gen_qemu_st_i32(val, addr, index, opc);
980 #define DO_GEN_LD(SUFF, OPC) \
981 static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
982 TCGv_i32 a32, int index) \
984 gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data); \
987 #define DO_GEN_ST(SUFF, OPC) \
988 static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
989 TCGv_i32 a32, int index) \
991 gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data); \
994 static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val)
996 /* Not needed for user-mode BE32, where we use MO_BE instead. */
997 if (!IS_USER_ONLY && s->sctlr_b) {
998 tcg_gen_rotri_i64(val, val, 32);
1002 static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1003 int index, MemOp opc)
1005 TCGv addr = gen_aa32_addr(s, a32, opc);
1006 tcg_gen_qemu_ld_i64(val, addr, index, opc);
1007 gen_aa32_frob64(s, val);
1008 tcg_temp_free(addr);
1011 static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
1012 TCGv_i32 a32, int index)
1014 gen_aa32_ld_i64(s, val, a32, index, MO_Q | s->be_data);
1017 static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1018 int index, MemOp opc)
1020 TCGv addr = gen_aa32_addr(s, a32, opc);
1022 /* Not needed for user-mode BE32, where we use MO_BE instead. */
1023 if (!IS_USER_ONLY && s->sctlr_b) {
1024 TCGv_i64 tmp = tcg_temp_new_i64();
1025 tcg_gen_rotri_i64(tmp, val, 32);
1026 tcg_gen_qemu_st_i64(tmp, addr, index, opc);
1027 tcg_temp_free_i64(tmp);
1029 tcg_gen_qemu_st_i64(val, addr, index, opc);
1031 tcg_temp_free(addr);
1034 static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
1035 TCGv_i32 a32, int index)
1037 gen_aa32_st_i64(s, val, a32, index, MO_Q | s->be_data);
1040 DO_GEN_LD(8u, MO_UB)
1041 DO_GEN_LD(16u, MO_UW)
1042 DO_GEN_LD(32u, MO_UL)
1044 DO_GEN_ST(16, MO_UW)
1045 DO_GEN_ST(32, MO_UL)
1047 static inline void gen_hvc(DisasContext *s, int imm16)
1049 /* The pre HVC helper handles cases when HVC gets trapped
1050 * as an undefined insn by runtime configuration (ie before
1051 * the insn really executes).
1053 gen_set_pc_im(s, s->pc_curr);
1054 gen_helper_pre_hvc(cpu_env);
1055 /* Otherwise we will treat this as a real exception which
1056 * happens after execution of the insn. (The distinction matters
1057 * for the PC value reported to the exception handler and also
1058 * for single stepping.)
1061 gen_set_pc_im(s, s->base.pc_next);
1062 s->base.is_jmp = DISAS_HVC;
1065 static inline void gen_smc(DisasContext *s)
1067 /* As with HVC, we may take an exception either before or after
1068 * the insn executes.
1072 gen_set_pc_im(s, s->pc_curr);
1073 tmp = tcg_const_i32(syn_aa32_smc());
1074 gen_helper_pre_smc(cpu_env, tmp);
1075 tcg_temp_free_i32(tmp);
1076 gen_set_pc_im(s, s->base.pc_next);
1077 s->base.is_jmp = DISAS_SMC;
1080 static void gen_exception_internal_insn(DisasContext *s, uint32_t pc, int excp)
1082 gen_set_condexec(s);
1083 gen_set_pc_im(s, pc);
1084 gen_exception_internal(excp);
1085 s->base.is_jmp = DISAS_NORETURN;
1088 static void gen_exception_insn(DisasContext *s, uint32_t pc, int excp,
1089 int syn, uint32_t target_el)
1091 gen_set_condexec(s);
1092 gen_set_pc_im(s, pc);
1093 gen_exception(excp, syn, target_el);
1094 s->base.is_jmp = DISAS_NORETURN;
1097 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1101 gen_set_condexec(s);
1102 gen_set_pc_im(s, s->pc_curr);
1103 tcg_syn = tcg_const_i32(syn);
1104 gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
1105 tcg_temp_free_i32(tcg_syn);
1106 s->base.is_jmp = DISAS_NORETURN;
1109 static void unallocated_encoding(DisasContext *s)
1111 /* Unallocated and reserved encodings are uncategorized */
1112 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(),
1113 default_exception_el(s));
1116 /* Force a TB lookup after an instruction that changes the CPU state. */
1117 static inline void gen_lookup_tb(DisasContext *s)
1119 tcg_gen_movi_i32(cpu_R[15], s->base.pc_next);
1120 s->base.is_jmp = DISAS_EXIT;
1123 static inline void gen_hlt(DisasContext *s, int imm)
1125 /* HLT. This has two purposes.
1126 * Architecturally, it is an external halting debug instruction.
 * Since QEMU doesn't implement external debug, we treat this the way
 * the architecture requires when halting debug is disabled: it will UNDEF.
1129 * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1130 * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1131 * must trigger semihosting even for ARMv7 and earlier, where
1132 * HLT was an undefined encoding.
1133 * In system mode, we don't allow userspace access to
1134 * semihosting, to provide some semblance of security
1135 * (and for consistency with our 32-bit semihosting).
1137 if (semihosting_enabled() &&
1138 #ifndef CONFIG_USER_ONLY
        s->current_el != 0 &&
#endif
        (imm == (s->thumb ? 0x3c : 0xf000))) {
1142 gen_exception_internal_insn(s, s->base.pc_next, EXCP_SEMIHOST);
1146 unallocated_encoding(s);
1149 static TCGv_ptr get_fpstatus_ptr(int neon)
1151 TCGv_ptr statusptr = tcg_temp_new_ptr();
1154 offset = offsetof(CPUARMState, vfp.standard_fp_status);
1156 offset = offsetof(CPUARMState, vfp.fp_status);
1158 tcg_gen_addi_ptr(statusptr, cpu_env, offset);
1162 static inline long vfp_reg_offset(bool dp, unsigned reg)
1165 return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1167 long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
1169 ofs += offsetof(CPU_DoubleU, l.upper);
1171 ofs += offsetof(CPU_DoubleU, l.lower);
1177 /* Return the offset of a 32-bit piece of a NEON register.
1178 zero is the least significant end of the register. */
1180 neon_reg_offset (int reg, int n)
1184 return vfp_reg_offset(0, sreg);
1187 /* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1188 * where 0 is the least significant end of the register.
1191 neon_element_offset(int reg, int element, MemOp size)
1193 int element_size = 1 << size;
1194 int ofs = element * element_size;
1195 #ifdef HOST_WORDS_BIGENDIAN
    /* Calculate the offset assuming fully little-endian,
     * then XOR to account for the order of the 8-byte units.
     */
1199 if (element_size < 8) {
        ofs ^= 8 - element_size;
    }
#endif
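/*
 * Added example: on a big-endian host, element 0 of a vector of 16-bit
 * elements lives at byte offset 6 of its 8-byte unit (0 ^ (8 - 2)),
 * whereas on a little-endian host it stays at offset 0.
 */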
1203 return neon_reg_offset(reg, 0) + ofs;
1206 static TCGv_i32 neon_load_reg(int reg, int pass)
1208 TCGv_i32 tmp = tcg_temp_new_i32();
1209 tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
1213 static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
1215 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1219 tcg_gen_ld8u_i32(var, cpu_env, offset);
1222 tcg_gen_ld16u_i32(var, cpu_env, offset);
1225 tcg_gen_ld_i32(var, cpu_env, offset);
1228 g_assert_not_reached();
1232 static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
1234 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1238 tcg_gen_ld8u_i64(var, cpu_env, offset);
1241 tcg_gen_ld16u_i64(var, cpu_env, offset);
1244 tcg_gen_ld32u_i64(var, cpu_env, offset);
1247 tcg_gen_ld_i64(var, cpu_env, offset);
1250 g_assert_not_reached();
1254 static void neon_store_reg(int reg, int pass, TCGv_i32 var)
1256 tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
1257 tcg_temp_free_i32(var);
1260 static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
1262 long offset = neon_element_offset(reg, ele, size);
1266 tcg_gen_st8_i32(var, cpu_env, offset);
1269 tcg_gen_st16_i32(var, cpu_env, offset);
1272 tcg_gen_st_i32(var, cpu_env, offset);
1275 g_assert_not_reached();
1279 static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
1281 long offset = neon_element_offset(reg, ele, size);
1285 tcg_gen_st8_i64(var, cpu_env, offset);
1288 tcg_gen_st16_i64(var, cpu_env, offset);
1291 tcg_gen_st32_i64(var, cpu_env, offset);
1294 tcg_gen_st_i64(var, cpu_env, offset);
1297 g_assert_not_reached();
1301 static inline void neon_load_reg64(TCGv_i64 var, int reg)
1303 tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
1306 static inline void neon_store_reg64(TCGv_i64 var, int reg)
1308 tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
1311 static inline void neon_load_reg32(TCGv_i32 var, int reg)
1313 tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
1316 static inline void neon_store_reg32(TCGv_i32 var, int reg)
1318 tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
1321 static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
1323 TCGv_ptr ret = tcg_temp_new_ptr();
1324 tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
1328 #define ARM_CP_RW_BIT (1 << 20)
1330 /* Include the VFP decoder */
1331 #include "translate-vfp.inc.c"
1333 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1335 tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1338 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1340 tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1343 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1345 TCGv_i32 var = tcg_temp_new_i32();
1346 tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1350 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1352 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1353 tcg_temp_free_i32(var);
1356 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1358 iwmmxt_store_reg(cpu_M0, rn);
1361 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1363 iwmmxt_load_reg(cpu_M0, rn);
1366 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1368 iwmmxt_load_reg(cpu_V1, rn);
1369 tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1372 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1374 iwmmxt_load_reg(cpu_V1, rn);
1375 tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1378 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1380 iwmmxt_load_reg(cpu_V1, rn);
1381 tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1384 #define IWMMXT_OP(name) \
1385 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1387 iwmmxt_load_reg(cpu_V1, rn); \
1388 gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1391 #define IWMMXT_OP_ENV(name) \
1392 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1394 iwmmxt_load_reg(cpu_V1, rn); \
1395 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1398 #define IWMMXT_OP_ENV_SIZE(name) \
1399 IWMMXT_OP_ENV(name##b) \
1400 IWMMXT_OP_ENV(name##w) \
1401 IWMMXT_OP_ENV(name##l)
1403 #define IWMMXT_OP_ENV1(name) \
1404 static inline void gen_op_iwmmxt_##name##_M0(void) \
1406 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1420 IWMMXT_OP_ENV_SIZE(unpackl)
1421 IWMMXT_OP_ENV_SIZE(unpackh)
1423 IWMMXT_OP_ENV1(unpacklub)
1424 IWMMXT_OP_ENV1(unpackluw)
1425 IWMMXT_OP_ENV1(unpacklul)
1426 IWMMXT_OP_ENV1(unpackhub)
1427 IWMMXT_OP_ENV1(unpackhuw)
1428 IWMMXT_OP_ENV1(unpackhul)
1429 IWMMXT_OP_ENV1(unpacklsb)
1430 IWMMXT_OP_ENV1(unpacklsw)
1431 IWMMXT_OP_ENV1(unpacklsl)
1432 IWMMXT_OP_ENV1(unpackhsb)
1433 IWMMXT_OP_ENV1(unpackhsw)
1434 IWMMXT_OP_ENV1(unpackhsl)
1436 IWMMXT_OP_ENV_SIZE(cmpeq)
1437 IWMMXT_OP_ENV_SIZE(cmpgtu)
1438 IWMMXT_OP_ENV_SIZE(cmpgts)
1440 IWMMXT_OP_ENV_SIZE(mins)
1441 IWMMXT_OP_ENV_SIZE(minu)
1442 IWMMXT_OP_ENV_SIZE(maxs)
1443 IWMMXT_OP_ENV_SIZE(maxu)
1445 IWMMXT_OP_ENV_SIZE(subn)
1446 IWMMXT_OP_ENV_SIZE(addn)
1447 IWMMXT_OP_ENV_SIZE(subu)
1448 IWMMXT_OP_ENV_SIZE(addu)
1449 IWMMXT_OP_ENV_SIZE(subs)
1450 IWMMXT_OP_ENV_SIZE(adds)
1452 IWMMXT_OP_ENV(avgb0)
1453 IWMMXT_OP_ENV(avgb1)
1454 IWMMXT_OP_ENV(avgw0)
1455 IWMMXT_OP_ENV(avgw1)
1457 IWMMXT_OP_ENV(packuw)
1458 IWMMXT_OP_ENV(packul)
1459 IWMMXT_OP_ENV(packuq)
1460 IWMMXT_OP_ENV(packsw)
1461 IWMMXT_OP_ENV(packsl)
1462 IWMMXT_OP_ENV(packsq)
1464 static void gen_op_iwmmxt_set_mup(void)
1467 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1468 tcg_gen_ori_i32(tmp, tmp, 2);
1469 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1472 static void gen_op_iwmmxt_set_cup(void)
1475 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1476 tcg_gen_ori_i32(tmp, tmp, 1);
1477 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1480 static void gen_op_iwmmxt_setpsr_nz(void)
1482 TCGv_i32 tmp = tcg_temp_new_i32();
1483 gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1484 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1487 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1489 iwmmxt_load_reg(cpu_V1, rn);
1490 tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1491 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1494 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1501 rd = (insn >> 16) & 0xf;
1502 tmp = load_reg(s, rd);
1504 offset = (insn & 0xff) << ((insn >> 7) & 2);
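    /*
     * Added note: ((insn >> 7) & 2) extracts bit 8 of the insn as a shift of
     * 0 or 2, so the 8-bit immediate is used as-is for the byte/halfword
     * forms and scaled by 4 for the word/doubleword forms.
     */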
1505 if (insn & (1 << 24)) {
1507 if (insn & (1 << 23))
1508 tcg_gen_addi_i32(tmp, tmp, offset);
1510 tcg_gen_addi_i32(tmp, tmp, -offset);
1511 tcg_gen_mov_i32(dest, tmp);
1512 if (insn & (1 << 21))
1513 store_reg(s, rd, tmp);
1515 tcg_temp_free_i32(tmp);
1516 } else if (insn & (1 << 21)) {
1518 tcg_gen_mov_i32(dest, tmp);
1519 if (insn & (1 << 23))
1520 tcg_gen_addi_i32(tmp, tmp, offset);
1522 tcg_gen_addi_i32(tmp, tmp, -offset);
1523 store_reg(s, rd, tmp);
1524 } else if (!(insn & (1 << 23)))
1529 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1531 int rd = (insn >> 0) & 0xf;
1534 if (insn & (1 << 8)) {
1535 if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1538 tmp = iwmmxt_load_creg(rd);
1541 tmp = tcg_temp_new_i32();
1542 iwmmxt_load_reg(cpu_V0, rd);
1543 tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1545 tcg_gen_andi_i32(tmp, tmp, mask);
1546 tcg_gen_mov_i32(dest, tmp);
1547 tcg_temp_free_i32(tmp);
1551 /* Disassemble an iwMMXt instruction. Returns nonzero if an error occurred
1552 (ie. an undefined instruction). */
1553 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1556 int rdhi, rdlo, rd0, rd1, i;
1558 TCGv_i32 tmp, tmp2, tmp3;
1560 if ((insn & 0x0e000e00) == 0x0c000000) {
1561 if ((insn & 0x0fe00ff0) == 0x0c400000) {
1563 rdlo = (insn >> 12) & 0xf;
1564 rdhi = (insn >> 16) & 0xf;
1565 if (insn & ARM_CP_RW_BIT) { /* TMRRC */
1566 iwmmxt_load_reg(cpu_V0, wrd);
1567 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1568 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1569 } else { /* TMCRR */
1570 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1571 iwmmxt_store_reg(cpu_V0, wrd);
1572 gen_op_iwmmxt_set_mup();
1577 wrd = (insn >> 12) & 0xf;
1578 addr = tcg_temp_new_i32();
1579 if (gen_iwmmxt_address(s, insn, addr)) {
1580 tcg_temp_free_i32(addr);
1583 if (insn & ARM_CP_RW_BIT) {
1584 if ((insn >> 28) == 0xf) { /* WLDRW wCx */
1585 tmp = tcg_temp_new_i32();
1586 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1587 iwmmxt_store_creg(wrd, tmp);
1590 if (insn & (1 << 8)) {
1591 if (insn & (1 << 22)) { /* WLDRD */
1592 gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1594 } else { /* WLDRW wRd */
1595 tmp = tcg_temp_new_i32();
1596 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1599 tmp = tcg_temp_new_i32();
1600 if (insn & (1 << 22)) { /* WLDRH */
1601 gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1602 } else { /* WLDRB */
1603 gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1607 tcg_gen_extu_i32_i64(cpu_M0, tmp);
1608 tcg_temp_free_i32(tmp);
1610 gen_op_iwmmxt_movq_wRn_M0(wrd);
1613 if ((insn >> 28) == 0xf) { /* WSTRW wCx */
1614 tmp = iwmmxt_load_creg(wrd);
1615 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1617 gen_op_iwmmxt_movq_M0_wRn(wrd);
1618 tmp = tcg_temp_new_i32();
1619 if (insn & (1 << 8)) {
1620 if (insn & (1 << 22)) { /* WSTRD */
1621 gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1622 } else { /* WSTRW wRd */
1623 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1624 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1627 if (insn & (1 << 22)) { /* WSTRH */
1628 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1629 gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1630 } else { /* WSTRB */
1631 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1632 gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1636 tcg_temp_free_i32(tmp);
1638 tcg_temp_free_i32(addr);
1642 if ((insn & 0x0f000000) != 0x0e000000)
1645 switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1646 case 0x000: /* WOR */
1647 wrd = (insn >> 12) & 0xf;
1648 rd0 = (insn >> 0) & 0xf;
1649 rd1 = (insn >> 16) & 0xf;
1650 gen_op_iwmmxt_movq_M0_wRn(rd0);
1651 gen_op_iwmmxt_orq_M0_wRn(rd1);
1652 gen_op_iwmmxt_setpsr_nz();
1653 gen_op_iwmmxt_movq_wRn_M0(wrd);
1654 gen_op_iwmmxt_set_mup();
1655 gen_op_iwmmxt_set_cup();
1657 case 0x011: /* TMCR */
1660 rd = (insn >> 12) & 0xf;
1661 wrd = (insn >> 16) & 0xf;
1663 case ARM_IWMMXT_wCID:
1664 case ARM_IWMMXT_wCASF:
1666 case ARM_IWMMXT_wCon:
1667 gen_op_iwmmxt_set_cup();
1669 case ARM_IWMMXT_wCSSF:
1670 tmp = iwmmxt_load_creg(wrd);
1671 tmp2 = load_reg(s, rd);
1672 tcg_gen_andc_i32(tmp, tmp, tmp2);
1673 tcg_temp_free_i32(tmp2);
1674 iwmmxt_store_creg(wrd, tmp);
1676 case ARM_IWMMXT_wCGR0:
1677 case ARM_IWMMXT_wCGR1:
1678 case ARM_IWMMXT_wCGR2:
1679 case ARM_IWMMXT_wCGR3:
1680 gen_op_iwmmxt_set_cup();
1681 tmp = load_reg(s, rd);
1682 iwmmxt_store_creg(wrd, tmp);
1688 case 0x100: /* WXOR */
1689 wrd = (insn >> 12) & 0xf;
1690 rd0 = (insn >> 0) & 0xf;
1691 rd1 = (insn >> 16) & 0xf;
1692 gen_op_iwmmxt_movq_M0_wRn(rd0);
1693 gen_op_iwmmxt_xorq_M0_wRn(rd1);
1694 gen_op_iwmmxt_setpsr_nz();
1695 gen_op_iwmmxt_movq_wRn_M0(wrd);
1696 gen_op_iwmmxt_set_mup();
1697 gen_op_iwmmxt_set_cup();
1699 case 0x111: /* TMRC */
1702 rd = (insn >> 12) & 0xf;
1703 wrd = (insn >> 16) & 0xf;
1704 tmp = iwmmxt_load_creg(wrd);
1705 store_reg(s, rd, tmp);
1707 case 0x300: /* WANDN */
1708 wrd = (insn >> 12) & 0xf;
1709 rd0 = (insn >> 0) & 0xf;
1710 rd1 = (insn >> 16) & 0xf;
1711 gen_op_iwmmxt_movq_M0_wRn(rd0);
1712 tcg_gen_neg_i64(cpu_M0, cpu_M0);
1713 gen_op_iwmmxt_andq_M0_wRn(rd1);
1714 gen_op_iwmmxt_setpsr_nz();
1715 gen_op_iwmmxt_movq_wRn_M0(wrd);
1716 gen_op_iwmmxt_set_mup();
1717 gen_op_iwmmxt_set_cup();
1719 case 0x200: /* WAND */
1720 wrd = (insn >> 12) & 0xf;
1721 rd0 = (insn >> 0) & 0xf;
1722 rd1 = (insn >> 16) & 0xf;
1723 gen_op_iwmmxt_movq_M0_wRn(rd0);
1724 gen_op_iwmmxt_andq_M0_wRn(rd1);
1725 gen_op_iwmmxt_setpsr_nz();
1726 gen_op_iwmmxt_movq_wRn_M0(wrd);
1727 gen_op_iwmmxt_set_mup();
1728 gen_op_iwmmxt_set_cup();
1730 case 0x810: case 0xa10: /* WMADD */
1731 wrd = (insn >> 12) & 0xf;
1732 rd0 = (insn >> 0) & 0xf;
1733 rd1 = (insn >> 16) & 0xf;
1734 gen_op_iwmmxt_movq_M0_wRn(rd0);
1735 if (insn & (1 << 21))
1736 gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1738 gen_op_iwmmxt_madduq_M0_wRn(rd1);
1739 gen_op_iwmmxt_movq_wRn_M0(wrd);
1740 gen_op_iwmmxt_set_mup();
1742 case 0x10e: case 0x50e: case 0x90e: case 0xd0e: /* WUNPCKIL */
1743 wrd = (insn >> 12) & 0xf;
1744 rd0 = (insn >> 16) & 0xf;
1745 rd1 = (insn >> 0) & 0xf;
1746 gen_op_iwmmxt_movq_M0_wRn(rd0);
1747 switch ((insn >> 22) & 3) {
1749 gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1752 gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1755 gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1760 gen_op_iwmmxt_movq_wRn_M0(wrd);
1761 gen_op_iwmmxt_set_mup();
1762 gen_op_iwmmxt_set_cup();
1764 case 0x10c: case 0x50c: case 0x90c: case 0xd0c: /* WUNPCKIH */
1765 wrd = (insn >> 12) & 0xf;
1766 rd0 = (insn >> 16) & 0xf;
1767 rd1 = (insn >> 0) & 0xf;
1768 gen_op_iwmmxt_movq_M0_wRn(rd0);
1769 switch ((insn >> 22) & 3) {
1771 gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1774 gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1777 gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1782 gen_op_iwmmxt_movq_wRn_M0(wrd);
1783 gen_op_iwmmxt_set_mup();
1784 gen_op_iwmmxt_set_cup();
1786 case 0x012: case 0x112: case 0x412: case 0x512: /* WSAD */
1787 wrd = (insn >> 12) & 0xf;
1788 rd0 = (insn >> 16) & 0xf;
1789 rd1 = (insn >> 0) & 0xf;
1790 gen_op_iwmmxt_movq_M0_wRn(rd0);
1791 if (insn & (1 << 22))
1792 gen_op_iwmmxt_sadw_M0_wRn(rd1);
1794 gen_op_iwmmxt_sadb_M0_wRn(rd1);
1795 if (!(insn & (1 << 20)))
1796 gen_op_iwmmxt_addl_M0_wRn(wrd);
1797 gen_op_iwmmxt_movq_wRn_M0(wrd);
1798 gen_op_iwmmxt_set_mup();
1800 case 0x010: case 0x110: case 0x210: case 0x310: /* WMUL */
1801 wrd = (insn >> 12) & 0xf;
1802 rd0 = (insn >> 16) & 0xf;
1803 rd1 = (insn >> 0) & 0xf;
1804 gen_op_iwmmxt_movq_M0_wRn(rd0);
1805 if (insn & (1 << 21)) {
1806 if (insn & (1 << 20))
1807 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1809 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1811 if (insn & (1 << 20))
1812 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1814 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1816 gen_op_iwmmxt_movq_wRn_M0(wrd);
1817 gen_op_iwmmxt_set_mup();
1819 case 0x410: case 0x510: case 0x610: case 0x710: /* WMAC */
1820 wrd = (insn >> 12) & 0xf;
1821 rd0 = (insn >> 16) & 0xf;
1822 rd1 = (insn >> 0) & 0xf;
1823 gen_op_iwmmxt_movq_M0_wRn(rd0);
1824 if (insn & (1 << 21))
1825 gen_op_iwmmxt_macsw_M0_wRn(rd1);
1827 gen_op_iwmmxt_macuw_M0_wRn(rd1);
1828 if (!(insn & (1 << 20))) {
1829 iwmmxt_load_reg(cpu_V1, wrd);
1830 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1832 gen_op_iwmmxt_movq_wRn_M0(wrd);
1833 gen_op_iwmmxt_set_mup();
1835 case 0x006: case 0x406: case 0x806: case 0xc06: /* WCMPEQ */
1836 wrd = (insn >> 12) & 0xf;
1837 rd0 = (insn >> 16) & 0xf;
1838 rd1 = (insn >> 0) & 0xf;
1839 gen_op_iwmmxt_movq_M0_wRn(rd0);
1840 switch ((insn >> 22) & 3) {
1842 gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1845 gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1848 gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1853 gen_op_iwmmxt_movq_wRn_M0(wrd);
1854 gen_op_iwmmxt_set_mup();
1855 gen_op_iwmmxt_set_cup();
1857 case 0x800: case 0x900: case 0xc00: case 0xd00: /* WAVG2 */
1858 wrd = (insn >> 12) & 0xf;
1859 rd0 = (insn >> 16) & 0xf;
1860 rd1 = (insn >> 0) & 0xf;
1861 gen_op_iwmmxt_movq_M0_wRn(rd0);
1862 if (insn & (1 << 22)) {
1863 if (insn & (1 << 20))
1864 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1866 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1868 if (insn & (1 << 20))
1869 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1871 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1873 gen_op_iwmmxt_movq_wRn_M0(wrd);
1874 gen_op_iwmmxt_set_mup();
1875 gen_op_iwmmxt_set_cup();
1877 case 0x802: case 0x902: case 0xa02: case 0xb02: /* WALIGNR */
1878 wrd = (insn >> 12) & 0xf;
1879 rd0 = (insn >> 16) & 0xf;
1880 rd1 = (insn >> 0) & 0xf;
1881 gen_op_iwmmxt_movq_M0_wRn(rd0);
1882 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1883 tcg_gen_andi_i32(tmp, tmp, 7);
1884 iwmmxt_load_reg(cpu_V1, rd1);
1885 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1886 tcg_temp_free_i32(tmp);
1887 gen_op_iwmmxt_movq_wRn_M0(wrd);
1888 gen_op_iwmmxt_set_mup();
1890 case 0x601: case 0x605: case 0x609: case 0x60d: /* TINSR */
1891 if (((insn >> 6) & 3) == 3)
1893 rd = (insn >> 12) & 0xf;
1894 wrd = (insn >> 16) & 0xf;
1895 tmp = load_reg(s, rd);
1896 gen_op_iwmmxt_movq_M0_wRn(wrd);
1897 switch ((insn >> 6) & 3) {
1899 tmp2 = tcg_const_i32(0xff);
1900 tmp3 = tcg_const_i32((insn & 7) << 3);
1903 tmp2 = tcg_const_i32(0xffff);
1904 tmp3 = tcg_const_i32((insn & 3) << 4);
1907 tmp2 = tcg_const_i32(0xffffffff);
1908 tmp3 = tcg_const_i32((insn & 1) << 5);
1914 gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1915 tcg_temp_free_i32(tmp3);
1916 tcg_temp_free_i32(tmp2);
1917 tcg_temp_free_i32(tmp);
1918 gen_op_iwmmxt_movq_wRn_M0(wrd);
1919 gen_op_iwmmxt_set_mup();
1921 case 0x107: case 0x507: case 0x907: case 0xd07: /* TEXTRM */
1922 rd = (insn >> 12) & 0xf;
1923 wrd = (insn >> 16) & 0xf;
1924 if (rd == 15 || ((insn >> 22) & 3) == 3)
1926 gen_op_iwmmxt_movq_M0_wRn(wrd);
1927 tmp = tcg_temp_new_i32();
1928 switch ((insn >> 22) & 3) {
1930 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1931 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1933 tcg_gen_ext8s_i32(tmp, tmp);
1935 tcg_gen_andi_i32(tmp, tmp, 0xff);
1939 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1940 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1942 tcg_gen_ext16s_i32(tmp, tmp);
1944 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1948 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1949 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1952 store_reg(s, rd, tmp);
1954 case 0x117: case 0x517: case 0x917: case 0xd17: /* TEXTRC */
1955 if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1957 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1958 switch ((insn >> 22) & 3) {
1960 tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1963 tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1966 tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1969 tcg_gen_shli_i32(tmp, tmp, 28);
1971 tcg_temp_free_i32(tmp);
1973 case 0x401: case 0x405: case 0x409: case 0x40d: /* TBCST */
1974 if (((insn >> 6) & 3) == 3)
1976 rd = (insn >> 12) & 0xf;
1977 wrd = (insn >> 16) & 0xf;
1978 tmp = load_reg(s, rd);
1979 switch ((insn >> 6) & 3) {
1981 gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1984 gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1987 gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1990 tcg_temp_free_i32(tmp);
1991 gen_op_iwmmxt_movq_wRn_M0(wrd);
1992 gen_op_iwmmxt_set_mup();
1994 case 0x113: case 0x513: case 0x913: case 0xd13: /* TANDC */
1995 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1997 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1998 tmp2 = tcg_temp_new_i32();
1999 tcg_gen_mov_i32(tmp2, tmp);
2000 switch ((insn >> 22) & 3) {
2002 for (i = 0; i < 7; i ++) {
2003 tcg_gen_shli_i32(tmp2, tmp2, 4);
2004 tcg_gen_and_i32(tmp, tmp, tmp2);
2008 for (i = 0; i < 3; i ++) {
2009 tcg_gen_shli_i32(tmp2, tmp2, 8);
2010 tcg_gen_and_i32(tmp, tmp, tmp2);
2014 tcg_gen_shli_i32(tmp2, tmp2, 16);
2015 tcg_gen_and_i32(tmp, tmp, tmp2);
2019 tcg_temp_free_i32(tmp2);
2020 tcg_temp_free_i32(tmp);
2022 case 0x01c: case 0x41c: case 0x81c: case 0xc1c: /* WACC */
2023 wrd = (insn >> 12) & 0xf;
2024 rd0 = (insn >> 16) & 0xf;
2025 gen_op_iwmmxt_movq_M0_wRn(rd0);
2026 switch ((insn >> 22) & 3) {
2028 gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
2031 gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
2034 gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2039 gen_op_iwmmxt_movq_wRn_M0(wrd);
2040 gen_op_iwmmxt_set_mup();
2042 case 0x115: case 0x515: case 0x915: case 0xd15: /* TORC */
2043 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2045 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2046 tmp2 = tcg_temp_new_i32();
2047 tcg_gen_mov_i32(tmp2, tmp);
2048 switch ((insn >> 22) & 3) {
2050 for (i = 0; i < 7; i ++) {
2051 tcg_gen_shli_i32(tmp2, tmp2, 4);
2052 tcg_gen_or_i32(tmp, tmp, tmp2);
2056 for (i = 0; i < 3; i ++) {
2057 tcg_gen_shli_i32(tmp2, tmp2, 8);
2058 tcg_gen_or_i32(tmp, tmp, tmp2);
2062 tcg_gen_shli_i32(tmp2, tmp2, 16);
2063 tcg_gen_or_i32(tmp, tmp, tmp2);
2067 tcg_temp_free_i32(tmp2);
2068 tcg_temp_free_i32(tmp);
2070 case 0x103: case 0x503: case 0x903: case 0xd03: /* TMOVMSK */
2071 rd = (insn >> 12) & 0xf;
2072 rd0 = (insn >> 16) & 0xf;
2073 if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2075 gen_op_iwmmxt_movq_M0_wRn(rd0);
2076 tmp = tcg_temp_new_i32();
2077 switch ((insn >> 22) & 3) {
2079 gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2082 gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2085 gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2088 store_reg(s, rd, tmp);
2090 case 0x106: case 0x306: case 0x506: case 0x706: /* WCMPGT */
2091 case 0x906: case 0xb06: case 0xd06: case 0xf06:
2092 wrd = (insn >> 12) & 0xf;
2093 rd0 = (insn >> 16) & 0xf;
2094 rd1 = (insn >> 0) & 0xf;
2095 gen_op_iwmmxt_movq_M0_wRn(rd0);
2096 switch ((insn >> 22) & 3) {
2098 if (insn & (1 << 21))
2099 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2101 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2104 if (insn & (1 << 21))
2105 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2107 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2110 if (insn & (1 << 21))
2111 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2113 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2118 gen_op_iwmmxt_movq_wRn_M0(wrd);
2119 gen_op_iwmmxt_set_mup();
2120 gen_op_iwmmxt_set_cup();
2122 case 0x00e: case 0x20e: case 0x40e: case 0x60e: /* WUNPCKEL */
2123 case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2124 wrd = (insn >> 12) & 0xf;
2125 rd0 = (insn >> 16) & 0xf;
2126 gen_op_iwmmxt_movq_M0_wRn(rd0);
2127 switch ((insn >> 22) & 3) {
2129 if (insn & (1 << 21))
2130 gen_op_iwmmxt_unpacklsb_M0();
2132 gen_op_iwmmxt_unpacklub_M0();
2135 if (insn & (1 << 21))
2136 gen_op_iwmmxt_unpacklsw_M0();
2138 gen_op_iwmmxt_unpackluw_M0();
2141 if (insn & (1 << 21))
2142 gen_op_iwmmxt_unpacklsl_M0();
2144 gen_op_iwmmxt_unpacklul_M0();
2149 gen_op_iwmmxt_movq_wRn_M0(wrd);
2150 gen_op_iwmmxt_set_mup();
2151 gen_op_iwmmxt_set_cup();
2153 case 0x00c: case 0x20c: case 0x40c: case 0x60c: /* WUNPCKEH */
2154 case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2155 wrd = (insn >> 12) & 0xf;
2156 rd0 = (insn >> 16) & 0xf;
2157 gen_op_iwmmxt_movq_M0_wRn(rd0);
2158 switch ((insn >> 22) & 3) {
2160 if (insn & (1 << 21))
2161 gen_op_iwmmxt_unpackhsb_M0();
2163 gen_op_iwmmxt_unpackhub_M0();
2166 if (insn & (1 << 21))
2167 gen_op_iwmmxt_unpackhsw_M0();
2169 gen_op_iwmmxt_unpackhuw_M0();
2172 if (insn & (1 << 21))
2173 gen_op_iwmmxt_unpackhsl_M0();
2175 gen_op_iwmmxt_unpackhul_M0();
2180 gen_op_iwmmxt_movq_wRn_M0(wrd);
2181 gen_op_iwmmxt_set_mup();
2182 gen_op_iwmmxt_set_cup();
2184 case 0x204: case 0x604: case 0xa04: case 0xe04: /* WSRL */
2185 case 0x214: case 0x614: case 0xa14: case 0xe14:
2186 if (((insn >> 22) & 3) == 0)
2188 wrd = (insn >> 12) & 0xf;
2189 rd0 = (insn >> 16) & 0xf;
2190 gen_op_iwmmxt_movq_M0_wRn(rd0);
2191 tmp = tcg_temp_new_i32();
2192 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2193 tcg_temp_free_i32(tmp);
2196 switch ((insn >> 22) & 3) {
2198 gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2201 gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2204 gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2207 tcg_temp_free_i32(tmp);
2208 gen_op_iwmmxt_movq_wRn_M0(wrd);
2209 gen_op_iwmmxt_set_mup();
2210 gen_op_iwmmxt_set_cup();
2212 case 0x004: case 0x404: case 0x804: case 0xc04: /* WSRA */
2213 case 0x014: case 0x414: case 0x814: case 0xc14:
2214 if (((insn >> 22) & 3) == 0)
2216 wrd = (insn >> 12) & 0xf;
2217 rd0 = (insn >> 16) & 0xf;
2218 gen_op_iwmmxt_movq_M0_wRn(rd0);
2219 tmp = tcg_temp_new_i32();
2220 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2221 tcg_temp_free_i32(tmp);
2224 switch ((insn >> 22) & 3) {
2226 gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2229 gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2232 gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2235 tcg_temp_free_i32(tmp);
2236 gen_op_iwmmxt_movq_wRn_M0(wrd);
2237 gen_op_iwmmxt_set_mup();
2238 gen_op_iwmmxt_set_cup();
2240 case 0x104: case 0x504: case 0x904: case 0xd04: /* WSLL */
2241 case 0x114: case 0x514: case 0x914: case 0xd14:
2242 if (((insn >> 22) & 3) == 0)
2244 wrd = (insn >> 12) & 0xf;
2245 rd0 = (insn >> 16) & 0xf;
2246 gen_op_iwmmxt_movq_M0_wRn(rd0);
2247 tmp = tcg_temp_new_i32();
2248 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2249 tcg_temp_free_i32(tmp);
2252 switch ((insn >> 22) & 3) {
2254 gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2257 gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2260 gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2263 tcg_temp_free_i32(tmp);
2264 gen_op_iwmmxt_movq_wRn_M0(wrd);
2265 gen_op_iwmmxt_set_mup();
2266 gen_op_iwmmxt_set_cup();
2268 case 0x304: case 0x704: case 0xb04: case 0xf04: /* WROR */
2269 case 0x314: case 0x714: case 0xb14: case 0xf14:
2270 if (((insn >> 22) & 3) == 0)
2272 wrd = (insn >> 12) & 0xf;
2273 rd0 = (insn >> 16) & 0xf;
2274 gen_op_iwmmxt_movq_M0_wRn(rd0);
2275 tmp = tcg_temp_new_i32();
2276 switch ((insn >> 22) & 3) {
2278 if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2279 tcg_temp_free_i32(tmp);
2282 gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2285 if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2286 tcg_temp_free_i32(tmp);
2289 gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2292 if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2293 tcg_temp_free_i32(tmp);
2296 gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2299 tcg_temp_free_i32(tmp);
2300 gen_op_iwmmxt_movq_wRn_M0(wrd);
2301 gen_op_iwmmxt_set_mup();
2302 gen_op_iwmmxt_set_cup();
2304 case 0x116: case 0x316: case 0x516: case 0x716: /* WMIN */
2305 case 0x916: case 0xb16: case 0xd16: case 0xf16:
2306 wrd = (insn >> 12) & 0xf;
2307 rd0 = (insn >> 16) & 0xf;
2308 rd1 = (insn >> 0) & 0xf;
2309 gen_op_iwmmxt_movq_M0_wRn(rd0);
2310 switch ((insn >> 22) & 3) {
2312 if (insn & (1 << 21))
2313 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2315 gen_op_iwmmxt_minub_M0_wRn(rd1);
2318 if (insn & (1 << 21))
2319 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2321 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2324 if (insn & (1 << 21))
2325 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2327 gen_op_iwmmxt_minul_M0_wRn(rd1);
2332 gen_op_iwmmxt_movq_wRn_M0(wrd);
2333 gen_op_iwmmxt_set_mup();
2335 case 0x016: case 0x216: case 0x416: case 0x616: /* WMAX */
2336 case 0x816: case 0xa16: case 0xc16: case 0xe16:
2337 wrd = (insn >> 12) & 0xf;
2338 rd0 = (insn >> 16) & 0xf;
2339 rd1 = (insn >> 0) & 0xf;
2340 gen_op_iwmmxt_movq_M0_wRn(rd0);
2341 switch ((insn >> 22) & 3) {
2343 if (insn & (1 << 21))
2344 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2346 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2349 if (insn & (1 << 21))
2350 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2352 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2355 if (insn & (1 << 21))
2356 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2358 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2363 gen_op_iwmmxt_movq_wRn_M0(wrd);
2364 gen_op_iwmmxt_set_mup();
2366 case 0x002: case 0x102: case 0x202: case 0x302: /* WALIGNI */
2367 case 0x402: case 0x502: case 0x602: case 0x702:
2368 wrd = (insn >> 12) & 0xf;
2369 rd0 = (insn >> 16) & 0xf;
2370 rd1 = (insn >> 0) & 0xf;
2371 gen_op_iwmmxt_movq_M0_wRn(rd0);
2372 tmp = tcg_const_i32((insn >> 20) & 3);
2373 iwmmxt_load_reg(cpu_V1, rd1);
2374 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2375 tcg_temp_free_i32(tmp);
2376 gen_op_iwmmxt_movq_wRn_M0(wrd);
2377 gen_op_iwmmxt_set_mup();
2379 case 0x01a: case 0x11a: case 0x21a: case 0x31a: /* WSUB */
2380 case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2381 case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2382 case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2383 wrd = (insn >> 12) & 0xf;
2384 rd0 = (insn >> 16) & 0xf;
2385 rd1 = (insn >> 0) & 0xf;
2386 gen_op_iwmmxt_movq_M0_wRn(rd0);
2387 switch ((insn >> 20) & 0xf) {
2389 gen_op_iwmmxt_subnb_M0_wRn(rd1);
2392 gen_op_iwmmxt_subub_M0_wRn(rd1);
2395 gen_op_iwmmxt_subsb_M0_wRn(rd1);
2398 gen_op_iwmmxt_subnw_M0_wRn(rd1);
2401 gen_op_iwmmxt_subuw_M0_wRn(rd1);
2404 gen_op_iwmmxt_subsw_M0_wRn(rd1);
2407 gen_op_iwmmxt_subnl_M0_wRn(rd1);
2410 gen_op_iwmmxt_subul_M0_wRn(rd1);
2413 gen_op_iwmmxt_subsl_M0_wRn(rd1);
2418 gen_op_iwmmxt_movq_wRn_M0(wrd);
2419 gen_op_iwmmxt_set_mup();
2420 gen_op_iwmmxt_set_cup();
2422 case 0x01e: case 0x11e: case 0x21e: case 0x31e: /* WSHUFH */
2423 case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2424 case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2425 case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2426 wrd = (insn >> 12) & 0xf;
2427 rd0 = (insn >> 16) & 0xf;
2428 gen_op_iwmmxt_movq_M0_wRn(rd0);
2429 tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2430 gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2431 tcg_temp_free_i32(tmp);
2432 gen_op_iwmmxt_movq_wRn_M0(wrd);
2433 gen_op_iwmmxt_set_mup();
2434 gen_op_iwmmxt_set_cup();
2436 case 0x018: case 0x118: case 0x218: case 0x318: /* WADD */
2437 case 0x418: case 0x518: case 0x618: case 0x718:
2438 case 0x818: case 0x918: case 0xa18: case 0xb18:
2439 case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2440 wrd = (insn >> 12) & 0xf;
2441 rd0 = (insn >> 16) & 0xf;
2442 rd1 = (insn >> 0) & 0xf;
2443 gen_op_iwmmxt_movq_M0_wRn(rd0);
2444 switch ((insn >> 20) & 0xf) {
2446 gen_op_iwmmxt_addnb_M0_wRn(rd1);
2449 gen_op_iwmmxt_addub_M0_wRn(rd1);
2452 gen_op_iwmmxt_addsb_M0_wRn(rd1);
2455 gen_op_iwmmxt_addnw_M0_wRn(rd1);
2458 gen_op_iwmmxt_adduw_M0_wRn(rd1);
2461 gen_op_iwmmxt_addsw_M0_wRn(rd1);
2464 gen_op_iwmmxt_addnl_M0_wRn(rd1);
2467 gen_op_iwmmxt_addul_M0_wRn(rd1);
2470 gen_op_iwmmxt_addsl_M0_wRn(rd1);
2475 gen_op_iwmmxt_movq_wRn_M0(wrd);
2476 gen_op_iwmmxt_set_mup();
2477 gen_op_iwmmxt_set_cup();
2479 case 0x008: case 0x108: case 0x208: case 0x308: /* WPACK */
2480 case 0x408: case 0x508: case 0x608: case 0x708:
2481 case 0x808: case 0x908: case 0xa08: case 0xb08:
2482 case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2483 if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2485 wrd = (insn >> 12) & 0xf;
2486 rd0 = (insn >> 16) & 0xf;
2487 rd1 = (insn >> 0) & 0xf;
2488 gen_op_iwmmxt_movq_M0_wRn(rd0);
2489 switch ((insn >> 22) & 3) {
2491 if (insn & (1 << 21))
2492 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2494 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2497 if (insn & (1 << 21))
2498 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2500 gen_op_iwmmxt_packul_M0_wRn(rd1);
2503 if (insn & (1 << 21))
2504 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2506 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2509 gen_op_iwmmxt_movq_wRn_M0(wrd);
2510 gen_op_iwmmxt_set_mup();
2511 gen_op_iwmmxt_set_cup();
2513 case 0x201: case 0x203: case 0x205: case 0x207:
2514 case 0x209: case 0x20b: case 0x20d: case 0x20f:
2515 case 0x211: case 0x213: case 0x215: case 0x217:
2516 case 0x219: case 0x21b: case 0x21d: case 0x21f:
2517 wrd = (insn >> 5) & 0xf;
2518 rd0 = (insn >> 12) & 0xf;
2519 rd1 = (insn >> 0) & 0xf;
2520 if (rd0 == 0xf || rd1 == 0xf)
2522 gen_op_iwmmxt_movq_M0_wRn(wrd);
2523 tmp = load_reg(s, rd0);
2524 tmp2 = load_reg(s, rd1);
2525 switch ((insn >> 16) & 0xf) {
2526 case 0x0: /* TMIA */
2527 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2529 case 0x8: /* TMIAPH */
2530 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2532 case 0xc: case 0xd: case 0xe: case 0xf: /* TMIAxy */
2533 if (insn & (1 << 16))
2534 tcg_gen_shri_i32(tmp, tmp, 16);
2535 if (insn & (1 << 17))
2536 tcg_gen_shri_i32(tmp2, tmp2, 16);
2537 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2540 tcg_temp_free_i32(tmp2);
2541 tcg_temp_free_i32(tmp);
2544 tcg_temp_free_i32(tmp2);
2545 tcg_temp_free_i32(tmp);
2546 gen_op_iwmmxt_movq_wRn_M0(wrd);
2547 gen_op_iwmmxt_set_mup();
2556 /* Disassemble an XScale DSP instruction. Returns nonzero if an error occurred
2557 (i.e. an undefined instruction). */
2558 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2560 int acc, rd0, rd1, rdhi, rdlo;
2563 if ((insn & 0x0ff00f10) == 0x0e200010) {
2564 /* Multiply with Internal Accumulate Format */
2565 rd0 = (insn >> 12) & 0xf;
2567 acc = (insn >> 5) & 7;
2572 tmp = load_reg(s, rd0);
2573 tmp2 = load_reg(s, rd1);
2574 switch ((insn >> 16) & 0xf) {
2576 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2578 case 0x8: /* MIAPH */
2579 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2581 case 0xc: /* MIABB */
2582 case 0xd: /* MIABT */
2583 case 0xe: /* MIATB */
2584 case 0xf: /* MIATT */
2585 if (insn & (1 << 16))
2586 tcg_gen_shri_i32(tmp, tmp, 16);
2587 if (insn & (1 << 17))
2588 tcg_gen_shri_i32(tmp2, tmp2, 16);
2589 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2594 tcg_temp_free_i32(tmp2);
2595 tcg_temp_free_i32(tmp);
2597 gen_op_iwmmxt_movq_wRn_M0(acc);
2601 if ((insn & 0x0fe00ff8) == 0x0c400000) {
2602 /* Internal Accumulator Access Format */
2603 rdhi = (insn >> 16) & 0xf;
2604 rdlo = (insn >> 12) & 0xf;
2610 if (insn & ARM_CP_RW_BIT) { /* MRA */
2611 iwmmxt_load_reg(cpu_V0, acc);
2612 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2613 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
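        /* The XScale DSP accumulators are 40 bits wide (hence the 40 - 32
         * below), so only bits [39:32] of the high word are significant;
         * mask off the rest.
         */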
2614 tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2616 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2617 iwmmxt_store_reg(cpu_V0, acc);
2625 #define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
2626 #define VFP_SREG(insn, bigbit, smallbit) \
2627 ((VFP_REG_SHR(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1))
2628 #define VFP_DREG(reg, insn, bigbit, smallbit) do { \
2629 if (arm_dc_feature(s, ARM_FEATURE_VFP3)) { \
2630 reg = (((insn) >> (bigbit)) & 0x0f) \
2631 | (((insn) >> ((smallbit) - 4)) & 0x10); \
2633 if (insn & (1 << (smallbit))) \
2635 reg = ((insn) >> (bigbit)) & 0x0f; \
2638 #define VFP_SREG_D(insn) VFP_SREG(insn, 12, 22)
2639 #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
2640 #define VFP_SREG_N(insn) VFP_SREG(insn, 16, 7)
2641 #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
2642 #define VFP_SREG_M(insn) VFP_SREG(insn, 0, 5)
2643 #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
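/* Example of the packing these macros undo: VFP_SREG_D(insn) yields the
 * single-precision register index Sd = (bits [15:12] << 1) | bit [22]
 * (i.e. Vd:D), while VFP_DREG_D() yields Dd = (bit [22] << 4) | bits [15:12]
 * (D:Vd) when the VFP3 32-double-register file is present.
 */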
2645 static void gen_neon_dup_low16(TCGv_i32 var)
2647 TCGv_i32 tmp = tcg_temp_new_i32();
2648 tcg_gen_ext16u_i32(var, var);
2649 tcg_gen_shli_i32(tmp, var, 16);
2650 tcg_gen_or_i32(var, var, tmp);
2651 tcg_temp_free_i32(tmp);
2654 static void gen_neon_dup_high16(TCGv_i32 var)
2656 TCGv_i32 tmp = tcg_temp_new_i32();
2657 tcg_gen_andi_i32(var, var, 0xffff0000);
2658 tcg_gen_shri_i32(tmp, var, 16);
2659 tcg_gen_or_i32(var, var, tmp);
2660 tcg_temp_free_i32(tmp);
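    /* Worked example: var = 0x1234abcd gives 0xabcdabcd from
     * gen_neon_dup_low16() and 0x12341234 from gen_neon_dup_high16().
     */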
2664 * Disassemble a VFP instruction. Returns nonzero if an error occurred
2665 * (i.e. an undefined instruction).
2667 static int disas_vfp_insn(DisasContext *s, uint32_t insn)
2669 if (!arm_dc_feature(s, ARM_FEATURE_VFP)) {
2674 * If the decodetree decoder handles this insn it will always
2675 * emit code to either execute the insn or generate an appropriate
2676 * exception; so we never need to return non-zero to tell
2677 * the calling code to emit an UNDEF exception.
2679 if (extract32(insn, 28, 4) == 0xf) {
2680 if (disas_vfp_uncond(s, insn)) {
2684 if (disas_vfp(s, insn)) {
2688 /* If the decodetree decoder didn't handle this insn, it must be UNDEF */
2692 static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
2694 #ifndef CONFIG_USER_ONLY
2695 return (s->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) ||
2696 ((s->base.pc_next - 1) & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
2702 static void gen_goto_ptr(void)
2704 tcg_gen_lookup_and_goto_ptr();
2707 /* This will end the TB but doesn't guarantee we'll return to
2708 * cpu_loop_exec. Any live exit_requests will be processed as we
2709 * enter the next TB.
2711 static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
2713 if (use_goto_tb(s, dest)) {
2715 gen_set_pc_im(s, dest);
2716 tcg_gen_exit_tb(s->base.tb, n);
2718 gen_set_pc_im(s, dest);
2721 s->base.is_jmp = DISAS_NORETURN;
2724 static inline void gen_jmp(DisasContext *s, uint32_t dest)
2726 if (unlikely(is_singlestepping(s))) {
2727 /* An indirect jump so that we still trigger the debug exception. */
2732 gen_goto_tb(s, 0, dest);
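/* e.g. gen_mulxy(t0, t1, 1, 1) computes
 * (int16_t)(t0 >> 16) * (int16_t)(t1 >> 16),
 * i.e. the SMULTT-style product of the two top halfwords.
 */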
2736 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2739 tcg_gen_sari_i32(t0, t0, 16);
2743 tcg_gen_sari_i32(t1, t1, 16);
2746 tcg_gen_mul_i32(t0, t0, t1);
2749 /* Return the mask of PSR bits set by a MSR instruction. */
2750 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2755 if (flags & (1 << 0))
2757 if (flags & (1 << 1))
2759 if (flags & (1 << 2))
2761 if (flags & (1 << 3))
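    /* The four flag bits select the c/x/s/f byte fields of the PSR:
     * bits [7:0], [15:8], [23:16] and [31:24] respectively.
     */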
2764 /* Mask out undefined bits. */
2765 mask &= ~CPSR_RESERVED;
2766 if (!arm_dc_feature(s, ARM_FEATURE_V4T)) {
2769 if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
2770 mask &= ~CPSR_Q; /* V5TE in reality */
2772 if (!arm_dc_feature(s, ARM_FEATURE_V6)) {
2773 mask &= ~(CPSR_E | CPSR_GE);
2775 if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
2778 /* Mask out execution state and reserved bits. */
2780 mask &= ~(CPSR_EXEC | CPSR_RESERVED);
2782 /* Mask out privileged bits. */
2788 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2789 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2793 /* ??? This is also undefined in system mode. */
2797 tmp = load_cpu_field(spsr);
2798 tcg_gen_andi_i32(tmp, tmp, ~mask);
2799 tcg_gen_andi_i32(t0, t0, mask);
2800 tcg_gen_or_i32(tmp, tmp, t0);
2801 store_cpu_field(tmp, spsr);
2803 gen_set_cpsr(t0, mask);
2805 tcg_temp_free_i32(t0);
2810 /* Returns nonzero if access to the PSR is not permitted. */
2811 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2814 tmp = tcg_temp_new_i32();
2815 tcg_gen_movi_i32(tmp, val);
2816 return gen_set_psr(s, mask, spsr, tmp);
2819 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2820 int *tgtmode, int *regno)
2822 /* Decode the r and sysm fields of MSR/MRS banked accesses into
2823 * the target mode and register number, and identify the various
2824 * unpredictable cases.
2825 * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2826 * + executed in user mode
2827 * + using R15 as the src/dest register
2828 * + accessing an unimplemented register
2829 * + accessing a register that's inaccessible at current PL/security state*
2830 * + accessing a register that you could access with a different insn
2831 * We choose to UNDEF in all these cases.
2832 * Since we don't know which of the various AArch32 modes we are in
2833 * we have to defer some checks to runtime.
2834 * Accesses to Monitor mode registers from Secure EL1 (which implies
2835 * that EL3 is AArch64) must trap to EL3.
2837 * If the access checks fail this function will emit code to take
2838 * an exception and return false. Otherwise it will return true,
2839 * and set *tgtmode and *regno appropriately.
2841 int exc_target = default_exception_el(s);
2843 /* These instructions are present only in ARMv8, or in ARMv7 with the
2844 * Virtualization Extensions.
2846 if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2847 !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2851 if (IS_USER(s) || rn == 15) {
2855 /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2856 * of registers into (r, sysm).
2859 /* SPSRs for other modes */
2861 case 0xe: /* SPSR_fiq */
2862 *tgtmode = ARM_CPU_MODE_FIQ;
2864 case 0x10: /* SPSR_irq */
2865 *tgtmode = ARM_CPU_MODE_IRQ;
2867 case 0x12: /* SPSR_svc */
2868 *tgtmode = ARM_CPU_MODE_SVC;
2870 case 0x14: /* SPSR_abt */
2871 *tgtmode = ARM_CPU_MODE_ABT;
2873 case 0x16: /* SPSR_und */
2874 *tgtmode = ARM_CPU_MODE_UND;
2876 case 0x1c: /* SPSR_mon */
2877 *tgtmode = ARM_CPU_MODE_MON;
2879 case 0x1e: /* SPSR_hyp */
2880 *tgtmode = ARM_CPU_MODE_HYP;
2882 default: /* unallocated */
2885 /* We arbitrarily assign SPSR a register number of 16. */
2888 /* general purpose registers for other modes */
2890 case 0x0 ... 0x6: /* 0b00xxx : r8_usr ... r14_usr */
2891 *tgtmode = ARM_CPU_MODE_USR;
2894 case 0x8 ... 0xe: /* 0b01xxx : r8_fiq ... r14_fiq */
2895 *tgtmode = ARM_CPU_MODE_FIQ;
2898 case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2899 *tgtmode = ARM_CPU_MODE_IRQ;
2900 *regno = sysm & 1 ? 13 : 14;
2902 case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2903 *tgtmode = ARM_CPU_MODE_SVC;
2904 *regno = sysm & 1 ? 13 : 14;
2906 case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2907 *tgtmode = ARM_CPU_MODE_ABT;
2908 *regno = sysm & 1 ? 13 : 14;
2910 case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2911 *tgtmode = ARM_CPU_MODE_UND;
2912 *regno = sysm & 1 ? 13 : 14;
2914 case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2915 *tgtmode = ARM_CPU_MODE_MON;
2916 *regno = sysm & 1 ? 13 : 14;
2918 case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2919 *tgtmode = ARM_CPU_MODE_HYP;
2920 /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2921 *regno = sysm & 1 ? 13 : 17;
2923 default: /* unallocated */
2928 /* Catch the 'accessing inaccessible register' cases we can detect
2929 * at translate time.
2932 case ARM_CPU_MODE_MON:
2933 if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2936 if (s->current_el == 1) {
2937 /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2938 * then accesses to Mon registers trap to EL3
2944 case ARM_CPU_MODE_HYP:
2946 * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2947 * (and so we can forbid accesses from EL2 or below). elr_hyp
2948 * can be accessed also from Hyp mode, so forbid accesses from
2951 if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2952 (s->current_el < 3 && *regno != 17)) {
2963 /* If we get here then some access check did not pass */
2964 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
2965 syn_uncategorized(), exc_target);
2969 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2971 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2972 int tgtmode = 0, regno = 0;
2974 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2978 /* Sync state because msr_banked() can raise exceptions */
2979 gen_set_condexec(s);
2980 gen_set_pc_im(s, s->pc_curr);
2981 tcg_reg = load_reg(s, rn);
2982 tcg_tgtmode = tcg_const_i32(tgtmode);
2983 tcg_regno = tcg_const_i32(regno);
2984 gen_helper_msr_banked(cpu_env, tcg_reg, tcg_tgtmode, tcg_regno);
2985 tcg_temp_free_i32(tcg_tgtmode);
2986 tcg_temp_free_i32(tcg_regno);
2987 tcg_temp_free_i32(tcg_reg);
2988 s->base.is_jmp = DISAS_UPDATE;
2991 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2993 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2994 int tgtmode = 0, regno = 0;
2996 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
3000 /* Sync state because mrs_banked() can raise exceptions */
3001 gen_set_condexec(s);
3002 gen_set_pc_im(s, s->pc_curr);
3003 tcg_reg = tcg_temp_new_i32();
3004 tcg_tgtmode = tcg_const_i32(tgtmode);
3005 tcg_regno = tcg_const_i32(regno);
3006 gen_helper_mrs_banked(tcg_reg, cpu_env, tcg_tgtmode, tcg_regno);
3007 tcg_temp_free_i32(tcg_tgtmode);
3008 tcg_temp_free_i32(tcg_regno);
3009 store_reg(s, rn, tcg_reg);
3010 s->base.is_jmp = DISAS_UPDATE;
3013 /* Store value to PC as for an exception return (i.e. don't
3014 * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
3015 * will do the masking based on the new value of the Thumb bit.
3017 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
3019 tcg_gen_mov_i32(cpu_R[15], pc);
3020 tcg_temp_free_i32(pc);
3023 /* Generate a v6 exception return. Marks both values as dead. */
3024 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
3026 store_pc_exc_ret(s, pc);
3027 /* The cpsr_write_eret helper will mask the low bits of PC
3028 * appropriately depending on the new Thumb bit, so it must
3029 * be called after storing the new PC.
3031 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
3034 gen_helper_cpsr_write_eret(cpu_env, cpsr);
3035 tcg_temp_free_i32(cpsr);
3036 /* Must exit loop to check un-masked IRQs */
3037 s->base.is_jmp = DISAS_EXIT;
3040 /* Generate an old-style exception return. Marks pc as dead. */
3041 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
3043 gen_rfe(s, pc, load_cpu_field(spsr));
3046 #define CPU_V001 cpu_V0, cpu_V0, cpu_V1
3048 static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
3051 case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
3052 case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
3053 case 2: tcg_gen_add_i32(t0, t0, t1); break;
3058 static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
3061 case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
3062 case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
3063 case 2: tcg_gen_sub_i32(t0, t1, t0); break;
3068 /* 32-bit pairwise ops end up the same as the elementwise versions. */
3069 #define gen_helper_neon_pmax_s32 tcg_gen_smax_i32
3070 #define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
3071 #define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
3072 #define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
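/* The GEN_NEON_INTEGER_OP{,_ENV} macros below dispatch on (size << 1) | u:
 * e.g. size == 1, u == 0 selects the _s16 helper and size == 2, u == 1
 * selects the _u32 helper.
 */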
3074 #define GEN_NEON_INTEGER_OP_ENV(name) do { \
3075 switch ((size << 1) | u) { \
3077 gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \
3080 gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \
3083 gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \
3086 gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \
3089 gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \
3092 gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \
3094 default: return 1; \
3097 #define GEN_NEON_INTEGER_OP(name) do { \
3098 switch ((size << 1) | u) { \
3100 gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \
3103 gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \
3106 gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \
3109 gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \
3112 gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \
3115 gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \
3117 default: return 1; \
3120 static TCGv_i32 neon_load_scratch(int scratch)
3122 TCGv_i32 tmp = tcg_temp_new_i32();
3123 tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3127 static void neon_store_scratch(int scratch, TCGv_i32 var)
3129 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3130 tcg_temp_free_i32(var);
3133 static inline TCGv_i32 neon_get_scalar(int size, int reg)
3137 tmp = neon_load_reg(reg & 7, reg >> 4);
3139 gen_neon_dup_high16(tmp);
3141 gen_neon_dup_low16(tmp);
3144 tmp = neon_load_reg(reg & 15, reg >> 4);
3149 static int gen_neon_unzip(int rd, int rm, int size, int q)
3153 if (!q && size == 2) {
3156 pd = vfp_reg_ptr(true, rd);
3157 pm = vfp_reg_ptr(true, rm);
3161 gen_helper_neon_qunzip8(pd, pm);
3164 gen_helper_neon_qunzip16(pd, pm);
3167 gen_helper_neon_qunzip32(pd, pm);
3175 gen_helper_neon_unzip8(pd, pm);
3178 gen_helper_neon_unzip16(pd, pm);
3184 tcg_temp_free_ptr(pd);
3185 tcg_temp_free_ptr(pm);
3189 static int gen_neon_zip(int rd, int rm, int size, int q)
3193 if (!q && size == 2) {
3196 pd = vfp_reg_ptr(true, rd);
3197 pm = vfp_reg_ptr(true, rm);
3201 gen_helper_neon_qzip8(pd, pm);
3204 gen_helper_neon_qzip16(pd, pm);
3207 gen_helper_neon_qzip32(pd, pm);
3215 gen_helper_neon_zip8(pd, pm);
3218 gen_helper_neon_zip16(pd, pm);
3224 tcg_temp_free_ptr(pd);
3225 tcg_temp_free_ptr(pm);
3229 static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
3233 rd = tcg_temp_new_i32();
3234 tmp = tcg_temp_new_i32();
3236 tcg_gen_shli_i32(rd, t0, 8);
3237 tcg_gen_andi_i32(rd, rd, 0xff00ff00);
3238 tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
3239 tcg_gen_or_i32(rd, rd, tmp);
3241 tcg_gen_shri_i32(t1, t1, 8);
3242 tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
3243 tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
3244 tcg_gen_or_i32(t1, t1, tmp);
3245 tcg_gen_mov_i32(t0, rd);
3247 tcg_temp_free_i32(tmp);
3248 tcg_temp_free_i32(rd);
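/* In gen_neon_trn_u16() below, t0 = 0x11110000 and t1 = 0x33332222 become
 * t0 = 0x00002222 and t1 = 0x11113333: the low halfwords of both inputs
 * are gathered into t0 and the high halfwords into t1.
 */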
3251 static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
3255 rd = tcg_temp_new_i32();
3256 tmp = tcg_temp_new_i32();
3258 tcg_gen_shli_i32(rd, t0, 16);
3259 tcg_gen_andi_i32(tmp, t1, 0xffff);
3260 tcg_gen_or_i32(rd, rd, tmp);
3261 tcg_gen_shri_i32(t1, t1, 16);
3262 tcg_gen_andi_i32(tmp, t0, 0xffff0000);
3263 tcg_gen_or_i32(t1, t1, tmp);
3264 tcg_gen_mov_i32(t0, rd);
3266 tcg_temp_free_i32(tmp);
3267 tcg_temp_free_i32(rd);
3275 } const neon_ls_element_type[11] = {
3289 /* Translate a NEON load/store element instruction. Return nonzero if the
3290 instruction is invalid. */
3291 static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
3311 /* FIXME: this access check should not take precedence over UNDEF
3312 * for invalid encodings; we will generate incorrect syndrome information
3313 * for attempts to execute invalid vfp/neon encodings with FP disabled.
3315 if (s->fp_excp_el) {
3316 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
3317 syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
3321 if (!s->vfp_enabled)
3323 VFP_DREG_D(rd, insn);
3324 rn = (insn >> 16) & 0xf;
3326 load = (insn & (1 << 21)) != 0;
3327 endian = s->be_data;
3328 mmu_idx = get_mem_index(s);
3329 if ((insn & (1 << 23)) == 0) {
3330 /* Load store all elements. */
3331 op = (insn >> 8) & 0xf;
3332 size = (insn >> 6) & 3;
3335 /* Catch UNDEF cases for bad values of align field */
3338 if (((insn >> 5) & 1) == 1) {
3343 if (((insn >> 4) & 3) == 3) {
3350 nregs = neon_ls_element_type[op].nregs;
3351 interleave = neon_ls_element_type[op].interleave;
3352 spacing = neon_ls_element_type[op].spacing;
3353 if (size == 3 && (interleave | spacing) != 1) {
3356 /* For our purposes, bytes are always little-endian. */
3360 /* Consecutive little-endian elements from a single register
3361 * can be promoted to a larger little-endian operation.
3363 if (interleave == 1 && endian == MO_LE) {
3366 tmp64 = tcg_temp_new_i64();
3367 addr = tcg_temp_new_i32();
3368 tmp2 = tcg_const_i32(1 << size);
3369 load_reg_var(s, addr, rn);
3370 for (reg = 0; reg < nregs; reg++) {
3371 for (n = 0; n < 8 >> size; n++) {
3373 for (xs = 0; xs < interleave; xs++) {
3374 int tt = rd + reg + spacing * xs;
3377 gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size);
3378 neon_store_element64(tt, n, size, tmp64);
3380 neon_load_element64(tmp64, tt, n, size);
3381 gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size);
3383 tcg_gen_add_i32(addr, addr, tmp2);
3387 tcg_temp_free_i32(addr);
3388 tcg_temp_free_i32(tmp2);
3389 tcg_temp_free_i64(tmp64);
3390 stride = nregs * interleave * 8;
3392 size = (insn >> 10) & 3;
3394 /* Load single element to all lanes. */
3395 int a = (insn >> 4) & 1;
3399 size = (insn >> 6) & 3;
3400 nregs = ((insn >> 8) & 3) + 1;
3403 if (nregs != 4 || a == 0) {
3406 /* For VLD4 size==3 a == 1 means 32 bits at 16 byte alignment */
3409 if (nregs == 1 && a == 1 && size == 0) {
3412 if (nregs == 3 && a == 1) {
3415 addr = tcg_temp_new_i32();
3416 load_reg_var(s, addr, rn);
3418 /* VLD1 to all lanes: bit 5 indicates how many Dregs to write.
3419 * VLD2/3/4 to all lanes: bit 5 indicates register stride.
3421 stride = (insn & (1 << 5)) ? 2 : 1;
3422 vec_size = nregs == 1 ? stride * 8 : 8;
3424 tmp = tcg_temp_new_i32();
3425 for (reg = 0; reg < nregs; reg++) {
3426 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
3428 if ((rd & 1) && vec_size == 16) {
3429 /* We cannot write 16 bytes at once because the
3430 * destination is unaligned.
3432 tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0),
3434 tcg_gen_gvec_mov(0, neon_reg_offset(rd + 1, 0),
3435 neon_reg_offset(rd, 0), 8, 8);
3437 tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0),
3438 vec_size, vec_size, tmp);
3440 tcg_gen_addi_i32(addr, addr, 1 << size);
3443 tcg_temp_free_i32(tmp);
3444 tcg_temp_free_i32(addr);
3445 stride = (1 << size) * nregs;
3447 /* Single element. */
3448 int idx = (insn >> 4) & 0xf;
3452 reg_idx = (insn >> 5) & 7;
3456 reg_idx = (insn >> 6) & 3;
3457 stride = (insn & (1 << 5)) ? 2 : 1;
3460 reg_idx = (insn >> 7) & 1;
3461 stride = (insn & (1 << 6)) ? 2 : 1;
3466 nregs = ((insn >> 8) & 3) + 1;
3467 /* Catch the UNDEF cases. This is unavoidably a bit messy. */
3470 if (((idx & (1 << size)) != 0) ||
3471 (size == 2 && ((idx & 3) == 1 || (idx & 3) == 2))) {
3476 if ((idx & 1) != 0) {
3481 if (size == 2 && (idx & 2) != 0) {
3486 if ((size == 2) && ((idx & 3) == 3)) {
3493 if ((rd + stride * (nregs - 1)) > 31) {
3494 /* Attempts to write off the end of the register file
3495 * are UNPREDICTABLE; we choose to UNDEF because otherwise
3496 * the neon_load_reg() would write off the end of the array.
3500 tmp = tcg_temp_new_i32();
3501 addr = tcg_temp_new_i32();
3502 load_reg_var(s, addr, rn);
3503 for (reg = 0; reg < nregs; reg++) {
3505 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
3507 neon_store_element(rd, reg_idx, size, tmp);
3508 } else { /* Store */
3509 neon_load_element(tmp, rd, reg_idx, size);
3510 gen_aa32_st_i32(s, tmp, addr, get_mem_index(s),
3514 tcg_gen_addi_i32(addr, addr, 1 << size);
3516 tcg_temp_free_i32(addr);
3517 tcg_temp_free_i32(tmp);
3518 stride = nregs * (1 << size);
3524 base = load_reg(s, rn);
3526 tcg_gen_addi_i32(base, base, stride);
3529 index = load_reg(s, rm);
3530 tcg_gen_add_i32(base, base, index);
3531 tcg_temp_free_i32(index);
3533 store_reg(s, rn, base);
3538 static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
3541 case 0: gen_helper_neon_narrow_u8(dest, src); break;
3542 case 1: gen_helper_neon_narrow_u16(dest, src); break;
3543 case 2: tcg_gen_extrl_i64_i32(dest, src); break;
3548 static inline void gen_neon_narrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3551 case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
3552 case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
3553 case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
3558 static inline void gen_neon_narrow_satu(int size, TCGv_i32 dest, TCGv_i64 src)
3561 case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
3562 case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
3563 case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
3568 static inline void gen_neon_unarrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3571 case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
3572 case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
3573 case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
3578 static inline void gen_neon_shift_narrow(int size, TCGv_i32 var, TCGv_i32 shift,
3584 case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
3585 case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
3590 case 1: gen_helper_neon_rshl_s16(var, var, shift); break;
3591 case 2: gen_helper_neon_rshl_s32(var, var, shift); break;
3598 case 1: gen_helper_neon_shl_u16(var, var, shift); break;
3599 case 2: gen_helper_neon_shl_u32(var, var, shift); break;
3604 case 1: gen_helper_neon_shl_s16(var, var, shift); break;
3605 case 2: gen_helper_neon_shl_s32(var, var, shift); break;
3612 static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u)
3616 case 0: gen_helper_neon_widen_u8(dest, src); break;
3617 case 1: gen_helper_neon_widen_u16(dest, src); break;
3618 case 2: tcg_gen_extu_i32_i64(dest, src); break;
3623 case 0: gen_helper_neon_widen_s8(dest, src); break;
3624 case 1: gen_helper_neon_widen_s16(dest, src); break;
3625 case 2: tcg_gen_ext_i32_i64(dest, src); break;
3629 tcg_temp_free_i32(src);
3632 static inline void gen_neon_addl(int size)
3635 case 0: gen_helper_neon_addl_u16(CPU_V001); break;
3636 case 1: gen_helper_neon_addl_u32(CPU_V001); break;
3637 case 2: tcg_gen_add_i64(CPU_V001); break;
3642 static inline void gen_neon_subl(int size)
3645 case 0: gen_helper_neon_subl_u16(CPU_V001); break;
3646 case 1: gen_helper_neon_subl_u32(CPU_V001); break;
3647 case 2: tcg_gen_sub_i64(CPU_V001); break;
3652 static inline void gen_neon_negl(TCGv_i64 var, int size)
3655 case 0: gen_helper_neon_negl_u16(var, var); break;
3656 case 1: gen_helper_neon_negl_u32(var, var); break;
3658 tcg_gen_neg_i64(var, var);
3664 static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
3667 case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
3668 case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
3673 static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
3678 switch ((size << 1) | u) {
3679 case 0: gen_helper_neon_mull_s8(dest, a, b); break;
3680 case 1: gen_helper_neon_mull_u8(dest, a, b); break;
3681 case 2: gen_helper_neon_mull_s16(dest, a, b); break;
3682 case 3: gen_helper_neon_mull_u16(dest, a, b); break;
3684 tmp = gen_muls_i64_i32(a, b);
3685 tcg_gen_mov_i64(dest, tmp);
3686 tcg_temp_free_i64(tmp);
3689 tmp = gen_mulu_i64_i32(a, b);
3690 tcg_gen_mov_i64(dest, tmp);
3691 tcg_temp_free_i64(tmp);
3696 /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
3697 Don't forget to clean them now. */
3699 tcg_temp_free_i32(a);
3700 tcg_temp_free_i32(b);
3704 static void gen_neon_narrow_op(int op, int u, int size,
3705 TCGv_i32 dest, TCGv_i64 src)
3709 gen_neon_unarrow_sats(size, dest, src);
3711 gen_neon_narrow(size, dest, src);
3715 gen_neon_narrow_satu(size, dest, src);
3717 gen_neon_narrow_sats(size, dest, src);
3722 /* Symbolic constants for op fields for Neon 3-register same-length.
3723 * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
3726 #define NEON_3R_VHADD 0
3727 #define NEON_3R_VQADD 1
3728 #define NEON_3R_VRHADD 2
3729 #define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
3730 #define NEON_3R_VHSUB 4
3731 #define NEON_3R_VQSUB 5
3732 #define NEON_3R_VCGT 6
3733 #define NEON_3R_VCGE 7
3734 #define NEON_3R_VSHL 8
3735 #define NEON_3R_VQSHL 9
3736 #define NEON_3R_VRSHL 10
3737 #define NEON_3R_VQRSHL 11
3738 #define NEON_3R_VMAX 12
3739 #define NEON_3R_VMIN 13
3740 #define NEON_3R_VABD 14
3741 #define NEON_3R_VABA 15
3742 #define NEON_3R_VADD_VSUB 16
3743 #define NEON_3R_VTST_VCEQ 17
3744 #define NEON_3R_VML 18 /* VMLA, VMLS */
3745 #define NEON_3R_VMUL 19
3746 #define NEON_3R_VPMAX 20
3747 #define NEON_3R_VPMIN 21
3748 #define NEON_3R_VQDMULH_VQRDMULH 22
3749 #define NEON_3R_VPADD_VQRDMLAH 23
3750 #define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
3751 #define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS, VQRDMLSH */
3752 #define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
3753 #define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
3754 #define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
3755 #define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
3756 #define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
3757 #define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */
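/* e.g. integer VADD/VSUB encode bits [11:8] = 0b1000 with bit [4] = 0,
 * which packs to op value 16, i.e. NEON_3R_VADD_VSUB above.
 */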
3759 static const uint8_t neon_3r_sizes[] = {
3760 [NEON_3R_VHADD] = 0x7,
3761 [NEON_3R_VQADD] = 0xf,
3762 [NEON_3R_VRHADD] = 0x7,
3763 [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
3764 [NEON_3R_VHSUB] = 0x7,
3765 [NEON_3R_VQSUB] = 0xf,
3766 [NEON_3R_VCGT] = 0x7,
3767 [NEON_3R_VCGE] = 0x7,
3768 [NEON_3R_VSHL] = 0xf,
3769 [NEON_3R_VQSHL] = 0xf,
3770 [NEON_3R_VRSHL] = 0xf,
3771 [NEON_3R_VQRSHL] = 0xf,
3772 [NEON_3R_VMAX] = 0x7,
3773 [NEON_3R_VMIN] = 0x7,
3774 [NEON_3R_VABD] = 0x7,
3775 [NEON_3R_VABA] = 0x7,
3776 [NEON_3R_VADD_VSUB] = 0xf,
3777 [NEON_3R_VTST_VCEQ] = 0x7,
3778 [NEON_3R_VML] = 0x7,
3779 [NEON_3R_VMUL] = 0x7,
3780 [NEON_3R_VPMAX] = 0x7,
3781 [NEON_3R_VPMIN] = 0x7,
3782 [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
3783 [NEON_3R_VPADD_VQRDMLAH] = 0x7,
3784 [NEON_3R_SHA] = 0xf, /* size field encodes op type */
3785 [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */
3786 [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
3787 [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
3788 [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
3789 [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
3790 [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
3791 [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
3794 /* Symbolic constants for op fields for Neon 2-register miscellaneous.
3795 * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
3798 #define NEON_2RM_VREV64 0
3799 #define NEON_2RM_VREV32 1
3800 #define NEON_2RM_VREV16 2
3801 #define NEON_2RM_VPADDL 4
3802 #define NEON_2RM_VPADDL_U 5
3803 #define NEON_2RM_AESE 6 /* Includes AESD */
3804 #define NEON_2RM_AESMC 7 /* Includes AESIMC */
3805 #define NEON_2RM_VCLS 8
3806 #define NEON_2RM_VCLZ 9
3807 #define NEON_2RM_VCNT 10
3808 #define NEON_2RM_VMVN 11
3809 #define NEON_2RM_VPADAL 12
3810 #define NEON_2RM_VPADAL_U 13
3811 #define NEON_2RM_VQABS 14
3812 #define NEON_2RM_VQNEG 15
3813 #define NEON_2RM_VCGT0 16
3814 #define NEON_2RM_VCGE0 17
3815 #define NEON_2RM_VCEQ0 18
3816 #define NEON_2RM_VCLE0 19
3817 #define NEON_2RM_VCLT0 20
3818 #define NEON_2RM_SHA1H 21
3819 #define NEON_2RM_VABS 22
3820 #define NEON_2RM_VNEG 23
3821 #define NEON_2RM_VCGT0_F 24
3822 #define NEON_2RM_VCGE0_F 25
3823 #define NEON_2RM_VCEQ0_F 26
3824 #define NEON_2RM_VCLE0_F 27
3825 #define NEON_2RM_VCLT0_F 28
3826 #define NEON_2RM_VABS_F 30
3827 #define NEON_2RM_VNEG_F 31
3828 #define NEON_2RM_VSWP 32
3829 #define NEON_2RM_VTRN 33
3830 #define NEON_2RM_VUZP 34
3831 #define NEON_2RM_VZIP 35
3832 #define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
3833 #define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
3834 #define NEON_2RM_VSHLL 38
3835 #define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
3836 #define NEON_2RM_VRINTN 40
3837 #define NEON_2RM_VRINTX 41
3838 #define NEON_2RM_VRINTA 42
3839 #define NEON_2RM_VRINTZ 43
3840 #define NEON_2RM_VCVT_F16_F32 44
3841 #define NEON_2RM_VRINTM 45
3842 #define NEON_2RM_VCVT_F32_F16 46
3843 #define NEON_2RM_VRINTP 47
3844 #define NEON_2RM_VCVTAU 48
3845 #define NEON_2RM_VCVTAS 49
3846 #define NEON_2RM_VCVTNU 50
3847 #define NEON_2RM_VCVTNS 51
3848 #define NEON_2RM_VCVTPU 52
3849 #define NEON_2RM_VCVTPS 53
3850 #define NEON_2RM_VCVTMU 54
3851 #define NEON_2RM_VCVTMS 55
3852 #define NEON_2RM_VRECPE 56
3853 #define NEON_2RM_VRSQRTE 57
3854 #define NEON_2RM_VRECPE_F 58
3855 #define NEON_2RM_VRSQRTE_F 59
3856 #define NEON_2RM_VCVT_FS 60
3857 #define NEON_2RM_VCVT_FU 61
3858 #define NEON_2RM_VCVT_SF 62
3859 #define NEON_2RM_VCVT_UF 63
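/* e.g. op value 40 (NEON_2RM_VRINTN) corresponds to bits [17:16] = 0b10
 * and bits [10:7] = 0b1000 under this packing.
 */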
3861 static bool neon_2rm_is_v8_op(int op)
3863 /* Return true if this neon 2reg-misc op is ARMv8 and up */
3865 case NEON_2RM_VRINTN:
3866 case NEON_2RM_VRINTA:
3867 case NEON_2RM_VRINTM:
3868 case NEON_2RM_VRINTP:
3869 case NEON_2RM_VRINTZ:
3870 case NEON_2RM_VRINTX:
3871 case NEON_2RM_VCVTAU:
3872 case NEON_2RM_VCVTAS:
3873 case NEON_2RM_VCVTNU:
3874 case NEON_2RM_VCVTNS:
3875 case NEON_2RM_VCVTPU:
3876 case NEON_2RM_VCVTPS:
3877 case NEON_2RM_VCVTMU:
3878 case NEON_2RM_VCVTMS:
3885 /* Each entry in this array has bit n set if the insn allows
3886 * size value n (otherwise it will UNDEF). Since unallocated
3887 * op values will have no bits set, so they always UNDEF.
3889 static const uint8_t neon_2rm_sizes[] = {
3890 [NEON_2RM_VREV64] = 0x7,
3891 [NEON_2RM_VREV32] = 0x3,
3892 [NEON_2RM_VREV16] = 0x1,
3893 [NEON_2RM_VPADDL] = 0x7,
3894 [NEON_2RM_VPADDL_U] = 0x7,
3895 [NEON_2RM_AESE] = 0x1,
3896 [NEON_2RM_AESMC] = 0x1,
3897 [NEON_2RM_VCLS] = 0x7,
3898 [NEON_2RM_VCLZ] = 0x7,
3899 [NEON_2RM_VCNT] = 0x1,
3900 [NEON_2RM_VMVN] = 0x1,
3901 [NEON_2RM_VPADAL] = 0x7,
3902 [NEON_2RM_VPADAL_U] = 0x7,
3903 [NEON_2RM_VQABS] = 0x7,
3904 [NEON_2RM_VQNEG] = 0x7,
3905 [NEON_2RM_VCGT0] = 0x7,
3906 [NEON_2RM_VCGE0] = 0x7,
3907 [NEON_2RM_VCEQ0] = 0x7,
3908 [NEON_2RM_VCLE0] = 0x7,
3909 [NEON_2RM_VCLT0] = 0x7,
3910 [NEON_2RM_SHA1H] = 0x4,
3911 [NEON_2RM_VABS] = 0x7,
3912 [NEON_2RM_VNEG] = 0x7,
3913 [NEON_2RM_VCGT0_F] = 0x4,
3914 [NEON_2RM_VCGE0_F] = 0x4,
3915 [NEON_2RM_VCEQ0_F] = 0x4,
3916 [NEON_2RM_VCLE0_F] = 0x4,
3917 [NEON_2RM_VCLT0_F] = 0x4,
3918 [NEON_2RM_VABS_F] = 0x4,
3919 [NEON_2RM_VNEG_F] = 0x4,
3920 [NEON_2RM_VSWP] = 0x1,
3921 [NEON_2RM_VTRN] = 0x7,
3922 [NEON_2RM_VUZP] = 0x7,
3923 [NEON_2RM_VZIP] = 0x7,
3924 [NEON_2RM_VMOVN] = 0x7,
3925 [NEON_2RM_VQMOVN] = 0x7,
3926 [NEON_2RM_VSHLL] = 0x7,
3927 [NEON_2RM_SHA1SU1] = 0x4,
3928 [NEON_2RM_VRINTN] = 0x4,
3929 [NEON_2RM_VRINTX] = 0x4,
3930 [NEON_2RM_VRINTA] = 0x4,
3931 [NEON_2RM_VRINTZ] = 0x4,
3932 [NEON_2RM_VCVT_F16_F32] = 0x2,
3933 [NEON_2RM_VRINTM] = 0x4,
3934 [NEON_2RM_VCVT_F32_F16] = 0x2,
3935 [NEON_2RM_VRINTP] = 0x4,
3936 [NEON_2RM_VCVTAU] = 0x4,
3937 [NEON_2RM_VCVTAS] = 0x4,
3938 [NEON_2RM_VCVTNU] = 0x4,
3939 [NEON_2RM_VCVTNS] = 0x4,
3940 [NEON_2RM_VCVTPU] = 0x4,
3941 [NEON_2RM_VCVTPS] = 0x4,
3942 [NEON_2RM_VCVTMU] = 0x4,
3943 [NEON_2RM_VCVTMS] = 0x4,
3944 [NEON_2RM_VRECPE] = 0x4,
3945 [NEON_2RM_VRSQRTE] = 0x4,
3946 [NEON_2RM_VRECPE_F] = 0x4,
3947 [NEON_2RM_VRSQRTE_F] = 0x4,
3948 [NEON_2RM_VCVT_FS] = 0x4,
3949 [NEON_2RM_VCVT_FU] = 0x4,
3950 [NEON_2RM_VCVT_SF] = 0x4,
3951 [NEON_2RM_VCVT_UF] = 0x4,
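    /* e.g. [NEON_2RM_VREV32] = 0x3 means VREV32 is only valid for size 0
     * (bytes) and size 1 (halfwords); a size 2 encoding UNDEFs.
     */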
3955 /* Expand v8.1 simd helper. */
3956 static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn,
3957 int q, int rd, int rn, int rm)
3959 if (dc_isar_feature(aa32_rdm, s)) {
3960 int opr_sz = (1 + q) * 8;
3961 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
3962 vfp_reg_offset(1, rn),
3963 vfp_reg_offset(1, rm), cpu_env,
3964 opr_sz, opr_sz, 0, fn);
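/* The gen_[su]sra* helpers below implement shift-right-and-accumulate
 * (VSRA/USRA): each element of d becomes d + (a >> shift), with a signed
 * shift for the ssra variants and an unsigned shift for usra.
 */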
3970 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3972 tcg_gen_vec_sar8i_i64(a, a, shift);
3973 tcg_gen_vec_add8_i64(d, d, a);
3976 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3978 tcg_gen_vec_sar16i_i64(a, a, shift);
3979 tcg_gen_vec_add16_i64(d, d, a);
3982 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3984 tcg_gen_sari_i32(a, a, shift);
3985 tcg_gen_add_i32(d, d, a);
3988 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3990 tcg_gen_sari_i64(a, a, shift);
3991 tcg_gen_add_i64(d, d, a);
3994 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3996 tcg_gen_sari_vec(vece, a, a, sh);
3997 tcg_gen_add_vec(vece, d, d, a);
4000 static const TCGOpcode vecop_list_ssra[] = {
4001 INDEX_op_sari_vec, INDEX_op_add_vec, 0
4004 const GVecGen2i ssra_op[4] = {
4005 { .fni8 = gen_ssra8_i64,
4006 .fniv = gen_ssra_vec,
4008 .opt_opc = vecop_list_ssra,
4010 { .fni8 = gen_ssra16_i64,
4011 .fniv = gen_ssra_vec,
4013 .opt_opc = vecop_list_ssra,
4015 { .fni4 = gen_ssra32_i32,
4016 .fniv = gen_ssra_vec,
4018 .opt_opc = vecop_list_ssra,
4020 { .fni8 = gen_ssra64_i64,
4021 .fniv = gen_ssra_vec,
4022 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4023 .opt_opc = vecop_list_ssra,
4028 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4030 tcg_gen_vec_shr8i_i64(a, a, shift);
4031 tcg_gen_vec_add8_i64(d, d, a);
4034 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4036 tcg_gen_vec_shr16i_i64(a, a, shift);
4037 tcg_gen_vec_add16_i64(d, d, a);
4040 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4042 tcg_gen_shri_i32(a, a, shift);
4043 tcg_gen_add_i32(d, d, a);
4046 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4048 tcg_gen_shri_i64(a, a, shift);
4049 tcg_gen_add_i64(d, d, a);
4052 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4054 tcg_gen_shri_vec(vece, a, a, sh);
4055 tcg_gen_add_vec(vece, d, d, a);
4058 static const TCGOpcode vecop_list_usra[] = {
4059 INDEX_op_shri_vec, INDEX_op_add_vec, 0
4062 const GVecGen2i usra_op[4] = {
4063 { .fni8 = gen_usra8_i64,
4064 .fniv = gen_usra_vec,
4066 .opt_opc = vecop_list_usra,
4068 { .fni8 = gen_usra16_i64,
4069 .fniv = gen_usra_vec,
4071 .opt_opc = vecop_list_usra,
4073 { .fni4 = gen_usra32_i32,
4074 .fniv = gen_usra_vec,
4076 .opt_opc = vecop_list_usra,
4078 { .fni8 = gen_usra64_i64,
4079 .fniv = gen_usra_vec,
4080 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4082 .opt_opc = vecop_list_usra,
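/* The gen_shr*_ins_* helpers below implement shift-right-and-insert (VSRI):
 * each element of d keeps its top 'shift' bits and takes the remaining bits
 * from a >> shift.
 */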
4086 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4088 uint64_t mask = dup_const(MO_8, 0xff >> shift);
4089 TCGv_i64 t = tcg_temp_new_i64();
4091 tcg_gen_shri_i64(t, a, shift);
4092 tcg_gen_andi_i64(t, t, mask);
4093 tcg_gen_andi_i64(d, d, ~mask);
4094 tcg_gen_or_i64(d, d, t);
4095 tcg_temp_free_i64(t);
4098 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4100 uint64_t mask = dup_const(MO_16, 0xffff >> shift);
4101 TCGv_i64 t = tcg_temp_new_i64();
4103 tcg_gen_shri_i64(t, a, shift);
4104 tcg_gen_andi_i64(t, t, mask);
4105 tcg_gen_andi_i64(d, d, ~mask);
4106 tcg_gen_or_i64(d, d, t);
4107 tcg_temp_free_i64(t);
4110 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4112 tcg_gen_shri_i32(a, a, shift);
4113 tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
4116 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4118 tcg_gen_shri_i64(a, a, shift);
4119 tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
4122 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4125 tcg_gen_mov_vec(d, a);
4127 TCGv_vec t = tcg_temp_new_vec_matching(d);
4128 TCGv_vec m = tcg_temp_new_vec_matching(d);
4130 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
4131 tcg_gen_shri_vec(vece, t, a, sh);
4132 tcg_gen_and_vec(vece, d, d, m);
4133 tcg_gen_or_vec(vece, d, d, t);
4135 tcg_temp_free_vec(t);
4136 tcg_temp_free_vec(m);
4140 static const TCGOpcode vecop_list_sri[] = { INDEX_op_shri_vec, 0 };
4142 const GVecGen2i sri_op[4] = {
4143 { .fni8 = gen_shr8_ins_i64,
4144 .fniv = gen_shr_ins_vec,
4146 .opt_opc = vecop_list_sri,
4148 { .fni8 = gen_shr16_ins_i64,
4149 .fniv = gen_shr_ins_vec,
4151 .opt_opc = vecop_list_sri,
4153 { .fni4 = gen_shr32_ins_i32,
4154 .fniv = gen_shr_ins_vec,
4156 .opt_opc = vecop_list_sri,
4158 { .fni8 = gen_shr64_ins_i64,
4159 .fniv = gen_shr_ins_vec,
4160 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4162 .opt_opc = vecop_list_sri,
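/* The gen_shl*_ins_* helpers below implement shift-left-and-insert (VSLI):
 * each element of d keeps its low 'shift' bits and takes the remaining bits
 * from a << shift.
 */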
4166 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4168 uint64_t mask = dup_const(MO_8, 0xff << shift);
4169 TCGv_i64 t = tcg_temp_new_i64();
4171 tcg_gen_shli_i64(t, a, shift);
4172 tcg_gen_andi_i64(t, t, mask);
4173 tcg_gen_andi_i64(d, d, ~mask);
4174 tcg_gen_or_i64(d, d, t);
4175 tcg_temp_free_i64(t);
4178 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4180 uint64_t mask = dup_const(MO_16, 0xffff << shift);
4181 TCGv_i64 t = tcg_temp_new_i64();
4183 tcg_gen_shli_i64(t, a, shift);
4184 tcg_gen_andi_i64(t, t, mask);
4185 tcg_gen_andi_i64(d, d, ~mask);
4186 tcg_gen_or_i64(d, d, t);
4187 tcg_temp_free_i64(t);
4190 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4192 tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
4195 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4197 tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
4200 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4203 tcg_gen_mov_vec(d, a);
4205 TCGv_vec t = tcg_temp_new_vec_matching(d);
4206 TCGv_vec m = tcg_temp_new_vec_matching(d);
4208 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
4209 tcg_gen_shli_vec(vece, t, a, sh);
4210 tcg_gen_and_vec(vece, d, d, m);
4211 tcg_gen_or_vec(vece, d, d, t);
4213 tcg_temp_free_vec(t);
4214 tcg_temp_free_vec(m);
4218 static const TCGOpcode vecop_list_sli[] = { INDEX_op_shli_vec, 0 };
4220 const GVecGen2i sli_op[4] = {
4221 { .fni8 = gen_shl8_ins_i64,
4222 .fniv = gen_shl_ins_vec,
4224 .opt_opc = vecop_list_sli,
4226 { .fni8 = gen_shl16_ins_i64,
4227 .fniv = gen_shl_ins_vec,
4229 .opt_opc = vecop_list_sli,
4231 { .fni4 = gen_shl32_ins_i32,
4232 .fniv = gen_shl_ins_vec,
4234 .opt_opc = vecop_list_sli,
4236 { .fni8 = gen_shl64_ins_i64,
4237 .fniv = gen_shl_ins_vec,
4238 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4240 .opt_opc = vecop_list_sli,
4244 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4246 gen_helper_neon_mul_u8(a, a, b);
4247 gen_helper_neon_add_u8(d, d, a);
4250 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4252 gen_helper_neon_mul_u8(a, a, b);
4253 gen_helper_neon_sub_u8(d, d, a);
4256 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4258 gen_helper_neon_mul_u16(a, a, b);
4259 gen_helper_neon_add_u16(d, d, a);
4262 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4264 gen_helper_neon_mul_u16(a, a, b);
4265 gen_helper_neon_sub_u16(d, d, a);
4268 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4270 tcg_gen_mul_i32(a, a, b);
4271 tcg_gen_add_i32(d, d, a);
4274 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4276 tcg_gen_mul_i32(a, a, b);
4277 tcg_gen_sub_i32(d, d, a);
4280 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4282 tcg_gen_mul_i64(a, a, b);
4283 tcg_gen_add_i64(d, d, a);
4286 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4288 tcg_gen_mul_i64(a, a, b);
4289 tcg_gen_sub_i64(d, d, a);
4292 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4294 tcg_gen_mul_vec(vece, a, a, b);
4295 tcg_gen_add_vec(vece, d, d, a);
4298 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4300 tcg_gen_mul_vec(vece, a, a, b);
4301 tcg_gen_sub_vec(vece, d, d, a);
4304 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
4305 * these tables are shared with AArch64 which does support them.
4308 static const TCGOpcode vecop_list_mla[] = {
4309 INDEX_op_mul_vec, INDEX_op_add_vec, 0
4312 static const TCGOpcode vecop_list_mls[] = {
4313 INDEX_op_mul_vec, INDEX_op_sub_vec, 0
4316 const GVecGen3 mla_op[4] = {
4317 { .fni4 = gen_mla8_i32,
4318 .fniv = gen_mla_vec,
4320 .opt_opc = vecop_list_mla,
4322 { .fni4 = gen_mla16_i32,
4323 .fniv = gen_mla_vec,
4325 .opt_opc = vecop_list_mla,
4327 { .fni4 = gen_mla32_i32,
4328 .fniv = gen_mla_vec,
4330 .opt_opc = vecop_list_mla,
4332 { .fni8 = gen_mla64_i64,
4333 .fniv = gen_mla_vec,
4334 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4336 .opt_opc = vecop_list_mla,
4340 const GVecGen3 mls_op[4] = {
4341 { .fni4 = gen_mls8_i32,
4342 .fniv = gen_mls_vec,
4344 .opt_opc = vecop_list_mls,
4346 { .fni4 = gen_mls16_i32,
4347 .fniv = gen_mls_vec,
4349 .opt_opc = vecop_list_mls,
4351 { .fni4 = gen_mls32_i32,
4352 .fniv = gen_mls_vec,
4354 .opt_opc = vecop_list_mls,
4356 { .fni8 = gen_mls64_i64,
4357 .fniv = gen_mls_vec,
4358 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4360 .opt_opc = vecop_list_mls,
4364 /* CMTST : test is "if ((X & Y) != 0)". */
4365 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4367 tcg_gen_and_i32(d, a, b);
4368 tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
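    /* setcond leaves 0 or 1 in d; negating turns that into the all-zeroes
     * or all-ones mask that NEON comparison results use.
     */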
4369 tcg_gen_neg_i32(d, d);
4372 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4374 tcg_gen_and_i64(d, a, b);
4375 tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
4376 tcg_gen_neg_i64(d, d);
4379 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4381 tcg_gen_and_vec(vece, d, a, b);
4382 tcg_gen_dupi_vec(vece, a, 0);
4383 tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
4386 static const TCGOpcode vecop_list_cmtst[] = { INDEX_op_cmp_vec, 0 };
4388 const GVecGen3 cmtst_op[4] = {
4389 { .fni4 = gen_helper_neon_tst_u8,
4390 .fniv = gen_cmtst_vec,
4391 .opt_opc = vecop_list_cmtst,
4393 { .fni4 = gen_helper_neon_tst_u16,
4394 .fniv = gen_cmtst_vec,
4395 .opt_opc = vecop_list_cmtst,
4397 { .fni4 = gen_cmtst_i32,
4398 .fniv = gen_cmtst_vec,
4399 .opt_opc = vecop_list_cmtst,
4401 { .fni8 = gen_cmtst_i64,
4402 .fniv = gen_cmtst_vec,
4403 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4404 .opt_opc = vecop_list_cmtst,
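/* For the saturating add/sub expanders below: compute both the wrapping
 * result (x) and the saturating result (t); lanes where they differ have
 * saturated and are ORed into 'sat', which the callers point at vfp.qc so
 * that the cumulative QC flag gets set.
 */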
4408 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4409 TCGv_vec a, TCGv_vec b)
4411 TCGv_vec x = tcg_temp_new_vec_matching(t);
4412 tcg_gen_add_vec(vece, x, a, b);
4413 tcg_gen_usadd_vec(vece, t, a, b);
4414 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4415 tcg_gen_or_vec(vece, sat, sat, x);
4416 tcg_temp_free_vec(x);
4419 static const TCGOpcode vecop_list_uqadd[] = {
4420 INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4423 const GVecGen4 uqadd_op[4] = {
4424 { .fniv = gen_uqadd_vec,
4425 .fno = gen_helper_gvec_uqadd_b,
4427 .opt_opc = vecop_list_uqadd,
4429 { .fniv = gen_uqadd_vec,
4430 .fno = gen_helper_gvec_uqadd_h,
4432 .opt_opc = vecop_list_uqadd,
4434 { .fniv = gen_uqadd_vec,
4435 .fno = gen_helper_gvec_uqadd_s,
4437 .opt_opc = vecop_list_uqadd,
4439 { .fniv = gen_uqadd_vec,
4440 .fno = gen_helper_gvec_uqadd_d,
4442 .opt_opc = vecop_list_uqadd,
4446 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4447 TCGv_vec a, TCGv_vec b)
4449 TCGv_vec x = tcg_temp_new_vec_matching(t);
4450 tcg_gen_add_vec(vece, x, a, b);
4451 tcg_gen_ssadd_vec(vece, t, a, b);
4452 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4453 tcg_gen_or_vec(vece, sat, sat, x);
4454 tcg_temp_free_vec(x);
4457 static const TCGOpcode vecop_list_sqadd[] = {
4458 INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4461 const GVecGen4 sqadd_op[4] = {
4462 { .fniv = gen_sqadd_vec,
4463 .fno = gen_helper_gvec_sqadd_b,
4464 .opt_opc = vecop_list_sqadd,
4467 { .fniv = gen_sqadd_vec,
4468 .fno = gen_helper_gvec_sqadd_h,
4469 .opt_opc = vecop_list_sqadd,
4472 { .fniv = gen_sqadd_vec,
4473 .fno = gen_helper_gvec_sqadd_s,
4474 .opt_opc = vecop_list_sqadd,
4477 { .fniv = gen_sqadd_vec,
4478 .fno = gen_helper_gvec_sqadd_d,
4479 .opt_opc = vecop_list_sqadd,
4484 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4485 TCGv_vec a, TCGv_vec b)
4487 TCGv_vec x = tcg_temp_new_vec_matching(t);
4488 tcg_gen_sub_vec(vece, x, a, b);
4489 tcg_gen_ussub_vec(vece, t, a, b);
4490 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4491 tcg_gen_or_vec(vece, sat, sat, x);
4492 tcg_temp_free_vec(x);
4495 static const TCGOpcode vecop_list_uqsub[] = {
4496 INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4499 const GVecGen4 uqsub_op[4] = {
4500 { .fniv = gen_uqsub_vec,
4501 .fno = gen_helper_gvec_uqsub_b,
4502 .opt_opc = vecop_list_uqsub,
4505 { .fniv = gen_uqsub_vec,
4506 .fno = gen_helper_gvec_uqsub_h,
4507 .opt_opc = vecop_list_uqsub,
4510 { .fniv = gen_uqsub_vec,
4511 .fno = gen_helper_gvec_uqsub_s,
4512 .opt_opc = vecop_list_uqsub,
4515 { .fniv = gen_uqsub_vec,
4516 .fno = gen_helper_gvec_uqsub_d,
4517 .opt_opc = vecop_list_uqsub,
4522 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4523 TCGv_vec a, TCGv_vec b)
4525 TCGv_vec x = tcg_temp_new_vec_matching(t);
4526 tcg_gen_sub_vec(vece, x, a, b);
4527 tcg_gen_sssub_vec(vece, t, a, b);
4528 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4529 tcg_gen_or_vec(vece, sat, sat, x);
4530 tcg_temp_free_vec(x);
4533 static const TCGOpcode vecop_list_sqsub[] = {
4534 INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4537 const GVecGen4 sqsub_op[4] = {
4538 { .fniv = gen_sqsub_vec,
4539 .fno = gen_helper_gvec_sqsub_b,
4540 .opt_opc = vecop_list_sqsub,
4543 { .fniv = gen_sqsub_vec,
4544 .fno = gen_helper_gvec_sqsub_h,
4545 .opt_opc = vecop_list_sqsub,
4548 { .fniv = gen_sqsub_vec,
4549 .fno = gen_helper_gvec_sqsub_s,
4550 .opt_opc = vecop_list_sqsub,
4553 { .fniv = gen_sqsub_vec,
4554 .fno = gen_helper_gvec_sqsub_d,
4555 .opt_opc = vecop_list_sqsub,
4560 /* Translate a NEON data processing instruction. Return nonzero if the
4561 instruction is invalid.
4562 We process data in a mixture of 32-bit and 64-bit chunks.
4563 Mostly we use 32-bit chunks so we can use normal scalar instructions. */
4565 static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
4569 int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs;
4578 TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
4579 TCGv_ptr ptr1, ptr2, ptr3;
4582 /* FIXME: this access check should not take precedence over UNDEF
4583 * for invalid encodings; we will generate incorrect syndrome information
4584 * for attempts to execute invalid vfp/neon encodings with FP disabled.
4586 if (s->fp_excp_el) {
4587 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
4588 syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
4592 if (!s->vfp_enabled)
4594 q = (insn & (1 << 6)) != 0;
4595 u = (insn >> 24) & 1;
4596 VFP_DREG_D(rd, insn);
4597 VFP_DREG_N(rn, insn);
4598 VFP_DREG_M(rm, insn);
4599 size = (insn >> 20) & 3;
4600 vec_size = q ? 16 : 8;
4601 rd_ofs = neon_reg_offset(rd, 0);
4602 rn_ofs = neon_reg_offset(rn, 0);
4603 rm_ofs = neon_reg_offset(rm, 0);
4605 if ((insn & (1 << 23)) == 0) {
4606 /* Three register same length. */
4607 op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
4608 /* Catch invalid op and bad size combinations: UNDEF */
4609 if ((neon_3r_sizes[op] & (1 << size)) == 0) {
4612 /* All insns of this form UNDEF for either this condition or the
4613 * superset of cases "Q==1"; we catch the latter later.
4615 if (q && ((rd | rn | rm) & 1)) {
4620 /* The SHA-1/SHA-256 3-register instructions require special
4621 * treatment here, as their size field is overloaded as an
4622 * op type selector, and they all consume their input in a single pass.
4628 if (!u) { /* SHA-1 */
4629 if (!dc_isar_feature(aa32_sha1, s)) {
4632 ptr1 = vfp_reg_ptr(true, rd);
4633 ptr2 = vfp_reg_ptr(true, rn);
4634 ptr3 = vfp_reg_ptr(true, rm);
4635 tmp4 = tcg_const_i32(size);
4636 gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp4);
4637 tcg_temp_free_i32(tmp4);
4638 } else { /* SHA-256 */
4639 if (!dc_isar_feature(aa32_sha2, s) || size == 3) {
4642 ptr1 = vfp_reg_ptr(true, rd);
4643 ptr2 = vfp_reg_ptr(true, rn);
4644 ptr3 = vfp_reg_ptr(true, rm);
4647 gen_helper_crypto_sha256h(ptr1, ptr2, ptr3);
4650 gen_helper_crypto_sha256h2(ptr1, ptr2, ptr3);
4653 gen_helper_crypto_sha256su1(ptr1, ptr2, ptr3);
4657 tcg_temp_free_ptr(ptr1);
4658 tcg_temp_free_ptr(ptr2);
4659 tcg_temp_free_ptr(ptr3);
4662 case NEON_3R_VPADD_VQRDMLAH:
4669 return do_v81_helper(s, gen_helper_gvec_qrdmlah_s16,
4672 return do_v81_helper(s, gen_helper_gvec_qrdmlah_s32,
4677 case NEON_3R_VFM_VQRDMLSH:
4688 return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s16,
4691 return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s32,
4696 case NEON_3R_LOGIC: /* Logic ops. */
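/* (u << 2) | size selects among VAND, VBIC, VORR, VORN, VEOR, VBSL,
 * VBIT and VBIF; all of them expand to generic vector ops, with the
 * three bitwise-select forms mapping onto gvec_bitsel with the
 * operands permuted appropriately.
 */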
4697 switch ((u << 2) | size) {
4699 tcg_gen_gvec_and(0, rd_ofs, rn_ofs, rm_ofs,
4700 vec_size, vec_size);
4703 tcg_gen_gvec_andc(0, rd_ofs, rn_ofs, rm_ofs,
4704 vec_size, vec_size);
4707 tcg_gen_gvec_or(0, rd_ofs, rn_ofs, rm_ofs,
4708 vec_size, vec_size);
4711 tcg_gen_gvec_orc(0, rd_ofs, rn_ofs, rm_ofs,
4712 vec_size, vec_size);
4715 tcg_gen_gvec_xor(0, rd_ofs, rn_ofs, rm_ofs,
4716 vec_size, vec_size);
4719 tcg_gen_gvec_bitsel(MO_8, rd_ofs, rd_ofs, rn_ofs, rm_ofs,
4720 vec_size, vec_size);
4723 tcg_gen_gvec_bitsel(MO_8, rd_ofs, rm_ofs, rn_ofs, rd_ofs,
4724 vec_size, vec_size);
4727 tcg_gen_gvec_bitsel(MO_8, rd_ofs, rm_ofs, rd_ofs, rn_ofs,
4728 vec_size, vec_size);
4733 case NEON_3R_VADD_VSUB:
4735 tcg_gen_gvec_sub(size, rd_ofs, rn_ofs, rm_ofs,
4736 vec_size, vec_size);
4738 tcg_gen_gvec_add(size, rd_ofs, rn_ofs, rm_ofs,
4739 vec_size, vec_size);
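/* VQADD/VQSUB: the extra vfp.qc operand lets the per-size op tables
 * accumulate a saturation indication into the cumulative QC flag.
 */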
4744 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4745 rn_ofs, rm_ofs, vec_size, vec_size,
4746 (u ? uqadd_op : sqadd_op) + size);
4750 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4751 rn_ofs, rm_ofs, vec_size, vec_size,
4752 (u ? uqsub_op : sqsub_op) + size);
4755 case NEON_3R_VMUL: /* VMUL */
4757 /* Polynomial case allows only P8 and is handled below. */
4762 tcg_gen_gvec_mul(size, rd_ofs, rn_ofs, rm_ofs,
4763 vec_size, vec_size);
4768 case NEON_3R_VML: /* VMLA, VMLS */
4769 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
4770 u ? &mls_op[size] : &mla_op[size]);
4773 case NEON_3R_VTST_VCEQ:
4775 tcg_gen_gvec_cmp(TCG_COND_EQ, size, rd_ofs, rn_ofs, rm_ofs,
4776 vec_size, vec_size);
4778 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs,
4779 vec_size, vec_size, &cmtst_op[size]);
4784 tcg_gen_gvec_cmp(u ? TCG_COND_GTU : TCG_COND_GT, size,
4785 rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
4789 tcg_gen_gvec_cmp(u ? TCG_COND_GEU : TCG_COND_GE, size,
4790 rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
4795 tcg_gen_gvec_umax(size, rd_ofs, rn_ofs, rm_ofs,
4796 vec_size, vec_size);
4798 tcg_gen_gvec_smax(size, rd_ofs, rn_ofs, rm_ofs,
4799 vec_size, vec_size);
4804 tcg_gen_gvec_umin(size, rd_ofs, rn_ofs, rm_ofs,
4805 vec_size, vec_size);
4807 tcg_gen_gvec_smin(size, rd_ofs, rn_ofs, rm_ofs,
4808 vec_size, vec_size);
4814 /* 64-bit element instructions. */
4815 for (pass = 0; pass < (q ? 2 : 1); pass++) {
4816 neon_load_reg64(cpu_V0, rn + pass);
4817 neon_load_reg64(cpu_V1, rm + pass);
4821 gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
4823 gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0);
4828 gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
4831 gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
4837 gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
4839 gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
4842 case NEON_3R_VQRSHL:
4844 gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
4847 gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
4854 neon_store_reg64(cpu_V0, rd + pass);
4863 case NEON_3R_VQRSHL:
4866 /* Shift instruction operands are reversed. */
4872 case NEON_3R_VPADD_VQRDMLAH:
4877 case NEON_3R_FLOAT_ARITH:
4878 pairwise = (u && size < 2); /* if VPADD (float) */
4880 case NEON_3R_FLOAT_MINMAX:
4881 pairwise = u; /* if VPMIN/VPMAX (float) */
4883 case NEON_3R_FLOAT_CMP:
4885 /* no encoding for U=0 C=1x */
4889 case NEON_3R_FLOAT_ACMP:
4894 case NEON_3R_FLOAT_MISC:
4895 /* VMAXNM/VMINNM in ARMv8 */
4896 if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) {
4900 case NEON_3R_VFM_VQRDMLSH:
4901 if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) {
4909 if (pairwise && q) {
4910 /* All the pairwise insns UNDEF if Q is set */
4914 for (pass = 0; pass < (q ? 4 : 2); pass++) {
4919 tmp = neon_load_reg(rn, 0);
4920 tmp2 = neon_load_reg(rn, 1);
4922 tmp = neon_load_reg(rm, 0);
4923 tmp2 = neon_load_reg(rm, 1);
4927 tmp = neon_load_reg(rn, pass);
4928 tmp2 = neon_load_reg(rm, pass);
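/* Element-wise body: GEN_NEON_INTEGER_OP(name) expands to the
 * gen_helper_neon_<name>_{s,u}{8,16,32} call selected by 'size' and
 * 'u'; the _ENV variant additionally passes cpu_env for helpers that
 * need to update the saturation flag.
 */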
4932 GEN_NEON_INTEGER_OP(hadd);
4934 case NEON_3R_VRHADD:
4935 GEN_NEON_INTEGER_OP(rhadd);
4938 GEN_NEON_INTEGER_OP(hsub);
4941 GEN_NEON_INTEGER_OP(shl);
4944 GEN_NEON_INTEGER_OP_ENV(qshl);
4947 GEN_NEON_INTEGER_OP(rshl);
4949 case NEON_3R_VQRSHL:
4950 GEN_NEON_INTEGER_OP_ENV(qrshl);
4953 GEN_NEON_INTEGER_OP(abd);
4956 GEN_NEON_INTEGER_OP(abd);
4957 tcg_temp_free_i32(tmp2);
4958 tmp2 = neon_load_reg(rd, pass);
4959 gen_neon_add(size, tmp, tmp2);
4962 /* VMUL.P8; other cases already eliminated. */
4963 gen_helper_neon_mul_p8(tmp, tmp, tmp2);
4966 GEN_NEON_INTEGER_OP(pmax);
4969 GEN_NEON_INTEGER_OP(pmin);
4971 case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high. */
4972 if (!u) { /* VQDMULH */
4975 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
4978 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
4982 } else { /* VQRDMULH */
4985 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
4988 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
4994 case NEON_3R_VPADD_VQRDMLAH:
4996 case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
4997 case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
4998 case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
5002 case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
5004 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5005 switch ((u << 2) | size) {
5008 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5011 gen_helper_vfp_subs(tmp, tmp, tmp2, fpstatus);
5014 gen_helper_neon_abd_f32(tmp, tmp, tmp2, fpstatus);
5019 tcg_temp_free_ptr(fpstatus);
5022 case NEON_3R_FLOAT_MULTIPLY:
5024 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5025 gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
5027 tcg_temp_free_i32(tmp2);
5028 tmp2 = neon_load_reg(rd, pass);
5030 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5032 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
5035 tcg_temp_free_ptr(fpstatus);
5038 case NEON_3R_FLOAT_CMP:
5040 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5042 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
5045 gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
5047 gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
5050 tcg_temp_free_ptr(fpstatus);
5053 case NEON_3R_FLOAT_ACMP:
5055 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5057 gen_helper_neon_acge_f32(tmp, tmp, tmp2, fpstatus);
5059 gen_helper_neon_acgt_f32(tmp, tmp, tmp2, fpstatus);
5061 tcg_temp_free_ptr(fpstatus);
5064 case NEON_3R_FLOAT_MINMAX:
5066 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5068 gen_helper_vfp_maxs(tmp, tmp, tmp2, fpstatus);
5070 gen_helper_vfp_mins(tmp, tmp, tmp2, fpstatus);
5072 tcg_temp_free_ptr(fpstatus);
5075 case NEON_3R_FLOAT_MISC:
5078 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5080 gen_helper_vfp_maxnums(tmp, tmp, tmp2, fpstatus);
5082 gen_helper_vfp_minnums(tmp, tmp, tmp2, fpstatus);
5084 tcg_temp_free_ptr(fpstatus);
5087 gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
5089 gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
5093 case NEON_3R_VFM_VQRDMLSH:
5095 /* VFMA, VFMS: fused multiply-add */
5096 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5097 TCGv_i32 tmp3 = neon_load_reg(rd, pass);
5100 gen_helper_vfp_negs(tmp, tmp);
5102 gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus);
5103 tcg_temp_free_i32(tmp3);
5104 tcg_temp_free_ptr(fpstatus);
5110 tcg_temp_free_i32(tmp2);
5112 /* Save the result. For elementwise operations we can put it
5113 straight into the destination register. For pairwise operations
5114 we have to be careful to avoid clobbering the source operands. */
5115 if (pairwise && rd == rm) {
5116 neon_store_scratch(pass, tmp);
5118 neon_store_reg(rd, pass, tmp);
5122 if (pairwise && rd == rm) {
5123 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5124 tmp = neon_load_scratch(pass);
5125 neon_store_reg(rd, pass, tmp);
5128 /* End of 3 register same size operations. */
5129 } else if (insn & (1 << 4)) {
5130 if ((insn & 0x00380080) != 0) {
5131 /* Two registers and shift. */
5132 op = (insn >> 8) & 0xf;
5133 if (insn & (1 << 7)) {
5141 while ((insn & (1 << (size + 19))) == 0)
5144 shift = (insn >> 16) & ((1 << (3 + size)) - 1);
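/* i.e. the element size came from the position of the most significant
 * set bit of the immediate field (bit 7 set selects 64-bit elements),
 * and the remaining low bits of the field give the shift amount.
 */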
5146 /* Shift by immediate:
5147 VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */
5148 if (q && ((rd | rm) & 1)) {
5151 if (!u && (op == 4 || op == 6)) {
5154 /* Right shifts are encoded as N - shift, where N is the
5155 element size in bits. */
5157 shift = shift - (1 << (size + 3));
5162 /* Right shift comes here negative. */
5164 /* Shifts larger than the element size are architecturally
5165 * valid. Unsigned results in all zeros; signed results in all sign bits. */
5169 tcg_gen_gvec_sari(size, rd_ofs, rm_ofs,
5170 MIN(shift, (8 << size) - 1),
5171 vec_size, vec_size);
5172 } else if (shift >= 8 << size) {
5173 tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0);
5175 tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift,
5176 vec_size, vec_size);
5181 /* Right shift comes here negative. */
5183 /* Shifts larger than the element size are architecturally
5184 * valid. Unsigned results in all zeros; signed results in all sign bits. */
5188 tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
5189 MIN(shift, (8 << size) - 1),
5191 } else if (shift >= 8 << size) {
5194 tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
5195 shift, &usra_op[size]);
5203 /* Right shift comes here negative. */
5205 /* Shift out of range leaves destination unchanged. */
5206 if (shift < 8 << size) {
5207 tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
5208 shift, &sri_op[size]);
5212 case 5: /* VSHL, VSLI */
5214 /* Shift out of range leaves destination unchanged. */
5215 if (shift < 8 << size) {
5216 tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size,
5217 vec_size, shift, &sli_op[size]);
5220 /* Shifts larger than the element size are
5221 * architecturally valid and result in zero. */
5223 if (shift >= 8 << size) {
5224 tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0);
5226 tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift,
5227 vec_size, vec_size);
5239 /* To avoid excessive duplication of ops we implement shift
5240 * by immediate using the variable shift operations.
5242 imm = dup_const(size, shift);
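/* dup_const() replicates the shift amount into every element of a
 * 64-bit constant (negative for the right-shift forms, matching the
 * convention of the variable shift helpers used below).
 */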
5244 for (pass = 0; pass < count; pass++) {
5246 neon_load_reg64(cpu_V0, rm + pass);
5247 tcg_gen_movi_i64(cpu_V1, imm);
5252 gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, cpu_V1);
5254 gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1);
5256 case 6: /* VQSHLU */
5257 gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
5262 gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5265 gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5270 g_assert_not_reached();
5274 neon_load_reg64(cpu_V1, rd + pass);
5275 tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
5277 neon_store_reg64(cpu_V0, rd + pass);
5278 } else { /* size < 3 */
5279 /* Operands in T0 and T1. */
5280 tmp = neon_load_reg(rm, pass);
5281 tmp2 = tcg_temp_new_i32();
5282 tcg_gen_movi_i32(tmp2, imm);
5286 GEN_NEON_INTEGER_OP(rshl);
5288 case 6: /* VQSHLU */
5291 gen_helper_neon_qshlu_s8(tmp, cpu_env,
5295 gen_helper_neon_qshlu_s16(tmp, cpu_env,
5299 gen_helper_neon_qshlu_s32(tmp, cpu_env,
5307 GEN_NEON_INTEGER_OP_ENV(qshl);
5310 g_assert_not_reached();
5312 tcg_temp_free_i32(tmp2);
5316 tmp2 = neon_load_reg(rd, pass);
5317 gen_neon_add(size, tmp, tmp2);
5318 tcg_temp_free_i32(tmp2);
5320 neon_store_reg(rd, pass, tmp);
5323 } else if (op < 10) {
5324 /* Shift by immediate and narrow:
5325 VSHRN, VRSHRN, VQSHRN, VQRSHRN. */
5326 int input_unsigned = (op == 8) ? !u : u;
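/* For op 8 the U bit selects VQSHRUN/VQRSHRUN, whose input is signed
 * even though the result is unsigned, so the input signedness is the
 * inverse of U; for op 9 (VQSHRN/VQRSHRN) U gives it directly.
 */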
5330 shift = shift - (1 << (size + 3));
5333 tmp64 = tcg_const_i64(shift);
5334 neon_load_reg64(cpu_V0, rm);
5335 neon_load_reg64(cpu_V1, rm + 1);
5336 for (pass = 0; pass < 2; pass++) {
5344 if (input_unsigned) {
5345 gen_helper_neon_rshl_u64(cpu_V0, in, tmp64);
5347 gen_helper_neon_rshl_s64(cpu_V0, in, tmp64);
5350 if (input_unsigned) {
5351 gen_helper_neon_shl_u64(cpu_V0, in, tmp64);
5353 gen_helper_neon_shl_s64(cpu_V0, in, tmp64);
5356 tmp = tcg_temp_new_i32();
5357 gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5358 neon_store_reg(rd, pass, tmp);
5360 tcg_temp_free_i64(tmp64);
5363 imm = (uint16_t)shift;
5367 imm = (uint32_t)shift;
5369 tmp2 = tcg_const_i32(imm);
5370 tmp4 = neon_load_reg(rm + 1, 0);
5371 tmp5 = neon_load_reg(rm + 1, 1);
5372 for (pass = 0; pass < 2; pass++) {
5374 tmp = neon_load_reg(rm, 0);
5378 gen_neon_shift_narrow(size, tmp, tmp2, q,
5381 tmp3 = neon_load_reg(rm, 1);
5385 gen_neon_shift_narrow(size, tmp3, tmp2, q,
5387 tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
5388 tcg_temp_free_i32(tmp);
5389 tcg_temp_free_i32(tmp3);
5390 tmp = tcg_temp_new_i32();
5391 gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5392 neon_store_reg(rd, pass, tmp);
5394 tcg_temp_free_i32(tmp2);
5396 } else if (op == 10) {
5398 if (q || (rd & 1)) {
5401 tmp = neon_load_reg(rm, 0);
5402 tmp2 = neon_load_reg(rm, 1);
5403 for (pass = 0; pass < 2; pass++) {
5407 gen_neon_widen(cpu_V0, tmp, size, u);
5410 /* The shift is less than the width of the source
5411 type, so we can just shift the whole register. */
5412 tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
5413 /* Widen the result of shift: we need to clear
5414 * the potential overflow bits resulting from
5415 * left bits of the narrow input appearing as
5416 * right bits of the left neighbour's narrow input. */
5418 if (size < 2 || !u) {
5421 imm = (0xffu >> (8 - shift));
5423 } else if (size == 1) {
5424 imm = 0xffff >> (16 - shift);
5427 imm = 0xffffffff >> (32 - shift);
5430 imm64 = imm | (((uint64_t)imm) << 32);
5434 tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
5437 neon_store_reg64(cpu_V0, rd + pass);
5439 } else if (op >= 14) {
5440 /* VCVT fixed-point. */
5443 VFPGenFixPointFn *fn;
5445 if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
5451 fn = gen_helper_vfp_ultos;
5453 fn = gen_helper_vfp_sltos;
5457 fn = gen_helper_vfp_touls_round_to_zero;
5459 fn = gen_helper_vfp_tosls_round_to_zero;
5463 /* We have already masked out the must-be-1 top bit of imm6,
5464 * hence this 32-shift where the ARM ARM has 64-imm6.
5467 fpst = get_fpstatus_ptr(1);
5468 shiftv = tcg_const_i32(shift);
5469 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5470 TCGv_i32 tmpf = neon_load_reg(rm, pass);
5471 fn(tmpf, tmpf, shiftv, fpst);
5472 neon_store_reg(rd, pass, tmpf);
5474 tcg_temp_free_ptr(fpst);
5475 tcg_temp_free_i32(shiftv);
5479 } else { /* (insn & 0x00380080) == 0 */
5480 int invert, reg_ofs, vec_size;
5482 if (q && (rd & 1)) {
5486 op = (insn >> 8) & 0xf;
5487 /* One register and immediate. */
5488 imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
5489 invert = (insn & (1 << 5)) != 0;
5490 /* Note that op = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.
5491 * We choose to not special-case this and will behave as if a
5492 * valid constant encoding of 0 had been given.
5511 imm = (imm << 8) | (imm << 24);
5514 imm = (imm << 8) | 0xff;
5517 imm = (imm << 16) | 0xffff;
5520 imm |= (imm << 8) | (imm << 16) | (imm << 24);
5529 imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
5530 | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
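/* The expression above is the AdvSIMD 8-bit float expansion: bit 7
 * becomes the sign, bit 6 supplies an inverted bit 30 plus five
 * replicated exponent bits, and bits [5:0] form the top of the
 * fraction. For example, imm8 = 0x70 expands to 0x3f800000 (1.0f).
 */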
5537 reg_ofs = neon_reg_offset(rd, 0);
5538 vec_size = q ? 16 : 8;
5540 if (op & 1 && op < 12) {
5542 /* The immediate value has already been inverted,
5543 * so BIC becomes AND.
5545 tcg_gen_gvec_andi(MO_32, reg_ofs, reg_ofs, imm,
5546 vec_size, vec_size);
5548 tcg_gen_gvec_ori(MO_32, reg_ofs, reg_ofs, imm,
5549 vec_size, vec_size);
5553 if (op == 14 && invert) {
5554 TCGv_i64 t64 = tcg_temp_new_i64();
5556 for (pass = 0; pass <= q; ++pass) {
5560 for (n = 0; n < 8; n++) {
5561 if (imm & (1 << (n + pass * 8))) {
5562 val |= 0xffull << (n * 8);
5565 tcg_gen_movi_i64(t64, val);
5566 neon_store_reg64(t64, rd + pass);
5568 tcg_temp_free_i64(t64);
5570 tcg_gen_gvec_dup32i(reg_ofs, vec_size, vec_size, imm);
5574 } else { /* (insn & 0x00800010 == 0x00800000) */
5576 op = (insn >> 8) & 0xf;
5577 if ((insn & (1 << 6)) == 0) {
5578 /* Three registers of different lengths. */
5582 /* undefreq: bit 0 : UNDEF if size == 0
5583 * bit 1 : UNDEF if size == 1
5584 * bit 2 : UNDEF if size == 2
5585 * bit 3 : UNDEF if U == 1
5586 * Note that [2:0] set implies 'always UNDEF'
5589 /* prewiden, src1_wide, src2_wide, undefreq */
5590 static const int neon_3reg_wide[16][4] = {
5591 {1, 0, 0, 0}, /* VADDL */
5592 {1, 1, 0, 0}, /* VADDW */
5593 {1, 0, 0, 0}, /* VSUBL */
5594 {1, 1, 0, 0}, /* VSUBW */
5595 {0, 1, 1, 0}, /* VADDHN */
5596 {0, 0, 0, 0}, /* VABAL */
5597 {0, 1, 1, 0}, /* VSUBHN */
5598 {0, 0, 0, 0}, /* VABDL */
5599 {0, 0, 0, 0}, /* VMLAL */
5600 {0, 0, 0, 9}, /* VQDMLAL */
5601 {0, 0, 0, 0}, /* VMLSL */
5602 {0, 0, 0, 9}, /* VQDMLSL */
5603 {0, 0, 0, 0}, /* Integer VMULL */
5604 {0, 0, 0, 1}, /* VQDMULL */
5605 {0, 0, 0, 0xa}, /* Polynomial VMULL */
5606 {0, 0, 0, 7}, /* Reserved: always UNDEF */
5609 prewiden = neon_3reg_wide[op][0];
5610 src1_wide = neon_3reg_wide[op][1];
5611 src2_wide = neon_3reg_wide[op][2];
5612 undefreq = neon_3reg_wide[op][3];
5614 if ((undefreq & (1 << size)) ||
5615 ((undefreq & 8) && u)) {
5618 if ((src1_wide && (rn & 1)) ||
5619 (src2_wide && (rm & 1)) ||
5620 (!src2_wide && (rd & 1))) {
5624 /* Handle VMULL.P64 (Polynomial 64x64 to 128 bit multiply)
5625 * outside the loop below as it only performs a single pass.
5627 if (op == 14 && size == 2) {
5628 TCGv_i64 tcg_rn, tcg_rm, tcg_rd;
5630 if (!dc_isar_feature(aa32_pmull, s)) {
5633 tcg_rn = tcg_temp_new_i64();
5634 tcg_rm = tcg_temp_new_i64();
5635 tcg_rd = tcg_temp_new_i64();
5636 neon_load_reg64(tcg_rn, rn);
5637 neon_load_reg64(tcg_rm, rm);
5638 gen_helper_neon_pmull_64_lo(tcg_rd, tcg_rn, tcg_rm);
5639 neon_store_reg64(tcg_rd, rd);
5640 gen_helper_neon_pmull_64_hi(tcg_rd, tcg_rn, tcg_rm);
5641 neon_store_reg64(tcg_rd, rd + 1);
5642 tcg_temp_free_i64(tcg_rn);
5643 tcg_temp_free_i64(tcg_rm);
5644 tcg_temp_free_i64(tcg_rd);
5648 /* Avoid overlapping operands. Wide source operands are
5649 always aligned so will never overlap with wide
5650 destinations in problematic ways. */
5651 if (rd == rm && !src2_wide) {
5652 tmp = neon_load_reg(rm, 1);
5653 neon_store_scratch(2, tmp);
5654 } else if (rd == rn && !src1_wide) {
5655 tmp = neon_load_reg(rn, 1);
5656 neon_store_scratch(2, tmp);
5659 for (pass = 0; pass < 2; pass++) {
5661 neon_load_reg64(cpu_V0, rn + pass);
5664 if (pass == 1 && rd == rn) {
5665 tmp = neon_load_scratch(2);
5667 tmp = neon_load_reg(rn, pass);
5670 gen_neon_widen(cpu_V0, tmp, size, u);
5674 neon_load_reg64(cpu_V1, rm + pass);
5677 if (pass == 1 && rd == rm) {
5678 tmp2 = neon_load_scratch(2);
5680 tmp2 = neon_load_reg(rm, pass);
5683 gen_neon_widen(cpu_V1, tmp2, size, u);
5687 case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
5688 gen_neon_addl(size);
5690 case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
5691 gen_neon_subl(size);
5693 case 5: case 7: /* VABAL, VABDL */
5694 switch ((size << 1) | u) {
5696 gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
5699 gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
5702 gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
5705 gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
5708 gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
5711 gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
5715 tcg_temp_free_i32(tmp2);
5716 tcg_temp_free_i32(tmp);
5718 case 8: case 9: case 10: case 11: case 12: case 13:
5719 /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
5720 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
5722 case 14: /* Polynomial VMULL */
5723 gen_helper_neon_mull_p8(cpu_V0, tmp, tmp2);
5724 tcg_temp_free_i32(tmp2);
5725 tcg_temp_free_i32(tmp);
5727 default: /* 15 is RESERVED: caught earlier */
5732 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5733 neon_store_reg64(cpu_V0, rd + pass);
5734 } else if (op == 5 || (op >= 8 && op <= 11)) {
5736 neon_load_reg64(cpu_V1, rd + pass);
5738 case 10: /* VMLSL */
5739 gen_neon_negl(cpu_V0, size);
5741 case 5: case 8: /* VABAL, VMLAL */
5742 gen_neon_addl(size);
5744 case 9: case 11: /* VQDMLAL, VQDMLSL */
5745 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5747 gen_neon_negl(cpu_V0, size);
5749 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
5754 neon_store_reg64(cpu_V0, rd + pass);
5755 } else if (op == 4 || op == 6) {
5756 /* Narrowing operation. */
5757 tmp = tcg_temp_new_i32();
5761 gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
5764 gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
5767 tcg_gen_extrh_i64_i32(tmp, cpu_V0);
5774 gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
5777 gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
5780 tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
5781 tcg_gen_extrh_i64_i32(tmp, cpu_V0);
5789 neon_store_reg(rd, 0, tmp3);
5790 neon_store_reg(rd, 1, tmp);
5793 /* Write back the result. */
5794 neon_store_reg64(cpu_V0, rd + pass);
5798 /* Two registers and a scalar. NB that for ops of this form
5799 * the ARM ARM labels bit 24 as Q, but it is in our variable 'u', not 'q'. */
5806 case 1: /* Floating point VMLA scalar */
5807 case 5: /* Floating point VMLS scalar */
5808 case 9: /* Floating point VMUL scalar */
5813 case 0: /* Integer VMLA scalar */
5814 case 4: /* Integer VMLS scalar */
5815 case 8: /* Integer VMUL scalar */
5816 case 12: /* VQDMULH scalar */
5817 case 13: /* VQRDMULH scalar */
5818 if (u && ((rd | rn) & 1)) {
5821 tmp = neon_get_scalar(size, rm);
5822 neon_store_scratch(0, tmp);
5823 for (pass = 0; pass < (u ? 4 : 2); pass++) {
5824 tmp = neon_load_scratch(0);
5825 tmp2 = neon_load_reg(rn, pass);
5828 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
5830 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
5832 } else if (op == 13) {
5834 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
5836 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
5838 } else if (op & 1) {
5839 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5840 gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
5841 tcg_temp_free_ptr(fpstatus);
5844 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
5845 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
5846 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
5850 tcg_temp_free_i32(tmp2);
5853 tmp2 = neon_load_reg(rd, pass);
5856 gen_neon_add(size, tmp, tmp2);
5860 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5861 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5862 tcg_temp_free_ptr(fpstatus);
5866 gen_neon_rsb(size, tmp, tmp2);
5870 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5871 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
5872 tcg_temp_free_ptr(fpstatus);
5878 tcg_temp_free_i32(tmp2);
5880 neon_store_reg(rd, pass, tmp);
5883 case 3: /* VQDMLAL scalar */
5884 case 7: /* VQDMLSL scalar */
5885 case 11: /* VQDMULL scalar */
5890 case 2: /* VMLAL scalar */
5891 case 6: /* VMLSL scalar */
5892 case 10: /* VMULL scalar */
5896 tmp2 = neon_get_scalar(size, rm);
5897 /* We need a copy of tmp2 because gen_neon_mull
5898 * deletes it during pass 0. */
5899 tmp4 = tcg_temp_new_i32();
5900 tcg_gen_mov_i32(tmp4, tmp2);
5901 tmp3 = neon_load_reg(rn, 1);
5903 for (pass = 0; pass < 2; pass++) {
5905 tmp = neon_load_reg(rn, 0);
5910 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
5912 neon_load_reg64(cpu_V1, rd + pass);
5916 gen_neon_negl(cpu_V0, size);
5919 gen_neon_addl(size);
5922 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5924 gen_neon_negl(cpu_V0, size);
5926 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
5932 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5937 neon_store_reg64(cpu_V0, rd + pass);
5940 case 14: /* VQRDMLAH scalar */
5941 case 15: /* VQRDMLSH scalar */
5943 NeonGenThreeOpEnvFn *fn;
5945 if (!dc_isar_feature(aa32_rdm, s)) {
5948 if (u && ((rd | rn) & 1)) {
5953 fn = gen_helper_neon_qrdmlah_s16;
5955 fn = gen_helper_neon_qrdmlah_s32;
5959 fn = gen_helper_neon_qrdmlsh_s16;
5961 fn = gen_helper_neon_qrdmlsh_s32;
5965 tmp2 = neon_get_scalar(size, rm);
5966 for (pass = 0; pass < (u ? 4 : 2); pass++) {
5967 tmp = neon_load_reg(rn, pass);
5968 tmp3 = neon_load_reg(rd, pass);
5969 fn(tmp, cpu_env, tmp, tmp2, tmp3);
5970 tcg_temp_free_i32(tmp3);
5971 neon_store_reg(rd, pass, tmp);
5973 tcg_temp_free_i32(tmp2);
5977 g_assert_not_reached();
5980 } else { /* size == 3 */
5983 imm = (insn >> 8) & 0xf;
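/* VEXT: imm selects the starting byte of an 8- or 16-byte window taken
 * from the concatenation of the source registers (with Vn supplying
 * the least significant bytes); it is assembled below from 64-bit
 * shifts and ORs of the source doublewords.
 */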
5988 if (q && ((rd | rn | rm) & 1)) {
5993 neon_load_reg64(cpu_V0, rn);
5995 neon_load_reg64(cpu_V1, rn + 1);
5997 } else if (imm == 8) {
5998 neon_load_reg64(cpu_V0, rn + 1);
6000 neon_load_reg64(cpu_V1, rm);
6003 tmp64 = tcg_temp_new_i64();
6005 neon_load_reg64(cpu_V0, rn);
6006 neon_load_reg64(tmp64, rn + 1);
6008 neon_load_reg64(cpu_V0, rn + 1);
6009 neon_load_reg64(tmp64, rm);
6011 tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
6012 tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
6013 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6015 neon_load_reg64(cpu_V1, rm);
6017 neon_load_reg64(cpu_V1, rm + 1);
6020 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6021 tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
6022 tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
6023 tcg_temp_free_i64(tmp64);
6026 neon_load_reg64(cpu_V0, rn);
6027 tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
6028 neon_load_reg64(cpu_V1, rm);
6029 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6030 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6032 neon_store_reg64(cpu_V0, rd);
6034 neon_store_reg64(cpu_V1, rd + 1);
6036 } else if ((insn & (1 << 11)) == 0) {
6037 /* Two register misc. */
6038 op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
6039 size = (insn >> 18) & 3;
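/* op packs bits [17:16] of the encoding (shifted up to [5:4]) together
 * with bits [10:7]; neon_2rm_sizes[] and neon_2rm_is_v8_op() are both
 * indexed by this combined value.
 */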
6040 /* UNDEF for unknown op values and bad op-size combinations */
6041 if ((neon_2rm_sizes[op] & (1 << size)) == 0) {
6044 if (neon_2rm_is_v8_op(op) &&
6045 !arm_dc_feature(s, ARM_FEATURE_V8)) {
6048 if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) &&
6049 q && ((rm | rd) & 1)) {
6053 case NEON_2RM_VREV64:
6054 for (pass = 0; pass < (q ? 2 : 1); pass++) {
6055 tmp = neon_load_reg(rm, pass * 2);
6056 tmp2 = neon_load_reg(rm, pass * 2 + 1);
6058 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6059 case 1: gen_swap_half(tmp); break;
6060 case 2: /* no-op */ break;
6063 neon_store_reg(rd, pass * 2 + 1, tmp);
6065 neon_store_reg(rd, pass * 2, tmp2);
6068 case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break;
6069 case 1: gen_swap_half(tmp2); break;
6072 neon_store_reg(rd, pass * 2, tmp2);
6076 case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
6077 case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
6078 for (pass = 0; pass < q + 1; pass++) {
6079 tmp = neon_load_reg(rm, pass * 2);
6080 gen_neon_widen(cpu_V0, tmp, size, op & 1);
6081 tmp = neon_load_reg(rm, pass * 2 + 1);
6082 gen_neon_widen(cpu_V1, tmp, size, op & 1);
6084 case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
6085 case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
6086 case 2: tcg_gen_add_i64(CPU_V001); break;
6089 if (op >= NEON_2RM_VPADAL) {
6091 neon_load_reg64(cpu_V1, rd + pass);
6092 gen_neon_addl(size);
6094 neon_store_reg64(cpu_V0, rd + pass);
6100 for (n = 0; n < (q ? 4 : 2); n += 2) {
6101 tmp = neon_load_reg(rm, n);
6102 tmp2 = neon_load_reg(rd, n + 1);
6103 neon_store_reg(rm, n, tmp2);
6104 neon_store_reg(rd, n + 1, tmp);
6111 if (gen_neon_unzip(rd, rm, size, q)) {
6116 if (gen_neon_zip(rd, rm, size, q)) {
6120 case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
6121 /* also VQMOVUN; op field and mnemonics don't line up */
6126 for (pass = 0; pass < 2; pass++) {
6127 neon_load_reg64(cpu_V0, rm + pass);
6128 tmp = tcg_temp_new_i32();
6129 gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size,
6134 neon_store_reg(rd, 0, tmp2);
6135 neon_store_reg(rd, 1, tmp);
6139 case NEON_2RM_VSHLL:
6140 if (q || (rd & 1)) {
6143 tmp = neon_load_reg(rm, 0);
6144 tmp2 = neon_load_reg(rm, 1);
6145 for (pass = 0; pass < 2; pass++) {
6148 gen_neon_widen(cpu_V0, tmp, size, 1);
6149 tcg_gen_shli_i64(cpu_V0, cpu_V0, 8 << size);
6150 neon_store_reg64(cpu_V0, rd + pass);
6153 case NEON_2RM_VCVT_F16_F32:
6158 if (!dc_isar_feature(aa32_fp16_spconv, s) ||
6162 fpst = get_fpstatus_ptr(true);
6163 ahp = get_ahp_flag();
6164 tmp = neon_load_reg(rm, 0);
6165 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
6166 tmp2 = neon_load_reg(rm, 1);
6167 gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
6168 tcg_gen_shli_i32(tmp2, tmp2, 16);
6169 tcg_gen_or_i32(tmp2, tmp2, tmp);
6170 tcg_temp_free_i32(tmp);
6171 tmp = neon_load_reg(rm, 2);
6172 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
6173 tmp3 = neon_load_reg(rm, 3);
6174 neon_store_reg(rd, 0, tmp2);
6175 gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
6176 tcg_gen_shli_i32(tmp3, tmp3, 16);
6177 tcg_gen_or_i32(tmp3, tmp3, tmp);
6178 neon_store_reg(rd, 1, tmp3);
6179 tcg_temp_free_i32(tmp);
6180 tcg_temp_free_i32(ahp);
6181 tcg_temp_free_ptr(fpst);
6184 case NEON_2RM_VCVT_F32_F16:
6188 if (!dc_isar_feature(aa32_fp16_spconv, s) ||
6192 fpst = get_fpstatus_ptr(true);
6193 ahp = get_ahp_flag();
6194 tmp3 = tcg_temp_new_i32();
6195 tmp = neon_load_reg(rm, 0);
6196 tmp2 = neon_load_reg(rm, 1);
6197 tcg_gen_ext16u_i32(tmp3, tmp);
6198 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
6199 neon_store_reg(rd, 0, tmp3);
6200 tcg_gen_shri_i32(tmp, tmp, 16);
6201 gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
6202 neon_store_reg(rd, 1, tmp);
6203 tmp3 = tcg_temp_new_i32();
6204 tcg_gen_ext16u_i32(tmp3, tmp2);
6205 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
6206 neon_store_reg(rd, 2, tmp3);
6207 tcg_gen_shri_i32(tmp2, tmp2, 16);
6208 gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
6209 neon_store_reg(rd, 3, tmp2);
6210 tcg_temp_free_i32(ahp);
6211 tcg_temp_free_ptr(fpst);
6214 case NEON_2RM_AESE: case NEON_2RM_AESMC:
6215 if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) {
6218 ptr1 = vfp_reg_ptr(true, rd);
6219 ptr2 = vfp_reg_ptr(true, rm);
6221 /* Bit 6 is the lowest opcode bit; it distinguishes between
6222 * encryption (AESE/AESMC) and decryption (AESD/AESIMC)
6224 tmp3 = tcg_const_i32(extract32(insn, 6, 1));
6226 if (op == NEON_2RM_AESE) {
6227 gen_helper_crypto_aese(ptr1, ptr2, tmp3);
6229 gen_helper_crypto_aesmc(ptr1, ptr2, tmp3);
6231 tcg_temp_free_ptr(ptr1);
6232 tcg_temp_free_ptr(ptr2);
6233 tcg_temp_free_i32(tmp3);
6235 case NEON_2RM_SHA1H:
6236 if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) {
6239 ptr1 = vfp_reg_ptr(true, rd);
6240 ptr2 = vfp_reg_ptr(true, rm);
6242 gen_helper_crypto_sha1h(ptr1, ptr2);
6244 tcg_temp_free_ptr(ptr1);
6245 tcg_temp_free_ptr(ptr2);
6247 case NEON_2RM_SHA1SU1:
6248 if ((rm | rd) & 1) {
6251 /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
6253 if (!dc_isar_feature(aa32_sha2, s)) {
6256 } else if (!dc_isar_feature(aa32_sha1, s)) {
6259 ptr1 = vfp_reg_ptr(true, rd);
6260 ptr2 = vfp_reg_ptr(true, rm);
6262 gen_helper_crypto_sha256su0(ptr1, ptr2);
6264 gen_helper_crypto_sha1su1(ptr1, ptr2);
6266 tcg_temp_free_ptr(ptr1);
6267 tcg_temp_free_ptr(ptr2);
6271 tcg_gen_gvec_not(0, rd_ofs, rm_ofs, vec_size, vec_size);
6274 tcg_gen_gvec_neg(size, rd_ofs, rm_ofs, vec_size, vec_size);
6277 tcg_gen_gvec_abs(size, rd_ofs, rm_ofs, vec_size, vec_size);
6282 for (pass = 0; pass < (q ? 4 : 2); pass++) {
6283 tmp = neon_load_reg(rm, pass);
6285 case NEON_2RM_VREV32:
6287 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6288 case 1: gen_swap_half(tmp); break;
6292 case NEON_2RM_VREV16:
6293 gen_rev16(tmp, tmp);
6297 case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
6298 case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
6299 case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
6305 case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
6306 case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
6307 case 2: tcg_gen_clzi_i32(tmp, tmp, 32); break;
6312 gen_helper_neon_cnt_u8(tmp, tmp);
6314 case NEON_2RM_VQABS:
6317 gen_helper_neon_qabs_s8(tmp, cpu_env, tmp);
6320 gen_helper_neon_qabs_s16(tmp, cpu_env, tmp);
6323 gen_helper_neon_qabs_s32(tmp, cpu_env, tmp);
6328 case NEON_2RM_VQNEG:
6331 gen_helper_neon_qneg_s8(tmp, cpu_env, tmp);
6334 gen_helper_neon_qneg_s16(tmp, cpu_env, tmp);
6337 gen_helper_neon_qneg_s32(tmp, cpu_env, tmp);
6342 case NEON_2RM_VCGT0: case NEON_2RM_VCLE0:
6343 tmp2 = tcg_const_i32(0);
6345 case 0: gen_helper_neon_cgt_s8(tmp, tmp, tmp2); break;
6346 case 1: gen_helper_neon_cgt_s16(tmp, tmp, tmp2); break;
6347 case 2: gen_helper_neon_cgt_s32(tmp, tmp, tmp2); break;
6350 tcg_temp_free_i32(tmp2);
6351 if (op == NEON_2RM_VCLE0) {
6352 tcg_gen_not_i32(tmp, tmp);
6355 case NEON_2RM_VCGE0: case NEON_2RM_VCLT0:
6356 tmp2 = tcg_const_i32(0);
6358 case 0: gen_helper_neon_cge_s8(tmp, tmp, tmp2); break;
6359 case 1: gen_helper_neon_cge_s16(tmp, tmp, tmp2); break;
6360 case 2: gen_helper_neon_cge_s32(tmp, tmp, tmp2); break;
6363 tcg_temp_free_i32(tmp2);
6364 if (op == NEON_2RM_VCLT0) {
6365 tcg_gen_not_i32(tmp, tmp);
6368 case NEON_2RM_VCEQ0:
6369 tmp2 = tcg_const_i32(0);
6371 case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
6372 case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
6373 case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
6376 tcg_temp_free_i32(tmp2);
6378 case NEON_2RM_VCGT0_F:
6380 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6381 tmp2 = tcg_const_i32(0);
6382 gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
6383 tcg_temp_free_i32(tmp2);
6384 tcg_temp_free_ptr(fpstatus);
6387 case NEON_2RM_VCGE0_F:
6389 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6390 tmp2 = tcg_const_i32(0);
6391 gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
6392 tcg_temp_free_i32(tmp2);
6393 tcg_temp_free_ptr(fpstatus);
6396 case NEON_2RM_VCEQ0_F:
6398 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6399 tmp2 = tcg_const_i32(0);
6400 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
6401 tcg_temp_free_i32(tmp2);
6402 tcg_temp_free_ptr(fpstatus);
6405 case NEON_2RM_VCLE0_F:
6407 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6408 tmp2 = tcg_const_i32(0);
6409 gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus);
6410 tcg_temp_free_i32(tmp2);
6411 tcg_temp_free_ptr(fpstatus);
6414 case NEON_2RM_VCLT0_F:
6416 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6417 tmp2 = tcg_const_i32(0);
6418 gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus);
6419 tcg_temp_free_i32(tmp2);
6420 tcg_temp_free_ptr(fpstatus);
6423 case NEON_2RM_VABS_F:
6424 gen_helper_vfp_abss(tmp, tmp);
6426 case NEON_2RM_VNEG_F:
6427 gen_helper_vfp_negs(tmp, tmp);
6430 tmp2 = neon_load_reg(rd, pass);
6431 neon_store_reg(rm, pass, tmp2);
6434 tmp2 = neon_load_reg(rd, pass);
6436 case 0: gen_neon_trn_u8(tmp, tmp2); break;
6437 case 1: gen_neon_trn_u16(tmp, tmp2); break;
6440 neon_store_reg(rm, pass, tmp2);
6442 case NEON_2RM_VRINTN:
6443 case NEON_2RM_VRINTA:
6444 case NEON_2RM_VRINTM:
6445 case NEON_2RM_VRINTP:
6446 case NEON_2RM_VRINTZ:
6449 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6452 if (op == NEON_2RM_VRINTZ) {
6453 rmode = FPROUNDING_ZERO;
6455 rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1];
6458 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6459 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6461 gen_helper_rints(tmp, tmp, fpstatus);
6462 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6464 tcg_temp_free_ptr(fpstatus);
6465 tcg_temp_free_i32(tcg_rmode);
6468 case NEON_2RM_VRINTX:
6470 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6471 gen_helper_rints_exact(tmp, tmp, fpstatus);
6472 tcg_temp_free_ptr(fpstatus);
6475 case NEON_2RM_VCVTAU:
6476 case NEON_2RM_VCVTAS:
6477 case NEON_2RM_VCVTNU:
6478 case NEON_2RM_VCVTNS:
6479 case NEON_2RM_VCVTPU:
6480 case NEON_2RM_VCVTPS:
6481 case NEON_2RM_VCVTMU:
6482 case NEON_2RM_VCVTMS:
6484 bool is_signed = !extract32(insn, 7, 1);
6485 TCGv_ptr fpst = get_fpstatus_ptr(1);
6486 TCGv_i32 tcg_rmode, tcg_shift;
6487 int rmode = fp_decode_rm[extract32(insn, 8, 2)];
6489 tcg_shift = tcg_const_i32(0);
6490 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6491 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6495 gen_helper_vfp_tosls(tmp, tmp,
6498 gen_helper_vfp_touls(tmp, tmp,
6502 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6504 tcg_temp_free_i32(tcg_rmode);
6505 tcg_temp_free_i32(tcg_shift);
6506 tcg_temp_free_ptr(fpst);
6509 case NEON_2RM_VRECPE:
6511 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6512 gen_helper_recpe_u32(tmp, tmp, fpstatus);
6513 tcg_temp_free_ptr(fpstatus);
6516 case NEON_2RM_VRSQRTE:
6518 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6519 gen_helper_rsqrte_u32(tmp, tmp, fpstatus);
6520 tcg_temp_free_ptr(fpstatus);
6523 case NEON_2RM_VRECPE_F:
6525 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6526 gen_helper_recpe_f32(tmp, tmp, fpstatus);
6527 tcg_temp_free_ptr(fpstatus);
6530 case NEON_2RM_VRSQRTE_F:
6532 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6533 gen_helper_rsqrte_f32(tmp, tmp, fpstatus);
6534 tcg_temp_free_ptr(fpstatus);
6537 case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
6539 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6540 gen_helper_vfp_sitos(tmp, tmp, fpstatus);
6541 tcg_temp_free_ptr(fpstatus);
6544 case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
6546 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6547 gen_helper_vfp_uitos(tmp, tmp, fpstatus);
6548 tcg_temp_free_ptr(fpstatus);
6551 case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
6553 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6554 gen_helper_vfp_tosizs(tmp, tmp, fpstatus);
6555 tcg_temp_free_ptr(fpstatus);
6558 case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
6560 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6561 gen_helper_vfp_touizs(tmp, tmp, fpstatus);
6562 tcg_temp_free_ptr(fpstatus);
6566 /* Reserved op values were caught by the
6567 * neon_2rm_sizes[] check earlier.
6571 neon_store_reg(rd, pass, tmp);
6575 } else if ((insn & (1 << 10)) == 0) {
6577 int n = ((insn >> 8) & 3) + 1;
6578 if ((rn + n) > 32) {
6579 /* This is UNPREDICTABLE; we choose to UNDEF to avoid the
6580 * helper function running off the end of the register file.
6585 if (insn & (1 << 6)) {
6586 tmp = neon_load_reg(rd, 0);
6588 tmp = tcg_temp_new_i32();
6589 tcg_gen_movi_i32(tmp, 0);
6591 tmp2 = neon_load_reg(rm, 0);
6592 ptr1 = vfp_reg_ptr(true, rn);
6593 tmp5 = tcg_const_i32(n);
6594 gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp5);
6595 tcg_temp_free_i32(tmp);
6596 if (insn & (1 << 6)) {
6597 tmp = neon_load_reg(rd, 1);
6599 tmp = tcg_temp_new_i32();
6600 tcg_gen_movi_i32(tmp, 0);
6602 tmp3 = neon_load_reg(rm, 1);
6603 gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp5);
6604 tcg_temp_free_i32(tmp5);
6605 tcg_temp_free_ptr(ptr1);
6606 neon_store_reg(rd, 0, tmp2);
6607 neon_store_reg(rd, 1, tmp3);
6608 tcg_temp_free_i32(tmp);
6609 } else if ((insn & 0x380) == 0) {
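/* VDUP (scalar): the lowest set bit of insn[18:16] encodes the element
 * size (byte, half or word) and the bits above it give the index of
 * the element in Vm to replicate.
 */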
6614 if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
6617 if (insn & (1 << 16)) {
6619 element = (insn >> 17) & 7;
6620 } else if (insn & (1 << 17)) {
6622 element = (insn >> 18) & 3;
6625 element = (insn >> 19) & 1;
6627 tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, 0),
6628 neon_element_offset(rm, element, size),
6629 q ? 16 : 8, q ? 16 : 8);
6638 /* Advanced SIMD three registers of the same length extension.
6639 * 31 25 23 22 20 16 12 11 10 9 8 3 0
6640 * +---------------+-----+---+-----+----+----+---+----+---+----+---------+----+
6641 * | 1 1 1 1 1 1 0 | op1 | D | op2 | Vn | Vd | 1 | o3 | 0 | o4 | N Q M U | Vm |
6642 * +---------------+-----+---+-----+----+----+---+----+---+----+---------+----+
6644 static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn)
6646 gen_helper_gvec_3 *fn_gvec = NULL;
6647 gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL;
6648 int rd, rn, rm, opr_sz;
6651 bool is_long = false, q = extract32(insn, 6, 1);
6652 bool ptr_is_env = false;
6654 if ((insn & 0xfe200f10) == 0xfc200800) {
6655 /* VCMLA -- 1111 110R R.1S .... .... 1000 ...0 .... */
6656 int size = extract32(insn, 20, 1);
6657 data = extract32(insn, 23, 2); /* rot */
6658 if (!dc_isar_feature(aa32_vcma, s)
6659 || (!size && !dc_isar_feature(aa32_fp16_arith, s))) {
6662 fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah;
6663 } else if ((insn & 0xfea00f10) == 0xfc800800) {
6664 /* VCADD -- 1111 110R 1.0S .... .... 1000 ...0 .... */
6665 int size = extract32(insn, 20, 1);
6666 data = extract32(insn, 24, 1); /* rot */
6667 if (!dc_isar_feature(aa32_vcma, s)
6668 || (!size && !dc_isar_feature(aa32_fp16_arith, s))) {
6671 fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh;
6672 } else if ((insn & 0xfeb00f00) == 0xfc200d00) {
6673 /* V[US]DOT -- 1111 1100 0.10 .... .... 1101 .Q.U .... */
6674 bool u = extract32(insn, 4, 1);
6675 if (!dc_isar_feature(aa32_dp, s)) {
6678 fn_gvec = u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b;
6679 } else if ((insn & 0xff300f10) == 0xfc200810) {
6680 /* VFM[AS]L -- 1111 1100 S.10 .... .... 1000 .Q.1 .... */
6681 int is_s = extract32(insn, 23, 1);
6682 if (!dc_isar_feature(aa32_fhm, s)) {
6686 data = is_s; /* is_2 == 0 */
6687 fn_gvec_ptr = gen_helper_gvec_fmlal_a32;
6693 VFP_DREG_D(rd, insn);
6697 if (q || !is_long) {
6698 VFP_DREG_N(rn, insn);
6699 VFP_DREG_M(rm, insn);
6700 if ((rn | rm) & q & !is_long) {
6703 off_rn = vfp_reg_offset(1, rn);
6704 off_rm = vfp_reg_offset(1, rm);
6706 rn = VFP_SREG_N(insn);
6707 rm = VFP_SREG_M(insn);
6708 off_rn = vfp_reg_offset(0, rn);
6709 off_rm = vfp_reg_offset(0, rm);
6712 if (s->fp_excp_el) {
6713 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
6714 syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
6717 if (!s->vfp_enabled) {
6721 opr_sz = (1 + q) * 8;
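/* Operation size in bytes: one 64-bit D register, or two when Q is set. */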
6727 ptr = get_fpstatus_ptr(1);
6729 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), off_rn, off_rm, ptr,
6730 opr_sz, opr_sz, data, fn_gvec_ptr);
6732 tcg_temp_free_ptr(ptr);
6735 tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd), off_rn, off_rm,
6736 opr_sz, opr_sz, data, fn_gvec);
6741 /* Advanced SIMD two registers and a scalar extension.
6742 * 31 24 23 22 20 16 12 11 10 9 8 3 0
6743 * +-----------------+----+---+----+----+----+---+----+---+----+---------+----+
6744 * | 1 1 1 1 1 1 1 0 | o1 | D | o2 | Vn | Vd | 1 | o3 | 0 | o4 | N Q M U | Vm |
6745 * +-----------------+----+---+----+----+----+---+----+---+----+---------+----+
6749 static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn)
6751 gen_helper_gvec_3 *fn_gvec = NULL;
6752 gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL;
6753 int rd, rn, rm, opr_sz, data;
6755 bool is_long = false, q = extract32(insn, 6, 1);
6756 bool ptr_is_env = false;
6758 if ((insn & 0xff000f10) == 0xfe000800) {
6759 /* VCMLA (indexed) -- 1111 1110 S.RR .... .... 1000 ...0 .... */
6760 int rot = extract32(insn, 20, 2);
6761 int size = extract32(insn, 23, 1);
6764 if (!dc_isar_feature(aa32_vcma, s)) {
6768 if (!dc_isar_feature(aa32_fp16_arith, s)) {
6771 /* For fp16, rm is just Vm, and index is M. */
6772 rm = extract32(insn, 0, 4);
6773 index = extract32(insn, 5, 1);
6775 /* For fp32, rm is the usual M:Vm, and index is 0. */
6776 VFP_DREG_M(rm, insn);
6779 data = (index << 2) | rot;
6780 fn_gvec_ptr = (size ? gen_helper_gvec_fcmlas_idx
6781 : gen_helper_gvec_fcmlah_idx);
6782 } else if ((insn & 0xffb00f00) == 0xfe200d00) {
6783 /* V[US]DOT -- 1111 1110 0.10 .... .... 1101 .Q.U .... */
6784 int u = extract32(insn, 4, 1);
6786 if (!dc_isar_feature(aa32_dp, s)) {
6789 fn_gvec = u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b;
6790 /* rm is just Vm, and index is M. */
6791 data = extract32(insn, 5, 1); /* index */
6792 rm = extract32(insn, 0, 4);
6793 } else if ((insn & 0xffa00f10) == 0xfe000810) {
6794 /* VFM[AS]L -- 1111 1110 0.0S .... .... 1000 .Q.1 .... */
6795 int is_s = extract32(insn, 20, 1);
6796 int vm20 = extract32(insn, 0, 3);
6797 int vm3 = extract32(insn, 3, 1);
6798 int m = extract32(insn, 5, 1);
6801 if (!dc_isar_feature(aa32_fhm, s)) {
6806 index = m * 2 + vm3;
6812 data = (index << 2) | is_s; /* is_2 == 0 */
6813 fn_gvec_ptr = gen_helper_gvec_fmlal_idx_a32;
6819 VFP_DREG_D(rd, insn);
6823 if (q || !is_long) {
6824 VFP_DREG_N(rn, insn);
6825 if (rn & q & !is_long) {
6828 off_rn = vfp_reg_offset(1, rn);
6829 off_rm = vfp_reg_offset(1, rm);
6831 rn = VFP_SREG_N(insn);
6832 off_rn = vfp_reg_offset(0, rn);
6833 off_rm = vfp_reg_offset(0, rm);
6835 if (s->fp_excp_el) {
6836 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
6837 syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
6840 if (!s->vfp_enabled) {
6844 opr_sz = (1 + q) * 8;
6850 ptr = get_fpstatus_ptr(1);
6852 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), off_rn, off_rm, ptr,
6853 opr_sz, opr_sz, data, fn_gvec_ptr);
6855 tcg_temp_free_ptr(ptr);
6858 tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd), off_rn, off_rm,
6859 opr_sz, opr_sz, data, fn_gvec);
6864 static int disas_coproc_insn(DisasContext *s, uint32_t insn)
6866 int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
6867 const ARMCPRegInfo *ri;
6869 cpnum = (insn >> 8) & 0xf;
6871 /* First check for coprocessor space used for XScale/iwMMXt insns */
6872 if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cpnum < 2)) {
6873 if (extract32(s->c15_cpar, cpnum, 1) == 0) {
6876 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
6877 return disas_iwmmxt_insn(s, insn);
6878 } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
6879 return disas_dsp_insn(s, insn);
6884 /* Otherwise treat as a generic register access */
6885 is64 = (insn & (1 << 25)) == 0;
6886 if (!is64 && ((insn & (1 << 4)) == 0)) {
6894 opc1 = (insn >> 4) & 0xf;
6896 rt2 = (insn >> 16) & 0xf;
6898 crn = (insn >> 16) & 0xf;
6899 opc1 = (insn >> 21) & 7;
6900 opc2 = (insn >> 5) & 7;
6903 isread = (insn >> 20) & 1;
6904 rt = (insn >> 12) & 0xf;
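/* Look the register up in the cp_regs hash table; the key combines the
 * coprocessor number, the crn/crm/opc1/opc2 fields, the 64-bit flag and
 * the current security state, matching how the table is populated.
 */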
6906 ri = get_arm_cp_reginfo(s->cp_regs,
6907 ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
6909 /* Check access permissions */
6910 if (!cp_access_ok(s->current_el, ri, isread)) {
6915 (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
6916 /* Emit code to perform further access permissions checks at
6917 * runtime; this may result in an exception.
6918 * Note that on XScale all cp0..cp13 registers do an access check
6919 * call in order to handle c15_cpar.
6922 TCGv_i32 tcg_syn, tcg_isread;
6925 /* Note that since we are an implementation which takes an
6926 * exception on a trapped conditional instruction only if the
6927 * instruction passes its condition code check, we can take
6928 * advantage of the clause in the ARM ARM that allows us to set
6929 * the COND field in the instruction to 0xE in all cases.
6930 * We could fish the actual condition out of the insn (ARM)
6931 * or the condexec bits (Thumb) but it isn't necessary.
6936 syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
6939 syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
6945 syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
6948 syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
6953 /* ARMv8 defines that only coprocessors 14 and 15 exist,
6954 * so this can only happen if this is an ARMv7 or earlier CPU,
6955 * in which case the syndrome information won't actually be used as it isn't needed. */
6958 assert(!arm_dc_feature(s, ARM_FEATURE_V8));
6959 syndrome = syn_uncategorized();
6963 gen_set_condexec(s);
6964 gen_set_pc_im(s, s->pc_curr);
6965 tmpptr = tcg_const_ptr(ri);
6966 tcg_syn = tcg_const_i32(syndrome);
6967 tcg_isread = tcg_const_i32(isread);
6968 gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn,
6970 tcg_temp_free_ptr(tmpptr);
6971 tcg_temp_free_i32(tcg_syn);
6972 tcg_temp_free_i32(tcg_isread);
6973 } else if (ri->type & ARM_CP_RAISES_EXC) {
6975 * The readfn or writefn might raise an exception;
6976 * synchronize the CPU state in case it does.
6978 gen_set_condexec(s);
6979 gen_set_pc_im(s, s->pc_curr);
6982 /* Handle special cases first */
6983 switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
6990 gen_set_pc_im(s, s->base.pc_next);
6991 s->base.is_jmp = DISAS_WFI;
6997 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
7006 if (ri->type & ARM_CP_CONST) {
7007 tmp64 = tcg_const_i64(ri->resetvalue);
7008 } else if (ri->readfn) {
7010 tmp64 = tcg_temp_new_i64();
7011 tmpptr = tcg_const_ptr(ri);
7012 gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
7013 tcg_temp_free_ptr(tmpptr);
7015 tmp64 = tcg_temp_new_i64();
7016 tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
7018 tmp = tcg_temp_new_i32();
7019 tcg_gen_extrl_i64_i32(tmp, tmp64);
7020 store_reg(s, rt, tmp);
7021 tmp = tcg_temp_new_i32();
7022 tcg_gen_extrh_i64_i32(tmp, tmp64);
7023 tcg_temp_free_i64(tmp64);
7024 store_reg(s, rt2, tmp);
7027 if (ri->type & ARM_CP_CONST) {
7028 tmp = tcg_const_i32(ri->resetvalue);
7029 } else if (ri->readfn) {
7031 tmp = tcg_temp_new_i32();
7032 tmpptr = tcg_const_ptr(ri);
7033 gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
7034 tcg_temp_free_ptr(tmpptr);
7036 tmp = load_cpu_offset(ri->fieldoffset);
7039 /* A destination register of r15 for a 32-bit load sets
7040 * the condition codes from the high 4 bits of the loaded value. */
7043 tcg_temp_free_i32(tmp);
7045 store_reg(s, rt, tmp);
7050 if (ri->type & ARM_CP_CONST) {
7051 /* If not forbidden by access permissions, treat as WI */
7056 TCGv_i32 tmplo, tmphi;
7057 TCGv_i64 tmp64 = tcg_temp_new_i64();
7058 tmplo = load_reg(s, rt);
7059 tmphi = load_reg(s, rt2);
7060 tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
7061 tcg_temp_free_i32(tmplo);
7062 tcg_temp_free_i32(tmphi);
7064 TCGv_ptr tmpptr = tcg_const_ptr(ri);
7065 gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
7066 tcg_temp_free_ptr(tmpptr);
7068 tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
7070 tcg_temp_free_i64(tmp64);
7075 tmp = load_reg(s, rt);
7076 tmpptr = tcg_const_ptr(ri);
7077 gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
7078 tcg_temp_free_ptr(tmpptr);
7079 tcg_temp_free_i32(tmp);
7081 TCGv_i32 tmp = load_reg(s, rt);
7082 store_cpu_offset(tmp, ri->fieldoffset);
7087 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
7088 /* I/O operations must end the TB here (whether read or write) */
7090 } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
7091 /* We default to ending the TB on a coprocessor register write,
7092 * but allow this to be suppressed by the register definition
7093 * (usually only necessary to work around guest bugs).
7101 /* Unknown register; this might be a guest error or a QEMU
7102 * unimplemented feature.
7105 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7106 "64 bit system register cp:%d opc1: %d crm:%d "
7108 isread ? "read" : "write", cpnum, opc1, crm,
7109 s->ns ? "non-secure" : "secure");
7111 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7112 "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
7114 isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
7115 s->ns ? "non-secure" : "secure");
7122 /* Store a 64-bit value to a register pair. Clobbers val. */
7123 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
7126 tmp = tcg_temp_new_i32();
7127 tcg_gen_extrl_i64_i32(tmp, val);
7128 store_reg(s, rlow, tmp);
7129 tmp = tcg_temp_new_i32();
7130 tcg_gen_extrh_i64_i32(tmp, val);
7131 store_reg(s, rhigh, tmp);
7134 /* Load and add a 64-bit value from a register pair. */
7135 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
7141 /* Load 64-bit value rd:rn. */
7142 tmpl = load_reg(s, rlow);
7143 tmph = load_reg(s, rhigh);
7144 tmp = tcg_temp_new_i64();
7145 tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
7146 tcg_temp_free_i32(tmpl);
7147 tcg_temp_free_i32(tmph);
7148 tcg_gen_add_i64(val, val, tmp);
7149 tcg_temp_free_i64(tmp);
7152 /* Set N and Z flags from hi|lo. */
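/* (cpu_ZF is stored as a value that is zero iff the Z flag is set, and
 * cpu_NF as a value whose bit 31 is the N flag, so OR-ing the two
 * halves and copying the high word are enough.)
 */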
7153 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
7155 tcg_gen_mov_i32(cpu_NF, hi);
7156 tcg_gen_or_i32(cpu_ZF, lo, hi);
7159 /* Load/Store exclusive instructions are implemented by remembering
7160 the value/address loaded, and seeing if these are the same
7161 when the store is performed. This should be sufficient to implement
7162 the architecturally mandated semantics, and avoids having to monitor
7163 regular stores. The compare vs the remembered value is done during
7164 the cmpxchg operation, but we must compare the addresses manually. */
7165 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
7166 TCGv_i32 addr, int size)
7168 TCGv_i32 tmp = tcg_temp_new_i32();
7169 MemOp opc = size | MO_ALIGN | s->be_data;
7174 TCGv_i32 tmp2 = tcg_temp_new_i32();
7175 TCGv_i64 t64 = tcg_temp_new_i64();
7177 /* For AArch32, architecturally the 32-bit word at the lowest
7178 * address is always Rt and the one at addr+4 is Rt2, even if
7179 * the CPU is big-endian. That means we don't want to do a
7180 * gen_aa32_ld_i64(), which invokes gen_aa32_frob64() as if
7181 * for an architecturally 64-bit access, but instead do a
7182 * 64-bit access using MO_BE if appropriate and then split
7184 * This only makes a difference for BE32 user-mode, where
7185 * frob64() must not flip the two halves of the 64-bit data
7186 * but this code must treat BE32 user-mode like BE32 system.
7188 TCGv taddr = gen_aa32_addr(s, addr, opc);
7190 tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
7191 tcg_temp_free(taddr);
7192 tcg_gen_mov_i64(cpu_exclusive_val, t64);
7193 if (s->be_data == MO_BE) {
7194 tcg_gen_extr_i64_i32(tmp2, tmp, t64);
7196 tcg_gen_extr_i64_i32(tmp, tmp2, t64);
7198 tcg_temp_free_i64(t64);
7200 store_reg(s, rt2, tmp2);
7202 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
7203 tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
7206 store_reg(s, rt, tmp);
7207 tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
7210 static void gen_clrex(DisasContext *s)
7212 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
7215 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
7216 TCGv_i32 addr, int size)
7218 TCGv_i32 t0, t1, t2;
7221 TCGLabel *done_label;
7222 TCGLabel *fail_label;
7223 MemOp opc = size | MO_ALIGN | s->be_data;
7225 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
7231 fail_label = gen_new_label();
7232 done_label = gen_new_label();
7233 extaddr = tcg_temp_new_i64();
7234 tcg_gen_extu_i32_i64(extaddr, addr);
7235 tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
7236 tcg_temp_free_i64(extaddr);
7238 taddr = gen_aa32_addr(s, addr, opc);
7239 t0 = tcg_temp_new_i32();
7240 t1 = load_reg(s, rt);
7242 TCGv_i64 o64 = tcg_temp_new_i64();
7243 TCGv_i64 n64 = tcg_temp_new_i64();
7245 t2 = load_reg(s, rt2);
7246 /* For AArch32, architecturally the 32-bit word at the lowest
7247 * address is always Rt and the one at addr+4 is Rt2, even if
7248 * the CPU is big-endian. Since we're going to treat this as a
7249 * single 64-bit BE store, we need to put the two halves in the
7250 * opposite order for BE to LE, so that they end up in the right
7252 * We don't want gen_aa32_frob64() because that does the wrong
7253 * thing for BE32 usermode.
7255 if (s->be_data == MO_BE) {
7256 tcg_gen_concat_i32_i64(n64, t2, t1);
7258 tcg_gen_concat_i32_i64(n64, t1, t2);
7260 tcg_temp_free_i32(t2);
7262 tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
7263 get_mem_index(s), opc);
7264 tcg_temp_free_i64(n64);
7266 tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
7267 tcg_gen_extrl_i64_i32(t0, o64);
7269 tcg_temp_free_i64(o64);
7271 t2 = tcg_temp_new_i32();
7272 tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
7273 tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
7274 tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
7275 tcg_temp_free_i32(t2);
7277 tcg_temp_free_i32(t1);
7278 tcg_temp_free(taddr);
7279 tcg_gen_mov_i32(cpu_R[rd], t0);
7280 tcg_temp_free_i32(t0);
7281 tcg_gen_br(done_label);
7283 gen_set_label(fail_label);
7284 tcg_gen_movi_i32(cpu_R[rd], 1);
7285 gen_set_label(done_label);
7286 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
7292 * @mode: mode field from insn (which stack to store to)
7293 * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
7294 * @writeback: true if writeback bit set
7296 * Generate code for the SRS (Store Return State) insn.
7298 static void gen_srs(DisasContext *s,
7299 uint32_t mode, uint32_t amode, bool writeback)
7306 * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
7307 * and specified mode is monitor mode
7308 * - UNDEFINED in Hyp mode
7309 * - UNPREDICTABLE in User or System mode
7310 * - UNPREDICTABLE if the specified mode is:
7311 * -- not implemented
7312 * -- not a valid mode number
7313 * -- a mode that's at a higher exception level
7314 * -- Monitor, if we are Non-secure
7315 * For the UNPREDICTABLE cases we choose to UNDEF.
7317 if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
7318 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(), 3);
7322 if (s->current_el == 0 || s->current_el == 2) {
7327 case ARM_CPU_MODE_USR:
7328 case ARM_CPU_MODE_FIQ:
7329 case ARM_CPU_MODE_IRQ:
7330 case ARM_CPU_MODE_SVC:
7331 case ARM_CPU_MODE_ABT:
7332 case ARM_CPU_MODE_UND:
7333 case ARM_CPU_MODE_SYS:
7335 case ARM_CPU_MODE_HYP:
7336 if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
7340 case ARM_CPU_MODE_MON:
7341 /* No need to check specifically for "are we non-secure" because
7342 * we've already made EL0 UNDEF and handled the trap for S-EL1;
7343 * so if this isn't EL3 then we must be non-secure.
7345 if (s->current_el != 3) {
7354 unallocated_encoding(s);
7358 addr = tcg_temp_new_i32();
7359 tmp = tcg_const_i32(mode);
7360 /* get_r13_banked() will raise an exception if called from System mode */
7361 gen_set_condexec(s);
7362 gen_set_pc_im(s, s->pc_curr);
7363 gen_helper_get_r13_banked(addr, cpu_env, tmp);
7364 tcg_temp_free_i32(tmp);
7381 tcg_gen_addi_i32(addr, addr, offset);
7382 tmp = load_reg(s, 14);
7383 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
7384 tcg_temp_free_i32(tmp);
7385 tmp = load_cpu_field(spsr);
7386 tcg_gen_addi_i32(addr, addr, 4);
7387 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
7388 tcg_temp_free_i32(tmp);
7406 tcg_gen_addi_i32(addr, addr, offset);
7407 tmp = tcg_const_i32(mode);
7408 gen_helper_set_r13_banked(cpu_env, tmp, addr);
7409 tcg_temp_free_i32(tmp);
7411 tcg_temp_free_i32(addr);
7412 s->base.is_jmp = DISAS_UPDATE;
7415 /* Generate a label used for skipping this instruction */
7416 static void arm_gen_condlabel(DisasContext *s)
7419 s->condlabel = gen_new_label();
7424 /* Skip this instruction if the ARM condition is false */
7425 static void arm_skip_unless(DisasContext *s, uint32_t cond)
7427 arm_gen_condlabel(s);
7428 arm_gen_test_cc(cond ^ 1, s->condlabel);
7433 * Constant expanders for the decoders.
7436 static int negate(DisasContext *s, int x)
7441 static int plus_2(DisasContext *s, int x)
7446 static int times_2(DisasContext *s, int x)
7451 static int times_4(DisasContext *s, int x)
7456 /* Return only the rotation part of T32ExpandImm. */
7457 static int t32_expandimm_rot(DisasContext *s, int x)
7459 return x & 0xc00 ? extract32(x, 7, 5) : 0;
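/*
 * T32ExpandImm is split between this rotation expander and the
 * immediate expander below: when imm12<11:10> is non-zero the constant
 * is an 8-bit value with bit 7 forced to one, rotated right by
 * imm12<11:7> (e.g. imm12 = 0x4ab encodes 0xab ROR 9 = 0x55800000);
 * otherwise imm12<9:8> selects a replicated byte pattern and no
 * rotation is applied (e.g. imm12 = 0x1ab encodes 0x00ab00ab).
 */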
7462 /* Return the unrotated immediate from T32ExpandImm. */
7463 static int t32_expandimm_imm(DisasContext *s, int x)
7465 int imm = extract32(x, 0, 8);
7467 switch (extract32(x, 8, 4)) {
7469 /* Nothing to do. */
7471 case 1: /* 00XY00XY */
7474 case 2: /* XY00XY00 */
7477 case 3: /* XYXYXYXY */
7481 /* Rotated constant. */
7488 static int t32_branch24(DisasContext *s, int x)
7490 /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S. */
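    /* x is sign-extended, so (x < 0) is S; when S is 0, both J bits flip. */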
7491 x ^= !(x < 0) * (3 << 21);
7492 /* Append the final zero. */
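/*
 * 16-bit Thumb data-processing encodings set the flags only when
 * executed outside of an IT block, which is what this expander reports.
 */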
7496 static int t16_setflags(DisasContext *s)
7498 return s->condexec_mask == 0;
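/*
 * For the T16 PUSH/POP register lists, bit 8 of the encoding is moved
 * up to LR (bit 14) for PUSH and to PC (bit 15) for POP, on top of the
 * low-register list in bits [7:0].
 */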
7501 static int t16_push_list(DisasContext *s, int x)
7503 return (x & 0xff) | (x & 0x100) << (14 - 8);
7506 static int t16_pop_list(DisasContext *s, int x)
7508 return (x & 0xff) | (x & 0x100) << (15 - 8);
7512 * Include the generated decoders.
7515 #include "decode-a32.inc.c"
7516 #include "decode-a32-uncond.inc.c"
7517 #include "decode-t32.inc.c"
7518 #include "decode-t16.inc.c"
7520 /* Helpers to swap operands for reverse-subtract. */
7521 static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
7523 tcg_gen_sub_i32(dst, b, a);
7526 static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
7528 gen_sub_CC(dst, b, a);
7531 static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
7533 gen_sub_carry(dest, b, a);
7536 static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
7538 gen_sbc_CC(dest, b, a);
7542 * Helpers for the data processing routines.
7544 * After the computation store the results back.
7545 * This may be suppressed altogether (STREG_NONE), require a runtime
7546 * check against the stack limits (STREG_SP_CHECK), or generate an
7547 * exception return. Or, of course, simply store into a register.
7549 * Always return true, indicating success for a trans_* function.
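 */
typedef enum StoreRegKind {
    STREG_NONE,
    STREG_NORMAL,
    STREG_SP_CHECK,
    STREG_EXC_RET,
} StoreRegKind;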
7558 static bool store_reg_kind(DisasContext *s, int rd,
7559 TCGv_i32 val, StoreRegKind kind)
7563 tcg_temp_free_i32(val);
7566 /* See ALUWritePC: Interworking only from a32 mode. */
7568 store_reg(s, rd, val);
7570 store_reg_bx(s, rd, val);
7573 case STREG_SP_CHECK:
7574 store_sp_checked(s, val);
7577 gen_exception_return(s, val);
7580 g_assert_not_reached();
7584 * Data Processing (register)
7586 * Operate, with set flags, one register source,
7587 * one immediate shifted register source, and a destination.
7589 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
7590 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7591 int logic_cc, StoreRegKind kind)
7593 TCGv_i32 tmp1, tmp2;
7595 tmp2 = load_reg(s, a->rm);
7596 gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
7597 tmp1 = load_reg(s, a->rn);
7599 gen(tmp1, tmp1, tmp2);
7600 tcg_temp_free_i32(tmp2);
7605 return store_reg_kind(s, a->rd, tmp1, kind);
7608 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
7609 void (*gen)(TCGv_i32, TCGv_i32),
7610 int logic_cc, StoreRegKind kind)
7614 tmp = load_reg(s, a->rm);
7615 gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
7621 return store_reg_kind(s, a->rd, tmp, kind);
7625 * Data-processing (register-shifted register)
7627 * Operate, with set flags, one register source,
7628 * one register shifted register source, and a destination.
7630 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
7631 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7632 int logic_cc, StoreRegKind kind)
7634 TCGv_i32 tmp1, tmp2;
7636 tmp1 = load_reg(s, a->rs);
7637 tmp2 = load_reg(s, a->rm);
7638 gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
7639 tmp1 = load_reg(s, a->rn);
7641 gen(tmp1, tmp1, tmp2);
7642 tcg_temp_free_i32(tmp2);
7647 return store_reg_kind(s, a->rd, tmp1, kind);
7650 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
7651 void (*gen)(TCGv_i32, TCGv_i32),
7652 int logic_cc, StoreRegKind kind)
7654 TCGv_i32 tmp1, tmp2;
7656 tmp1 = load_reg(s, a->rs);
7657 tmp2 = load_reg(s, a->rm);
7658 gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
7664 return store_reg_kind(s, a->rd, tmp2, kind);
7668 * Data-processing (immediate)
7670 * Operate, with set flags, one register source,
7671 * one rotated immediate, and a destination.
7673 * Note that when logic_cc && a->rot, CF is set from the msb of the
7674 * rotated immediate, which is why we must pass in the unrotated form.
7677 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
7678 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7679 int logic_cc, StoreRegKind kind)
7681 TCGv_i32 tmp1, tmp2;
7684 imm = ror32(a->imm, a->rot);
7685 if (logic_cc && a->rot) {
7686 tcg_gen_movi_i32(cpu_CF, imm >> 31);
7688 tmp2 = tcg_const_i32(imm);
7689 tmp1 = load_reg(s, a->rn);
7691 gen(tmp1, tmp1, tmp2);
7692 tcg_temp_free_i32(tmp2);
7697 return store_reg_kind(s, a->rd, tmp1, kind);
7700 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
7701 void (*gen)(TCGv_i32, TCGv_i32),
7702 int logic_cc, StoreRegKind kind)
7707 imm = ror32(a->imm, a->rot);
7708 if (logic_cc && a->rot) {
7709 tcg_gen_movi_i32(cpu_CF, imm >> 31);
7711 tmp = tcg_const_i32(imm);
7717 return store_reg_kind(s, a->rd, tmp, kind);
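/*
 * The generated trans_* suffixes encode the operand forms: "rrri" is
 * Rd, Rn and an immediate-shifted Rm; "rrrr" is Rd, Rn and a
 * register-shifted Rm; "rri" is Rd, Rn and a rotated immediate.  An
 * "x" marks an operand the insn does not have: DO_ANY2 drops Rn
 * (MOV/MVN) and DO_CMP2 drops Rd (compare/test insns, which only
 * update the flags).  For example,
 *   DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
 * expands (among others) to
 *   static bool trans_AND_rrri(DisasContext *s, arg_s_rrr_shi *a)
 *   { StoreRegKind k = STREG_NORMAL;
 *     return op_s_rrr_shi(s, a, tcg_gen_and_i32, a->s, k); }
 */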
7720 #define DO_ANY3(NAME, OP, L, K) \
7721 static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a) \
7722 { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); } \
7723 static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a) \
7724 { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); } \
7725 static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a) \
7726 { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
7728 #define DO_ANY2(NAME, OP, L, K) \
7729 static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a) \
7730 { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); } \
7731 static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a) \
7732 { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); } \
7733 static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a) \
7734 { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
7736 #define DO_CMP2(NAME, OP, L) \
7737 static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a) \
7738 { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); } \
7739 static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a) \
7740 { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); } \
7741 static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a) \
7742 { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
7744 DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
7745 DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
7746 DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
7747 DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
7749 DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
7750 DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
7751 DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
7752 DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
7754 DO_CMP2(TST, tcg_gen_and_i32, true)
7755 DO_CMP2(TEQ, tcg_gen_xor_i32, true)
7756 DO_CMP2(CMN, gen_add_CC, false)
7757 DO_CMP2(CMP, gen_sub_CC, false)
7759 DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
7760 a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
7763 * Note for the computation of StoreRegKind we return out of the
7764 * middle of the functions that are expanded by DO_ANY3, and that
7765 * we modify a->s via that parameter before it is used by OP.
7767 DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
7769 StoreRegKind ret = STREG_NORMAL;
7770 if (a->rd == 15 && a->s) {
7772 * See ALUExceptionReturn:
7773 * In User mode, UNPREDICTABLE; we choose UNDEF.
7774 * In Hyp mode, UNDEFINED.
7776 if (IS_USER(s) || s->current_el == 2) {
7777 unallocated_encoding(s);
7780 /* There is no writeback of nzcv to PSTATE. */
7782 ret = STREG_EXC_RET;
7783 } else if (a->rd == 13 && a->rn == 13) {
7784 ret = STREG_SP_CHECK;
7789 DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
7791 StoreRegKind ret = STREG_NORMAL;
7792 if (a->rd == 15 && a->s) {
7794 * See ALUExceptionReturn:
7795 * In User mode, UNPREDICTABLE; we choose UNDEF.
7796 * In Hyp mode, UNDEFINED.
7798 if (IS_USER(s) || s->current_el == 2) {
7799 unallocated_encoding(s);
7802 /* There is no writeback of nzcv to PSTATE. */
7804 ret = STREG_EXC_RET;
7805 } else if (a->rd == 13) {
7806 ret = STREG_SP_CHECK;
7811 DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
7814 * ORN is only available with T32, so there is no register-shifted-register
7815 * form of the insn. Using the DO_ANY3 macro would create an unused function.
7817 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
7819 return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
7822 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
7824 return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
7831 static bool trans_ADR(DisasContext *s, arg_ri *a)
7833 store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
7837 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
7841 if (!ENABLE_ARCH_6T2) {
7845 tmp = tcg_const_i32(a->imm);
7846 store_reg(s, a->rd, tmp);
7850 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
7854 if (!ENABLE_ARCH_6T2) {
7858 tmp = load_reg(s, a->rd);
7859 tcg_gen_ext16u_i32(tmp, tmp);
7860 tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
7861 store_reg(s, a->rd, tmp);
7866 * Multiply and multiply accumulate
7869 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
7873 t1 = load_reg(s, a->rn);
7874 t2 = load_reg(s, a->rm);
7875 tcg_gen_mul_i32(t1, t1, t2);
7876 tcg_temp_free_i32(t2);
7878 t2 = load_reg(s, a->ra);
7879 tcg_gen_add_i32(t1, t1, t2);
7880 tcg_temp_free_i32(t2);
7885 store_reg(s, a->rd, t1);
7889 static bool trans_MUL(DisasContext *s, arg_MUL *a)
7891 return op_mla(s, a, false);
7894 static bool trans_MLA(DisasContext *s, arg_MLA *a)
7896 return op_mla(s, a, true);
7899 static bool trans_MLS(DisasContext *s, arg_MLS *a)
7903 if (!ENABLE_ARCH_6T2) {
7906 t1 = load_reg(s, a->rn);
7907 t2 = load_reg(s, a->rm);
7908 tcg_gen_mul_i32(t1, t1, t2);
7909 tcg_temp_free_i32(t2);
7910 t2 = load_reg(s, a->ra);
7911 tcg_gen_sub_i32(t1, t2, t1);
7912 tcg_temp_free_i32(t2);
7913 store_reg(s, a->rd, t1);
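/*
 * Common code for the 32x32->64 multiplies: "uns" selects unsigned vs
 * signed multiply, "add" selects accumulation into the existing value,
 * and the result is written with ra = low word, rd = high word.
 */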
7917 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
7919 TCGv_i32 t0, t1, t2, t3;
7921 t0 = load_reg(s, a->rm);
7922 t1 = load_reg(s, a->rn);
7924 tcg_gen_mulu2_i32(t0, t1, t0, t1);
7926 tcg_gen_muls2_i32(t0, t1, t0, t1);
7929 t2 = load_reg(s, a->ra);
7930 t3 = load_reg(s, a->rd);
7931 tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
7932 tcg_temp_free_i32(t2);
7933 tcg_temp_free_i32(t3);
7936 gen_logicq_cc(t0, t1);
7938 store_reg(s, a->ra, t0);
7939 store_reg(s, a->rd, t1);
7943 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
7945 return op_mlal(s, a, true, false);
7948 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
7950 return op_mlal(s, a, false, false);
7953 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
7955 return op_mlal(s, a, true, true);
7958 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
7960 return op_mlal(s, a, false, true);
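/*
 * UMAAL: rd:ra = rn * rm + ra + rd.  Adding the two 32-bit inputs to
 * the 64-bit product cannot overflow 64 bits, so two plain add2 steps
 * with a zero high part suffice.
 */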
7963 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
7965 TCGv_i32 t0, t1, t2, zero;
7968 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7973 t0 = load_reg(s, a->rm);
7974 t1 = load_reg(s, a->rn);
7975 tcg_gen_mulu2_i32(t0, t1, t0, t1);
7976 zero = tcg_const_i32(0);
7977 t2 = load_reg(s, a->ra);
7978 tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
7979 tcg_temp_free_i32(t2);
7980 t2 = load_reg(s, a->rd);
7981 tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
7982 tcg_temp_free_i32(t2);
7983 tcg_temp_free_i32(zero);
7984 store_reg(s, a->ra, t0);
7985 store_reg(s, a->rd, t1);
7990 * Saturating addition and subtraction
7993 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
7998 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7999 : !ENABLE_ARCH_5TE) {
8003 t0 = load_reg(s, a->rm);
8004 t1 = load_reg(s, a->rn);
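    /* For QDADD/QDSUB ("doub"), first double Rn with saturation. */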
8006 gen_helper_add_saturate(t1, cpu_env, t1, t1);
8009 gen_helper_add_saturate(t0, cpu_env, t0, t1);
8011 gen_helper_sub_saturate(t0, cpu_env, t0, t1);
8013 tcg_temp_free_i32(t1);
8014 store_reg(s, a->rd, t0);
8018 #define DO_QADDSUB(NAME, ADD, DOUB) \
8019 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
8021 return op_qaddsub(s, a, ADD, DOUB); \
8024 DO_QADDSUB(QADD, true, false)
8025 DO_QADDSUB(QSUB, false, false)
8026 DO_QADDSUB(QDADD, true, true)
8027 DO_QADDSUB(QDSUB, false, true)
8032 * Halfword multiply and multiply accumulate
8035 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
8036 int add_long, bool nt, bool mt)
8038 TCGv_i32 t0, t1, tl, th;
8041 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8042 : !ENABLE_ARCH_5TE) {
8046 t0 = load_reg(s, a->rn);
8047 t1 = load_reg(s, a->rm);
8048 gen_mulxy(t0, t1, nt, mt);
8049 tcg_temp_free_i32(t1);
8053 store_reg(s, a->rd, t0);
8056 t1 = load_reg(s, a->ra);
8057 gen_helper_add_setq(t0, cpu_env, t0, t1);
8058 tcg_temp_free_i32(t1);
8059 store_reg(s, a->rd, t0);
8062 tl = load_reg(s, a->ra);
8063 th = load_reg(s, a->rd);
8064 t1 = tcg_const_i32(0);
8065 tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
8066 tcg_temp_free_i32(t0);
8067 tcg_temp_free_i32(t1);
8068 store_reg(s, a->ra, tl);
8069 store_reg(s, a->rd, th);
8072 g_assert_not_reached();
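/*
 * In the DO_SMLAX expansions below, the "add" argument selects
 * add_long: 0 = no accumulate (SMULxy), 1 = 32-bit accumulate setting
 * Q on overflow (SMLAxy), 2 = 64-bit accumulate into {ra = low,
 * rd = high} (SMLALxy); nt/mt pick the top or bottom input halfwords.
 */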
8077 #define DO_SMLAX(NAME, add, nt, mt) \
8078 static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
8080 return op_smlaxxx(s, a, add, nt, mt); \
8083 DO_SMLAX(SMULBB, 0, 0, 0)
8084 DO_SMLAX(SMULBT, 0, 0, 1)
8085 DO_SMLAX(SMULTB, 0, 1, 0)
8086 DO_SMLAX(SMULTT, 0, 1, 1)
8088 DO_SMLAX(SMLABB, 1, 0, 0)
8089 DO_SMLAX(SMLABT, 1, 0, 1)
8090 DO_SMLAX(SMLATB, 1, 1, 0)
8091 DO_SMLAX(SMLATT, 1, 1, 1)
8093 DO_SMLAX(SMLALBB, 2, 0, 0)
8094 DO_SMLAX(SMLALBT, 2, 0, 1)
8095 DO_SMLAX(SMLALTB, 2, 1, 0)
8096 DO_SMLAX(SMLALTT, 2, 1, 1)
8100 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
8104 if (!ENABLE_ARCH_5TE) {
8108 t0 = load_reg(s, a->rn);
8109 t1 = load_reg(s, a->rm);
8111 * Since the nominal result is product<47:16>, shift the 16-bit
8112 * input up by 16 bits, so that the result is at product<63:32>.
8115 tcg_gen_andi_i32(t1, t1, 0xffff0000);
8117 tcg_gen_shli_i32(t1, t1, 16);
8119 tcg_gen_muls2_i32(t0, t1, t0, t1);
8120 tcg_temp_free_i32(t0);
8122 t0 = load_reg(s, a->ra);
8123 gen_helper_add_setq(t1, cpu_env, t1, t0);
8124 tcg_temp_free_i32(t0);
8126 store_reg(s, a->rd, t1);
8130 #define DO_SMLAWX(NAME, add, mt) \
8131 static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
8133 return op_smlawx(s, a, add, mt); \
8136 DO_SMLAWX(SMULWB, 0, 0)
8137 DO_SMLAWX(SMULWT, 0, 1)
8138 DO_SMLAWX(SMLAWB, 1, 0)
8139 DO_SMLAWX(SMLAWT, 1, 1)
8144 * MSR (immediate) and hints
8147 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
8150 * When running single-threaded TCG code, use the helper to ensure that
8151 * the next round-robin scheduled vCPU gets a crack. When running in
8152 * MTTCG we don't generate jumps to the helper as it won't affect the
8153 * scheduling of other vCPUs.
8155 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
8156 gen_set_pc_im(s, s->base.pc_next);
8157 s->base.is_jmp = DISAS_YIELD;
8162 static bool trans_WFE(DisasContext *s, arg_WFE *a)
8165 * When running single-threaded TCG code, use the helper to ensure that
8166 * the next round-robin scheduled vCPU gets a crack. In MTTCG mode we
8167 * just skip this instruction. Currently the SEV/SEVL instructions,
8168 * which are *one* of many ways to wake the CPU from WFE, are not
8169 * implemented so we can't sleep like WFI does.
8171 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
8172 gen_set_pc_im(s, s->base.pc_next);
8173 s->base.is_jmp = DISAS_WFE;
8178 static bool trans_WFI(DisasContext *s, arg_WFI *a)
8180 /* For WFI, halt the vCPU until an IRQ. */
8181 gen_set_pc_im(s, s->base.pc_next);
8182 s->base.is_jmp = DISAS_WFI;
8186 static bool trans_NOP(DisasContext *s, arg_NOP *a)
8191 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
8193 uint32_t val = ror32(a->imm, a->rot * 2);
8194 uint32_t mask = msr_mask(s, a->mask, a->r);
8196 if (gen_set_psr_im(s, mask, a->r, val)) {
8197 unallocated_encoding(s);
8203 * Cyclic Redundancy Check
8206 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
8208 TCGv_i32 t1, t2, t3;
8210 if (!dc_isar_feature(aa32_crc32, s)) {
8214 t1 = load_reg(s, a->rn);
8215 t2 = load_reg(s, a->rm);
8226 g_assert_not_reached();
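    /* The crc32 helpers take the number of input bytes, i.e. 1 << sz. */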
8228 t3 = tcg_const_i32(1 << sz);
8230 gen_helper_crc32c(t1, t1, t2, t3);
8232 gen_helper_crc32(t1, t1, t2, t3);
8234 tcg_temp_free_i32(t2);
8235 tcg_temp_free_i32(t3);
8236 store_reg(s, a->rd, t1);
8240 #define DO_CRC32(NAME, c, sz) \
8241 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
8242 { return op_crc32(s, a, c, sz); }
8244 DO_CRC32(CRC32B, false, MO_8)
8245 DO_CRC32(CRC32H, false, MO_16)
8246 DO_CRC32(CRC32W, false, MO_32)
8247 DO_CRC32(CRC32CB, true, MO_8)
8248 DO_CRC32(CRC32CH, true, MO_16)
8249 DO_CRC32(CRC32CW, true, MO_32)
8254 * Miscellaneous instructions
8257 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
8259 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8262 gen_mrs_banked(s, a->r, a->sysm, a->rd);
8266 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
8268 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8271 gen_msr_banked(s, a->r, a->sysm, a->rn);
8275 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
8279 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8284 unallocated_encoding(s);
8287 tmp = load_cpu_field(spsr);
8289 tmp = tcg_temp_new_i32();
8290 gen_helper_cpsr_read(tmp, cpu_env);
8292 store_reg(s, a->rd, tmp);
8296 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
8299 uint32_t mask = msr_mask(s, a->mask, a->r);
8301 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8304 tmp = load_reg(s, a->rn);
8305 if (gen_set_psr(s, mask, a->r, tmp)) {
8306 unallocated_encoding(s);
8311 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
8315 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8318 tmp = tcg_const_i32(a->sysm);
8319 gen_helper_v7m_mrs(tmp, cpu_env, tmp);
8320 store_reg(s, a->rd, tmp);
8324 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
8328 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8331 addr = tcg_const_i32((a->mask << 10) | a->sysm);
8332 reg = load_reg(s, a->rn);
8333 gen_helper_v7m_msr(cpu_env, addr, reg);
8334 tcg_temp_free_i32(addr);
8335 tcg_temp_free_i32(reg);
8340 static bool trans_BX(DisasContext *s, arg_BX *a)
8342 if (!ENABLE_ARCH_4T) {
8345 gen_bx_excret(s, load_reg(s, a->rm));
8349 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
8351 if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
8354 /* Trivial implementation equivalent to bx. */
8355 gen_bx(s, load_reg(s, a->rm));
8359 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
8363 if (!ENABLE_ARCH_5) {
8366 tmp = load_reg(s, a->rm);
8367 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
8373 * BXNS/BLXNS: only exist for v8M with the security extensions,
8374 * and always UNDEF if NonSecure. We don't implement these in
8375 * the user-only mode either (in theory you can use them from
8376 * Secure User mode but they are too tied in to system emulation).
8378 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
8380 if (!s->v8m_secure || IS_USER_ONLY) {
8381 unallocated_encoding(s);
8388 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
8390 if (!s->v8m_secure || IS_USER_ONLY) {
8391 unallocated_encoding(s);
8393 gen_blxns(s, a->rm);
8398 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
8402 if (!ENABLE_ARCH_5) {
8405 tmp = load_reg(s, a->rm);
8406 tcg_gen_clzi_i32(tmp, tmp, 32);
8407 store_reg(s, a->rd, tmp);
8411 static bool trans_ERET(DisasContext *s, arg_ERET *a)
8415 if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
8419 unallocated_encoding(s);
8422 if (s->current_el == 2) {
8423 /* ERET from Hyp uses ELR_Hyp, not LR */
8424 tmp = load_cpu_field(elr_el[2]);
8426 tmp = load_reg(s, 14);
8428 gen_exception_return(s, tmp);
8432 static bool trans_HLT(DisasContext *s, arg_HLT *a)
8438 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
8440 if (!ENABLE_ARCH_5) {
8443 gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
8447 static bool trans_HVC(DisasContext *s, arg_HVC *a)
8449 if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
8453 unallocated_encoding(s);
8460 static bool trans_SMC(DisasContext *s, arg_SMC *a)
8462 if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
8466 unallocated_encoding(s);
8473 static bool trans_SG(DisasContext *s, arg_SG *a)
8475 if (!arm_dc_feature(s, ARM_FEATURE_M) ||
8476 !arm_dc_feature(s, ARM_FEATURE_V8)) {
8481 * The bulk of the behaviour for this instruction is implemented
8482 * in v7m_handle_execute_nsc(), which deals with the insn when
8483 * it is executed by a CPU in non-secure state from memory
8484 * which is Secure & NonSecure-Callable.
8485 * Here we only need to handle the remaining cases:
8486 * * in NS memory (including the "security extension not
8487 * implemented" case) : NOP
8488 * * in S memory but CPU already secure (clear IT bits)
8489 * We know that the attribute for the memory this insn is
8490 * in must match the current CPU state, because otherwise
8491 * get_phys_addr_pmsav8 would have generated an exception.
8493 if (s->v8m_secure) {
8494 /* Like the IT insn, we don't need to generate any code */
8495 s->condexec_cond = 0;
8496 s->condexec_mask = 0;
8501 static bool trans_TT(DisasContext *s, arg_TT *a)
8505 if (!arm_dc_feature(s, ARM_FEATURE_M) ||
8506 !arm_dc_feature(s, ARM_FEATURE_V8)) {
8509 if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
8510 /* We UNDEF for these UNPREDICTABLE cases */
8511 unallocated_encoding(s);
8514 if (a->A && !s->v8m_secure) {
8515 /* This case is UNDEFINED. */
8516 unallocated_encoding(s);
8520 addr = load_reg(s, a->rn);
8521 tmp = tcg_const_i32((a->A << 1) | a->T);
8522 gen_helper_v7m_tt(tmp, cpu_env, addr, tmp);
8523 tcg_temp_free_i32(addr);
8524 store_reg(s, a->rd, tmp);
8529 * Load/store register index
8532 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
8536 /* ISS not valid if writeback */
8545 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
8547 TCGv_i32 addr = load_reg(s, a->rn);
8549 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8550 gen_helper_v8m_stackcheck(cpu_env, addr);
8554 TCGv_i32 ofs = load_reg(s, a->rm);
8555 gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
8557 tcg_gen_add_i32(addr, addr, ofs);
8559 tcg_gen_sub_i32(addr, addr, ofs);
8561 tcg_temp_free_i32(ofs);
8566 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
8567 TCGv_i32 addr, int address_offset)
8570 TCGv_i32 ofs = load_reg(s, a->rm);
8571 gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
8573 tcg_gen_add_i32(addr, addr, ofs);
8575 tcg_gen_sub_i32(addr, addr, ofs);
8577 tcg_temp_free_i32(ofs);
8579 tcg_temp_free_i32(addr);
8582 tcg_gen_addi_i32(addr, addr, address_offset);
8583 store_reg(s, a->rn, addr);
8586 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
8587 MemOp mop, int mem_idx)
8589 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
8592 addr = op_addr_rr_pre(s, a);
8594 tmp = tcg_temp_new_i32();
8595 gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8596 disas_set_da_iss(s, mop, issinfo);
8599 * Perform base writeback before the loaded value to
8600 * ensure correct behavior with overlapping index registers.
8602 op_addr_rr_post(s, a, addr, 0);
8603 store_reg_from_load(s, a->rt, tmp);
8607 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
8608 MemOp mop, int mem_idx)
8610 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
8613 addr = op_addr_rr_pre(s, a);
8615 tmp = load_reg(s, a->rt);
8616 gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8617 disas_set_da_iss(s, mop, issinfo);
8618 tcg_temp_free_i32(tmp);
8620 op_addr_rr_post(s, a, addr, 0);
8624 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
8626 int mem_idx = get_mem_index(s);
8629 if (!ENABLE_ARCH_5TE) {
8633 unallocated_encoding(s);
8636 addr = op_addr_rr_pre(s, a);
8638 tmp = tcg_temp_new_i32();
8639 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8640 store_reg(s, a->rt, tmp);
8642 tcg_gen_addi_i32(addr, addr, 4);
8644 tmp = tcg_temp_new_i32();
8645 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8646 store_reg(s, a->rt + 1, tmp);
8648 /* LDRD w/ base writeback is undefined if the registers overlap. */
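    /* addr was advanced by 4 above; pass -4 so writeback uses the correct base. */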
8649 op_addr_rr_post(s, a, addr, -4);
8653 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
8655 int mem_idx = get_mem_index(s);
8658 if (!ENABLE_ARCH_5TE) {
8662 unallocated_encoding(s);
8665 addr = op_addr_rr_pre(s, a);
8667 tmp = load_reg(s, a->rt);
8668 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8669 tcg_temp_free_i32(tmp);
8671 tcg_gen_addi_i32(addr, addr, 4);
8673 tmp = load_reg(s, a->rt + 1);
8674 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8675 tcg_temp_free_i32(tmp);
8677 op_addr_rr_post(s, a, addr, -4);
8682 * Load/store immediate index
8685 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
8693 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8695 * Stackcheck. Here we know 'addr' is the current SP;
8696 * U is set if we're moving SP up, else down. It is
8697 * UNKNOWN whether the limit check triggers when SP starts
8698 * below the limit and ends up above it; we chose to do so.
8701 TCGv_i32 newsp = tcg_temp_new_i32();
8702 tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
8703 gen_helper_v8m_stackcheck(cpu_env, newsp);
8704 tcg_temp_free_i32(newsp);
8706 gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
8710 return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
8713 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
8714 TCGv_i32 addr, int address_offset)
8718 address_offset += a->imm;
8720 address_offset -= a->imm;
8723 tcg_temp_free_i32(addr);
8726 tcg_gen_addi_i32(addr, addr, address_offset);
8727 store_reg(s, a->rn, addr);
8730 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
8731 MemOp mop, int mem_idx)
8733 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
8736 addr = op_addr_ri_pre(s, a);
8738 tmp = tcg_temp_new_i32();
8739 gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8740 disas_set_da_iss(s, mop, issinfo);
8743 * Perform base writeback before the loaded value to
8744 * ensure correct behavior with overlapping index registers.
8746 op_addr_ri_post(s, a, addr, 0);
8747 store_reg_from_load(s, a->rt, tmp);
8751 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
8752 MemOp mop, int mem_idx)
8754 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
8757 addr = op_addr_ri_pre(s, a);
8759 tmp = load_reg(s, a->rt);
8760 gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8761 disas_set_da_iss(s, mop, issinfo);
8762 tcg_temp_free_i32(tmp);
8764 op_addr_ri_post(s, a, addr, 0);
8768 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
8770 int mem_idx = get_mem_index(s);
8773 addr = op_addr_ri_pre(s, a);
8775 tmp = tcg_temp_new_i32();
8776 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8777 store_reg(s, a->rt, tmp);
8779 tcg_gen_addi_i32(addr, addr, 4);
8781 tmp = tcg_temp_new_i32();
8782 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8783 store_reg(s, rt2, tmp);
8785 /* LDRD w/ base writeback is undefined if the registers overlap. */
8786 op_addr_ri_post(s, a, addr, -4);
8790 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
8792 if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
8795 return op_ldrd_ri(s, a, a->rt + 1);
8798 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
8801 .u = a->u, .w = a->w, .p = a->p,
8802 .rn = a->rn, .rt = a->rt, .imm = a->imm
8804 return op_ldrd_ri(s, &b, a->rt2);
8807 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
8809 int mem_idx = get_mem_index(s);
8812 addr = op_addr_ri_pre(s, a);
8814 tmp = load_reg(s, a->rt);
8815 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8816 tcg_temp_free_i32(tmp);
8818 tcg_gen_addi_i32(addr, addr, 4);
8820 tmp = load_reg(s, rt2);
8821 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8822 tcg_temp_free_i32(tmp);
8824 op_addr_ri_post(s, a, addr, -4);
8828 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
8830 if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
8833 return op_strd_ri(s, a, a->rt + 1);
8836 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
8839 .u = a->u, .w = a->w, .p = a->p,
8840 .rn = a->rn, .rt = a->rt, .imm = a->imm
8842 return op_strd_ri(s, &b, a->rt2);
8845 #define DO_LDST(NAME, WHICH, MEMOP) \
8846 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a) \
8848 return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s)); \
8850 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a) \
8852 return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s)); \
8854 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a) \
8856 return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s)); \
8858 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a) \
8860 return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s)); \
8863 DO_LDST(LDR, load, MO_UL)
8864 DO_LDST(LDRB, load, MO_UB)
8865 DO_LDST(LDRH, load, MO_UW)
8866 DO_LDST(LDRSB, load, MO_SB)
8867 DO_LDST(LDRSH, load, MO_SW)
8869 DO_LDST(STR, store, MO_UL)
8870 DO_LDST(STRB, store, MO_UB)
8871 DO_LDST(STRH, store, MO_UW)
8876 * Synchronization primitives
8879 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
8885 addr = load_reg(s, a->rn);
8886 taddr = gen_aa32_addr(s, addr, opc);
8887 tcg_temp_free_i32(addr);
8889 tmp = load_reg(s, a->rt2);
8890 tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
8891 tcg_temp_free(taddr);
8893 store_reg(s, a->rt, tmp);
8897 static bool trans_SWP(DisasContext *s, arg_SWP *a)
8899 return op_swp(s, a, MO_UL | MO_ALIGN);
8902 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
8904 return op_swp(s, a, MO_UB);
8908 * Load/Store Exclusive and Load-Acquire/Store-Release
8911 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
8915 /* We UNDEF for these UNPREDICTABLE cases. */
8916 if (a->rd == 15 || a->rn == 15 || a->rt == 15
8917 || a->rd == a->rn || a->rd == a->rt
8918 || (s->thumb && (a->rd == 13 || a->rt == 13))
8921 || a->rd == a->rt2 || a->rt == a->rt2
8922 || (s->thumb && a->rt2 == 13)))) {
8923 unallocated_encoding(s);
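    /* For the release forms (STLEX*), issue the barrier before the store. */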
8928 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
8931 addr = tcg_temp_local_new_i32();
8932 load_reg_var(s, addr, a->rn);
8933 tcg_gen_addi_i32(addr, addr, a->imm);
8935 gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
8936 tcg_temp_free_i32(addr);
8940 static bool trans_STREX(DisasContext *s, arg_STREX *a)
8942 if (!ENABLE_ARCH_6) {
8945 return op_strex(s, a, MO_32, false);
8948 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
8950 if (!ENABLE_ARCH_6K) {
8953 /* We UNDEF for these UNPREDICTABLE cases. */
8955 unallocated_encoding(s);
8959 return op_strex(s, a, MO_64, false);
8962 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
8964 return op_strex(s, a, MO_64, false);
8967 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
8969 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
8972 return op_strex(s, a, MO_8, false);
8975 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
8977 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
8980 return op_strex(s, a, MO_16, false);
8983 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
8985 if (!ENABLE_ARCH_8) {
8988 return op_strex(s, a, MO_32, true);
8991 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
8993 if (!ENABLE_ARCH_8) {
8996 /* We UNDEF for these UNPREDICTABLE cases. */
8998 unallocated_encoding(s);
9002 return op_strex(s, a, MO_64, true);
9005 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
9007 if (!ENABLE_ARCH_8) {
9010 return op_strex(s, a, MO_64, true);
9013 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
9015 if (!ENABLE_ARCH_8) {
9018 return op_strex(s, a, MO_8, true);
9021 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
9023 if (!ENABLE_ARCH_8) {
9026 return op_strex(s, a, MO_16, true);
9029 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
9033 if (!ENABLE_ARCH_8) {
9036 /* We UNDEF for these UNPREDICTABLE cases. */
9037 if (a->rn == 15 || a->rt == 15) {
9038 unallocated_encoding(s);
9042 addr = load_reg(s, a->rn);
9043 tmp = load_reg(s, a->rt);
9044 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
9045 gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
9046 disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
9048 tcg_temp_free_i32(tmp);
9049 tcg_temp_free_i32(addr);
9053 static bool trans_STL(DisasContext *s, arg_STL *a)
9055 return op_stl(s, a, MO_UL);
9058 static bool trans_STLB(DisasContext *s, arg_STL *a)
9060 return op_stl(s, a, MO_UB);
9063 static bool trans_STLH(DisasContext *s, arg_STL *a)
9065 return op_stl(s, a, MO_UW);
9068 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
9072 /* We UNDEF for these UNPREDICTABLE cases. */
9073 if (a->rn == 15 || a->rt == 15
9074 || (s->thumb && a->rt == 13)
9076 && (a->rt2 == 15 || a->rt == a->rt2
9077 || (s->thumb && a->rt2 == 13)))) {
9078 unallocated_encoding(s);
9082 addr = tcg_temp_local_new_i32();
9083 load_reg_var(s, addr, a->rn);
9084 tcg_gen_addi_i32(addr, addr, a->imm);
9086 gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
9087 tcg_temp_free_i32(addr);
9090 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
9095 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
9097 if (!ENABLE_ARCH_6) {
9100 return op_ldrex(s, a, MO_32, false);
9103 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
9105 if (!ENABLE_ARCH_6K) {
9108 /* We UNDEF for these UNPREDICTABLE cases. */
9110 unallocated_encoding(s);
9114 return op_ldrex(s, a, MO_64, false);
9117 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
9119 return op_ldrex(s, a, MO_64, false);
9122 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
9124 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9127 return op_ldrex(s, a, MO_8, false);
9130 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
9132 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9135 return op_ldrex(s, a, MO_16, false);
9138 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
9140 if (!ENABLE_ARCH_8) {
9143 return op_ldrex(s, a, MO_32, true);
9146 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
9148 if (!ENABLE_ARCH_8) {
9151 /* We UNDEF for these UNPREDICTABLE cases. */
9153 unallocated_encoding(s);
9157 return op_ldrex(s, a, MO_64, true);
9160 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
9162 if (!ENABLE_ARCH_8) {
9165 return op_ldrex(s, a, MO_64, true);
9168 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
9170 if (!ENABLE_ARCH_8) {
9173 return op_ldrex(s, a, MO_8, true);
9176 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
9178 if (!ENABLE_ARCH_8) {
9181 return op_ldrex(s, a, MO_16, true);
9184 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
9188 if (!ENABLE_ARCH_8) {
9191 /* We UNDEF for these UNPREDICTABLE cases. */
9192 if (a->rn == 15 || a->rt == 15) {
9193 unallocated_encoding(s);
9197 addr = load_reg(s, a->rn);
9198 tmp = tcg_temp_new_i32();
9199 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
9200 disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
9201 tcg_temp_free_i32(addr);
9203 store_reg(s, a->rt, tmp);
9204 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
9208 static bool trans_LDA(DisasContext *s, arg_LDA *a)
9210 return op_lda(s, a, MO_UL);
9213 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
9215 return op_lda(s, a, MO_UB);
9218 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
9220 return op_lda(s, a, MO_UW);
9224 * Media instructions
9227 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
9231 if (!ENABLE_ARCH_6) {
9235 t1 = load_reg(s, a->rn);
9236 t2 = load_reg(s, a->rm);
9237 gen_helper_usad8(t1, t1, t2);
9238 tcg_temp_free_i32(t2);
9240 t2 = load_reg(s, a->ra);
9241 tcg_gen_add_i32(t1, t1, t2);
9242 tcg_temp_free_i32(t2);
9244 store_reg(s, a->rd, t1);
9248 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
9251 int width = a->widthm1 + 1;
9254 if (!ENABLE_ARCH_6T2) {
9257 if (shift + width > 32) {
9258 /* UNPREDICTABLE; we choose to UNDEF */
9259 unallocated_encoding(s);
9263 tmp = load_reg(s, a->rn);
9265 tcg_gen_extract_i32(tmp, tmp, shift, width);
9267 tcg_gen_sextract_i32(tmp, tmp, shift, width);
9269 store_reg(s, a->rd, tmp);
9273 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
9275 return op_bfx(s, a, false);
9278 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
9280 return op_bfx(s, a, true);
9283 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
9286 int msb = a->msb, lsb = a->lsb;
9289 if (!ENABLE_ARCH_6T2) {
9293 /* UNPREDICTABLE; we choose to UNDEF */
9294 unallocated_encoding(s);
9298 width = msb + 1 - lsb;
9301 tmp = tcg_const_i32(0);
9304 tmp = load_reg(s, a->rn);
9307 TCGv_i32 tmp2 = load_reg(s, a->rd);
9308 tcg_gen_deposit_i32(tmp, tmp2, tmp, lsb, width);
9309 tcg_temp_free_i32(tmp2);
9311 store_reg(s, a->rd, tmp);
9315 static bool trans_UDF(DisasContext *s, arg_UDF *a)
9317 unallocated_encoding(s);
9322 * Parallel addition and subtraction
9325 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
9326 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
9331 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9336 t0 = load_reg(s, a->rn);
9337 t1 = load_reg(s, a->rm);
9341 tcg_temp_free_i32(t1);
9342 store_reg(s, a->rd, t0);
9346 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
9347 void (*gen)(TCGv_i32, TCGv_i32,
9348 TCGv_i32, TCGv_ptr))
9354 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9359 t0 = load_reg(s, a->rn);
9360 t1 = load_reg(s, a->rm);
9362 ge = tcg_temp_new_ptr();
9363 tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
9364 gen(t0, t0, t1, ge);
9366 tcg_temp_free_ptr(ge);
9367 tcg_temp_free_i32(t1);
9368 store_reg(s, a->rd, t0);
9372 #define DO_PAR_ADDSUB(NAME, helper) \
9373 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
9375 return op_par_addsub(s, a, helper); \
9378 #define DO_PAR_ADDSUB_GE(NAME, helper) \
9379 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
9381 return op_par_addsub_ge(s, a, helper); \
9384 DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
9385 DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
9386 DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
9387 DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
9388 DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
9389 DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
9391 DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
9392 DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
9393 DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
9394 DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
9395 DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
9396 DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
9398 DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
9399 DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
9400 DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
9401 DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
9402 DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
9403 DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
9405 DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
9406 DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
9407 DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
9408 DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
9409 DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
9410 DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
9412 DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
9413 DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
9414 DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
9415 DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
9416 DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
9417 DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
9419 DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
9420 DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
9421 DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
9422 DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
9423 DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
9424 DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
9426 #undef DO_PAR_ADDSUB
9427 #undef DO_PAR_ADDSUB_GE
9430 * Packing, unpacking, saturation, and reversal
9433 static bool trans_PKH(DisasContext *s, arg_PKH *a)
9439 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9444 tn = load_reg(s, a->rn);
9445 tm = load_reg(s, a->rm);
9451 tcg_gen_sari_i32(tm, tm, shift);
9452 tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
9455 tcg_gen_shli_i32(tm, tm, shift);
9456 tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
9458 tcg_temp_free_i32(tm);
9459 store_reg(s, a->rd, tn);
9463 static bool op_sat(DisasContext *s, arg_sat *a,
9464 void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
9466 TCGv_i32 tmp, satimm;
9469 if (!ENABLE_ARCH_6) {
9473 tmp = load_reg(s, a->rn);
9475 tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
9477 tcg_gen_shli_i32(tmp, tmp, shift);
9480 satimm = tcg_const_i32(a->satimm);
9481 gen(tmp, cpu_env, tmp, satimm);
9482 tcg_temp_free_i32(satimm);
9484 store_reg(s, a->rd, tmp);
9488 static bool trans_SSAT(DisasContext *s, arg_sat *a)
9490 return op_sat(s, a, gen_helper_ssat);
9493 static bool trans_USAT(DisasContext *s, arg_sat *a)
9495 return op_sat(s, a, gen_helper_usat);
9498 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
9500 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9503 return op_sat(s, a, gen_helper_ssat16);
9506 static bool trans_USAT16(DisasContext *s, arg_sat *a)
9508 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9511 return op_sat(s, a, gen_helper_usat16);
9514 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
9515 void (*gen_extract)(TCGv_i32, TCGv_i32),
9516 void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
9520 if (!ENABLE_ARCH_6) {
9524 tmp = load_reg(s, a->rm);
9526 * TODO: In many cases we could do a shift instead of a rotate.
9527 * Combined with a simple extend, that becomes an extract.
9529 tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
9530 gen_extract(tmp, tmp);
9533 TCGv_i32 tmp2 = load_reg(s, a->rn);
9534 gen_add(tmp, tmp, tmp2);
9535 tcg_temp_free_i32(tmp2);
9537 store_reg(s, a->rd, tmp);
9541 static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
9543 return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
9546 static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
9548 return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
9551 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
9553 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9556 return op_xta(s, a, gen_helper_sxtb16, gen_add16);
9559 static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
9561 return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
9564 static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
9566 return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
9569 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
9571 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9574 return op_xta(s, a, gen_helper_uxtb16, gen_add16);
9577 static bool trans_SEL(DisasContext *s, arg_rrr *a)
9579 TCGv_i32 t1, t2, t3;
9582 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9587 t1 = load_reg(s, a->rn);
9588 t2 = load_reg(s, a->rm);
9589 t3 = tcg_temp_new_i32();
9590 tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
9591 gen_helper_sel_flags(t1, t3, t1, t2);
9592 tcg_temp_free_i32(t3);
9593 tcg_temp_free_i32(t2);
9594 store_reg(s, a->rd, t1);
9598 static bool op_rr(DisasContext *s, arg_rr *a,
9599 void (*gen)(TCGv_i32, TCGv_i32))
9603 tmp = load_reg(s, a->rm);
9605 store_reg(s, a->rd, tmp);
9609 static bool trans_REV(DisasContext *s, arg_rr *a)
9611 if (!ENABLE_ARCH_6) {
9614 return op_rr(s, a, tcg_gen_bswap32_i32);
9617 static bool trans_REV16(DisasContext *s, arg_rr *a)
9619 if (!ENABLE_ARCH_6) {
9622 return op_rr(s, a, gen_rev16);
9625 static bool trans_REVSH(DisasContext *s, arg_rr *a)
9627 if (!ENABLE_ARCH_6) {
9630 return op_rr(s, a, gen_revsh);
9633 static bool trans_RBIT(DisasContext *s, arg_rr *a)
9635 if (!ENABLE_ARCH_6T2) {
9638 return op_rr(s, a, gen_helper_rbit);
9642 * Signed multiply, signed and unsigned divide
9645 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
9649 if (!ENABLE_ARCH_6) {
9653 t1 = load_reg(s, a->rn);
9654 t2 = load_reg(s, a->rm);
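    /* The "X" variants (m_swap) swap the halfwords of Rm before the dual multiply. */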
9658 gen_smul_dual(t1, t2);
9661 /* This subtraction cannot overflow. */
9662 tcg_gen_sub_i32(t1, t1, t2);
9665 * This addition cannot overflow 32 bits; however it may
9666 * overflow considered as a signed operation, in which case
9667 * we must set the Q flag.
9669 gen_helper_add_setq(t1, cpu_env, t1, t2);
9671 tcg_temp_free_i32(t2);
9674 t2 = load_reg(s, a->ra);
9675 gen_helper_add_setq(t1, cpu_env, t1, t2);
9676 tcg_temp_free_i32(t2);
9678 store_reg(s, a->rd, t1);
9682 static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
9684 return op_smlad(s, a, false, false);
9687 static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
9689 return op_smlad(s, a, true, false);
9692 static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
9694 return op_smlad(s, a, false, true);
9697 static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
9699 return op_smlad(s, a, true, true);
9702 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
9707 if (!ENABLE_ARCH_6) {
9711 t1 = load_reg(s, a->rn);
9712 t2 = load_reg(s, a->rm);
9716 gen_smul_dual(t1, t2);
9718 l1 = tcg_temp_new_i64();
9719 l2 = tcg_temp_new_i64();
9720 tcg_gen_ext_i32_i64(l1, t1);
9721 tcg_gen_ext_i32_i64(l2, t2);
9722 tcg_temp_free_i32(t1);
9723 tcg_temp_free_i32(t2);
9726 tcg_gen_sub_i64(l1, l1, l2);
9728 tcg_gen_add_i64(l1, l1, l2);
9730 tcg_temp_free_i64(l2);
9732 gen_addq(s, l1, a->ra, a->rd);
9733 gen_storeq_reg(s, a->ra, a->rd, l1);
9734 tcg_temp_free_i64(l1);
9738 static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
9740 return op_smlald(s, a, false, false);
9743 static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
9745 return op_smlald(s, a, true, false);
9748 static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
9750 return op_smlald(s, a, false, true);
9753 static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
9755 return op_smlald(s, a, true, true);
9758 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
9763 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9768 t1 = load_reg(s, a->rn);
9769 t2 = load_reg(s, a->rm);
9770 tcg_gen_muls2_i32(t2, t1, t1, t2);
9773 TCGv_i32 t3 = load_reg(s, a->ra);
9776 * For SMMLS, we need a 64-bit subtract: a non-zero product
9777 * lowpart generates a borrow into the high word, and we also
9778 * need the correct result lowpart for rounding.
9780 TCGv_i32 zero = tcg_const_i32(0);
9781 tcg_gen_sub2_i32(t2, t1, zero, t3, t2, t1);
9782 tcg_temp_free_i32(zero);
9784 tcg_gen_add_i32(t1, t1, t3);
9786 tcg_temp_free_i32(t3);
9790 * Adding 0x80000000 to the 64-bit quantity means that we have
9791 * carry in to the high word when the low word has the msb set.
9793 tcg_gen_shri_i32(t2, t2, 31);
9794 tcg_gen_add_i32(t1, t1, t2);
9796 tcg_temp_free_i32(t2);
9797 store_reg(s, a->rd, t1);
9801 static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
9803 return op_smmla(s, a, false, false);
9806 static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
9808 return op_smmla(s, a, true, false);
9811 static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
9813 return op_smmla(s, a, false, true);
9816 static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
9818 return op_smmla(s, a, true, true);
9821 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
9826 ? !dc_isar_feature(thumb_div, s)
9827 : !dc_isar_feature(arm_div, s)) {
9831 t1 = load_reg(s, a->rn);
9832 t2 = load_reg(s, a->rm);
9834 gen_helper_udiv(t1, t1, t2);
9836 gen_helper_sdiv(t1, t1, t2);
9838 tcg_temp_free_i32(t2);
9839 store_reg(s, a->rd, t1);
9843 static bool trans_SDIV(DisasContext *s, arg_rrr *a)
9845 return op_div(s, a, false);
9848 static bool trans_UDIV(DisasContext *s, arg_rrr *a)
9850 return op_div(s, a, true);
9854 * Block data transfer
9857 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
9859 TCGv_i32 addr = load_reg(s, a->rn);
9864 tcg_gen_addi_i32(addr, addr, 4);
9867 tcg_gen_addi_i32(addr, addr, -(n * 4));
9869 } else if (!a->i && n != 1) {
9870 /* post decrement */
9871 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
9874 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
9876 * If the writeback is incrementing SP rather than
9877 * decrementing it, and the initial SP is below the
9878 * stack limit but the final written-back SP would
9879 * be above, then we must not perform any memory
9880 * accesses, but it is IMPDEF whether we generate
9881 * an exception. We choose to do so in this case.
9882 * At this point 'addr' is the lowest address, so
9883 * either the original SP (if incrementing) or our
9884 * final SP (if decrementing), so that's what we check.
9886 gen_helper_v8m_stackcheck(cpu_env, addr);
9892 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
9893 TCGv_i32 addr, int n)
9899 /* post increment */
9900 tcg_gen_addi_i32(addr, addr, 4);
9902 /* post decrement */
9903 tcg_gen_addi_i32(addr, addr, -(n * 4));
9905 } else if (!a->i && n != 1) {
9907 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
9909 store_reg(s, a->rn, addr);
9911 tcg_temp_free_i32(addr);
9915 static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
9917 int i, j, n, list, mem_idx;
9919 TCGv_i32 addr, tmp, tmp2;
9924 /* Only usable in supervisor mode. */
9925 unallocated_encoding(s);
9932 if (n < min_n || a->rn == 15) {
9933 unallocated_encoding(s);
9937 addr = op_addr_block_pre(s, a, n);
9938 mem_idx = get_mem_index(s);
9940 for (i = j = 0; i < 16; i++) {
9941 if (!(list & (1 << i))) {
9945 if (user && i != 15) {
9946 tmp = tcg_temp_new_i32();
9947 tmp2 = tcg_const_i32(i);
9948 gen_helper_get_user_reg(tmp, cpu_env, tmp2);
9949 tcg_temp_free_i32(tmp2);
9951 tmp = load_reg(s, i);
9953 gen_aa32_st32(s, tmp, addr, mem_idx);
9954 tcg_temp_free_i32(tmp);
9956 /* No need to add after the last transfer. */
9958 tcg_gen_addi_i32(addr, addr, 4);
9962 op_addr_block_post(s, a, addr, n);
9966 static bool trans_STM(DisasContext *s, arg_ldst_block *a)
9968 /* BitCount(list) < 1 is UNPREDICTABLE */
9969 return op_stm(s, a, 1);
9972 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
9974 /* Writeback register in register list is UNPREDICTABLE for T32. */
9975 if (a->w && (a->list & (1 << a->rn))) {
9976 unallocated_encoding(s);
9979 /* BitCount(list) < 2 is UNPREDICTABLE */
9980 return op_stm(s, a, 2);
9983 static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
9985 int i, j, n, list, mem_idx;
9988 bool exc_return = false;
9989 TCGv_i32 addr, tmp, tmp2, loaded_var;
9992 /* LDM (user), LDM (exception return) */
9994 /* Only usable in supervisor mode. */
9995 unallocated_encoding(s);
9998 if (extract32(a->list, 15, 1)) {
10002 /* LDM (user) does not allow writeback. */
10004 unallocated_encoding(s);
10012 if (n < min_n || a->rn == 15) {
10013 unallocated_encoding(s);
10017 addr = op_addr_block_pre(s, a, n);
10018 mem_idx = get_mem_index(s);
10019 loaded_base = false;
10022 for (i = j = 0; i < 16; i++) {
10023 if (!(list & (1 << i))) {
10027 tmp = tcg_temp_new_i32();
10028 gen_aa32_ld32u(s, tmp, addr, mem_idx);
10030 tmp2 = tcg_const_i32(i);
10031 gen_helper_set_user_reg(cpu_env, tmp2, tmp);
10032 tcg_temp_free_i32(tmp2);
10033 tcg_temp_free_i32(tmp);
10034 } else if (i == a->rn) {
10036 loaded_base = true;
10037 } else if (i == 15 && exc_return) {
10038 store_pc_exc_ret(s, tmp);
10040 store_reg_from_load(s, i, tmp);
10043 /* No need to add after the last transfer. */
10045 tcg_gen_addi_i32(addr, addr, 4);
10049 op_addr_block_post(s, a, addr, n);
10052 /* Note that we reject base == pc above. */
10053 store_reg(s, a->rn, loaded_var);
10057 /* Restore CPSR from SPSR. */
10058 tmp = load_cpu_field(spsr);
10059 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
10062 gen_helper_cpsr_write_eret(cpu_env, tmp);
10063 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
10066 tcg_temp_free_i32(tmp);
10067 /* Must exit loop to check un-masked IRQs */
10068 s->base.is_jmp = DISAS_EXIT;
10073 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
10076 * Writeback register in register list is UNPREDICTABLE
10077 * for ArchVersion() >= 7. Prior to v7, A32 would write
10078 * an UNKNOWN value to the base register.
10080 if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
10081 unallocated_encoding(s);
10084 /* BitCount(list) < 1 is UNPREDICTABLE */
10085 return do_ldm(s, a, 1);
10088 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
10090 /* Writeback register in register list is UNPREDICTABLE for T32. */
10091 if (a->w && (a->list & (1 << a->rn))) {
10092 unallocated_encoding(s);
10095 /* BitCount(list) < 2 is UNPREDICTABLE */
10096 return do_ldm(s, a, 2);
10099 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
10101 /* Writeback is conditional on the base register not being loaded. */
10102 a->w = !(a->list & (1 << a->rn));
10103 /* BitCount(list) < 1 is UNPREDICTABLE */
10104 return do_ldm(s, a, 1);
10108 * Branch, branch with link
10111 static bool trans_B(DisasContext *s, arg_i *a)
10113 gen_jmp(s, read_pc(s) + a->imm);
10117 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
10119 /* This has cond from encoding, required to be outside IT block. */
10120 if (a->cond >= 0xe) {
10123 if (s->condexec_mask) {
10124 unallocated_encoding(s);
10127 arm_skip_unless(s, a->cond);
10128 gen_jmp(s, read_pc(s) + a->imm);
10132 static bool trans_BL(DisasContext *s, arg_i *a)
10134 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
10135 gen_jmp(s, read_pc(s) + a->imm);
10139 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
10141 /* For A32, ARCH(5) is checked near the start of the uncond block. */
10142 if (s->thumb && (a->imm & 2)) {
10145 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
10146 gen_bx_im(s, (read_pc(s) & ~3) + a->imm + !s->thumb);
10150 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
10152 TCGv_i32 addr, tmp;
10154 tmp = load_reg(s, a->rm);
10156 tcg_gen_add_i32(tmp, tmp, tmp);
10158 addr = load_reg(s, a->rn);
10159 tcg_gen_add_i32(addr, addr, tmp);
10161 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
10162 half ? MO_UW | s->be_data : MO_UB);
10163 tcg_temp_free_i32(addr);
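    /* The loaded table entry is a halfword offset: double it and add the PC. */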
10165 tcg_gen_add_i32(tmp, tmp, tmp);
10166 tcg_gen_addi_i32(tmp, tmp, read_pc(s));
10167 store_reg(s, 15, tmp);
10171 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
10173 return op_tbranch(s, a, false);
10176 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
10178 return op_tbranch(s, a, true);
10181 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
10183 TCGv_i32 tmp = load_reg(s, a->rn);
10185 arm_gen_condlabel(s);
10186 tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
10187 tmp, 0, s->condlabel);
10188 tcg_temp_free_i32(tmp);
10189 gen_jmp(s, read_pc(s) + a->imm);
10197 static bool trans_SVC(DisasContext *s, arg_SVC *a)
10199 gen_set_pc_im(s, s->base.pc_next);
10200 s->svc_imm = a->imm;
10201 s->base.is_jmp = DISAS_SWI;
10206 * Unconditional system instructions
10209 static bool trans_RFE(DisasContext *s, arg_RFE *a)
10211 static const int8_t pre_offset[4] = {
10212 /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
10214 static const int8_t post_offset[4] = {
10215 /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
10217 TCGv_i32 addr, t1, t2;
10219 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10223 unallocated_encoding(s);
10227 addr = load_reg(s, a->rn);
10228 tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
10230 /* Load PC into tmp and CPSR into tmp2. */
10231 t1 = tcg_temp_new_i32();
10232 gen_aa32_ld32u(s, t1, addr, get_mem_index(s));
10233 tcg_gen_addi_i32(addr, addr, 4);
10234 t2 = tcg_temp_new_i32();
10235 gen_aa32_ld32u(s, t2, addr, get_mem_index(s));
10238 /* Base writeback. */
10239 tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
10240 store_reg(s, a->rn, addr);
10242 tcg_temp_free_i32(addr);
10244 gen_rfe(s, t1, t2);
10248 static bool trans_SRS(DisasContext *s, arg_SRS *a)
10250 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10253 gen_srs(s, a->mode, a->pu, a->w);
10257 static bool trans_CPS(DisasContext *s, arg_CPS *a)
10259 uint32_t mask, val;
10261 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10265 /* Implemented as NOP in user mode. */
10268 /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
10290 gen_set_psr_im(s, mask, 0, val);
10295 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
10297 TCGv_i32 tmp, addr;
10299 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
10303 /* Implemented as NOP in user mode. */
10307 tmp = tcg_const_i32(a->im);
10310 addr = tcg_const_i32(19);
10311 gen_helper_v7m_msr(cpu_env, addr, tmp);
10312 tcg_temp_free_i32(addr);
10316 addr = tcg_const_i32(16);
10317 gen_helper_v7m_msr(cpu_env, addr, tmp);
10318 tcg_temp_free_i32(addr);
10320 tcg_temp_free_i32(tmp);
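/*
 * The constants 19 and 16 passed to the v7M MSR helper are SYSm
 * special-register numbers: 19 is FAULTMASK and 16 is PRIMASK, which is
 * how CPSIE/CPSID on M profile adjust interrupt masking instead of
 * touching CPSR bits.
 */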
10326 /* Clear-Exclusive, Barriers */
10329 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
10332 ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
10333 : !ENABLE_ARCH_6K) {
10340 static bool trans_DSB(DisasContext *s, arg_DSB *a)
10342 if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
10345 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
10349 static bool trans_DMB(DisasContext *s, arg_DMB *a)
10351 return trans_DSB(s, NULL);
10354 static bool trans_ISB(DisasContext *s, arg_ISB *a)
10356 if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
10360 /* We need to break the TB after this insn to execute
10361 * self-modifying code correctly and also to take
10362 * any pending interrupts immediately. */
10364 gen_goto_tb(s, 0, s->base.pc_next);
10368 static bool trans_SB(DisasContext *s, arg_SB *a)
10370 if (!dc_isar_feature(aa32_sb, s)) {
10374 /* TODO: There is no speculation barrier opcode
10375 * for TCG; MB and end the TB instead. */
10377 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
10378 gen_goto_tb(s, 0, s->base.pc_next);
10382 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
10384 if (!ENABLE_ARCH_6) {
10387 if (a->E != (s->be_data == MO_BE)) {
10388 gen_helper_setend(cpu_env);
10389 s->base.is_jmp = DISAS_UPDATE;
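/*
 * Ending the TB with DISAS_UPDATE matters here because the data
 * endianness chosen by SETEND is cached in the TB flags (see be_data in
 * arm_tr_init_disas_context), so any following loads/stores must be
 * retranslated with the new setting.
 */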
10395 /* Preload instructions
10396 * All are nops, contingent on the appropriate arch level. */
10399 static bool trans_PLD(DisasContext *s, arg_PLD *a)
10401 return ENABLE_ARCH_5TE;
10404 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
10406 return arm_dc_feature(s, ARM_FEATURE_V7MP);
10409 static bool trans_PLI(DisasContext *s, arg_PLD *a)
10411 return ENABLE_ARCH_7;
10418 static bool trans_IT(DisasContext *s, arg_IT *a)
10420 int cond_mask = a->cond_mask;
10423 /* No actual code generated for this insn, just setup state.
10425 * Combinations of firstcond and mask which set up an 0b1111
10426 * condition are UNPREDICTABLE; we take the CONSTRAINED
10427 * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
10428 * i.e. both meaning "execute always". */
10430 s->condexec_cond = (cond_mask >> 4) & 0xe;
10431 s->condexec_mask = cond_mask & 0x1f;
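/*
 * The 8-bit IT field splits so that condexec_cond keeps the top three
 * bits of firstcond and condexec_mask carries firstcond[0] followed by
 * the 4-bit mask, mirroring the CPSR IT[7:0] layout. The per-insn
 * advance in thumb_tr_translate_insn() then shifts one bit out of the
 * mask into the low bit of the condition for each insn in the block.
 */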
10439 static void disas_arm_insn(DisasContext *s, unsigned int insn)
10441 unsigned int cond = insn >> 28;
10443 /* M variants do not implement ARM mode; this must raise the INVSTATE
10444 * UsageFault exception.
10446 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10447 gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
10448 default_exception_el(s));
10453 /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
10454 * choose to UNDEF. In ARMv5 and above the space is used
10455 * for miscellaneous unconditional instructions.
10459 /* Unconditional instructions. */
10460 if (disas_a32_uncond(s, insn)) {
10463 /* fall back to legacy decoder */
10465 if (((insn >> 25) & 7) == 1) {
10466 /* NEON Data processing. */
10467 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
10471 if (disas_neon_data_insn(s, insn)) {
10476 if ((insn & 0x0f100000) == 0x04000000) {
10477 /* NEON load/store. */
10478 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
10482 if (disas_neon_ls_insn(s, insn)) {
10487 if ((insn & 0x0f000e10) == 0x0e000a00) {
10489 if (disas_vfp_insn(s, insn)) {
10494 if ((insn & 0x0e000f00) == 0x0c000100) {
10495 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
10496 /* iWMMXt register transfer. */
10497 if (extract32(s->c15_cpar, 1, 1)) {
10498 if (!disas_iwmmxt_insn(s, insn)) {
10503 } else if ((insn & 0x0e000a00) == 0x0c000800
10504 && arm_dc_feature(s, ARM_FEATURE_V8)) {
10505 if (disas_neon_insn_3same_ext(s, insn)) {
10509 } else if ((insn & 0x0f000a00) == 0x0e000800
10510 && arm_dc_feature(s, ARM_FEATURE_V8)) {
10511 if (disas_neon_insn_2reg_scalar_ext(s, insn)) {
10519 /* If the condition is not "always", generate a conditional jump to the
10520 next instruction. */
10521 arm_skip_unless(s, cond);
10524 if (disas_a32(s, insn)) {
10527 /* fall back to legacy decoder */
10529 switch ((insn >> 24) & 0xf) {
10533 if (((insn >> 8) & 0xe) == 10) {
10535 if (disas_vfp_insn(s, insn)) {
10538 } else if (disas_coproc_insn(s, insn)) {
10545 unallocated_encoding(s);
10550 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
10553 /* Return true if this is a 16 bit instruction. We must be precise
10554 * about this (matching the decode). */
10556 if ((insn >> 11) < 0x1d) {
10557 /* Definitely a 16-bit instruction */
10561 /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
10562 * first half of a 32-bit Thumb insn. Thumb-1 cores might
10563 * end up actually treating this as two 16-bit insns, though,
10564 * if it's half of a bl/blx pair that might span a page boundary.
10566 if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
10567 arm_dc_feature(s, ARM_FEATURE_M)) {
10568 /* Thumb2 cores (including all M profile ones) always treat
10569 * 32-bit insns as 32-bit.
10574 if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
10575 /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
10576 * is not on the next page; we merge this into a 32-bit
10581 /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
10582 * 0b1111_1xxx_xxxx_xxxx : BL suffix;
10583 * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
10584 * -- handle as single 16 bit insn
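 * (For example, a first halfword of 0xf000 -- a BL/BLX prefix -- gives
 * insn >> 11 == 0x1e, while anything below 0xe800 gives a value under
 * 0x1d and is therefore always a 16-bit insn.)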
10589 /* Translate a 32-bit thumb instruction. */
10590 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
10593 /* ARMv6-M supports a limited subset of Thumb2 instructions.
10594 * Other Thumb1 architectures allow only 32-bit
10595 * combined BL/BLX prefix and suffix. */
10597 if (arm_dc_feature(s, ARM_FEATURE_M) &&
10598 !arm_dc_feature(s, ARM_FEATURE_V7)) {
10600 bool found = false;
10601 static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
10602 0xf3b08040 /* dsb */,
10603 0xf3b08050 /* dmb */,
10604 0xf3b08060 /* isb */,
10605 0xf3e08000 /* mrs */,
10606 0xf000d000 /* bl */};
10607 static const uint32_t armv6m_mask[] = {0xffe0d000,
10614 for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
10615 if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
10623 } else if ((insn & 0xf800e800) != 0xf000e800) {
10627 if (disas_t32(s, insn)) {
10630 /* fall back to legacy decoder */
10632 switch ((insn >> 25) & 0xf) {
10633 case 0: case 1: case 2: case 3:
10634 /* 16-bit instructions. Should never happen. */
10636 case 6: case 7: case 14: case 15:
10638 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10639 /* 0b111x_11xx_xxxx_xxxx_xxxx_xxxx_xxxx_xxxx */
10640 if (extract32(insn, 24, 2) == 3) {
10641 goto illegal_op; /* op0 = 0b11 : unallocated */
10645 /* Decode VLLDM and VLSTM first: these are nonstandard because:
10646 * * if there is no FPU then these insns must NOP in
10647 * Secure state and UNDEF in Nonsecure state
10648 * * if there is an FPU then these insns do not have
10649 * the usual behaviour that disas_vfp_insn() provides of
10650 * being controlled by CPACR/NSACR enable bits or the
10651 * lazy-stacking logic. */
10653 if (arm_dc_feature(s, ARM_FEATURE_V8) &&
10654 (insn & 0xffa00f00) == 0xec200a00) {
10655 /* 0b1110_1100_0x1x_xxxx_xxxx_1010_xxxx_xxxx
10657 * We choose to UNDEF if the RAZ bits are non-zero.
10659 if (!s->v8m_secure || (insn & 0x0040f0ff)) {
10663 if (arm_dc_feature(s, ARM_FEATURE_VFP)) {
10664 uint32_t rn = (insn >> 16) & 0xf;
10665 TCGv_i32 fptr = load_reg(s, rn);
10667 if (extract32(insn, 20, 1)) {
10668 gen_helper_v7m_vlldm(cpu_env, fptr);
10670 gen_helper_v7m_vlstm(cpu_env, fptr);
10672 tcg_temp_free_i32(fptr);
10674 /* End the TB, because we have updated FP control bits */
10675 s->base.is_jmp = DISAS_UPDATE;
10679 if (arm_dc_feature(s, ARM_FEATURE_VFP) &&
10680 ((insn >> 8) & 0xe) == 10) {
10681 /* FP, and the CPU supports it */
10682 if (disas_vfp_insn(s, insn)) {
10688 /* All other insns: NOCP */
10689 gen_exception_insn(s, s->pc_curr, EXCP_NOCP, syn_uncategorized(),
10690 default_exception_el(s));
10693 if ((insn & 0xfe000a00) == 0xfc000800
10694 && arm_dc_feature(s, ARM_FEATURE_V8)) {
10695 /* The Thumb2 and ARM encodings are identical. */
10696 if (disas_neon_insn_3same_ext(s, insn)) {
10699 } else if ((insn & 0xff000a00) == 0xfe000800
10700 && arm_dc_feature(s, ARM_FEATURE_V8)) {
10701 /* The Thumb2 and ARM encodings are identical. */
10702 if (disas_neon_insn_2reg_scalar_ext(s, insn)) {
10705 } else if (((insn >> 24) & 3) == 3) {
10706 /* Translate into the equivalent ARM encoding. */
10707 insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
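/*
 * For instance, a Thumb encoding starting 0xef... becomes the ARM
 * encoding 0xf2... and 0xff... becomes 0xf3...: the mask keeps the
 * common bits, the bit that distinguishes the two Thumb forms (bit 28)
 * moves down to bit 24, and bit 28 is then forced to 1 to produce the
 * ARM "unconditional" pattern.
 */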
10708 if (disas_neon_data_insn(s, insn)) {
10711 } else if (((insn >> 8) & 0xe) == 10) {
10712 if (disas_vfp_insn(s, insn)) {
10716 if (insn & (1 << 28))
10718 if (disas_coproc_insn(s, insn)) {
10724 if ((insn & 0x01100000) == 0x01000000) {
10725 if (disas_neon_ls_insn(s, insn)) {
10733 unallocated_encoding(s);
10737 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
10744 if (disas_t16(s, insn)) {
10747 /* fall back to legacy decoder */
10749 switch (insn >> 12) {
10750 case 0: case 1: /* add/sub (3reg, 2reg imm), shift imm; in decodetree */
10751 case 2: case 3: /* add, sub, cmp, mov (reg, imm), in decodetree */
10752 case 4: /* ldr lit, data proc (2reg), data proc ext, bx; in decodetree */
10753 case 5: /* load/store register offset, in decodetree */
10754 case 6: /* load/store word immediate offset, in decodetree */
10755 case 7: /* load/store byte immediate offset, in decodetree */
10756 case 8: /* load/store halfword immediate offset, in decodetree */
10757 case 9: /* load/store from stack, in decodetree */
10758 case 10: /* add PC/SP (immediate), in decodetree */
10759 case 11: /* misc, in decodetree */
10760 case 12: /* load/store multiple, in decodetree */
10761 case 13: /* conditional branch or swi, in decodetree */
10765 if (insn & (1 << 11)) {
10766 /* thumb_insn_is_16bit() ensures we can't get here for
10767 * a Thumb2 CPU, so this must be a thumb1 split BL/BLX:
10768 * 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF)
10770 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10772 offset = ((insn & 0x7ff) << 1);
10773 tmp = load_reg(s, 14);
10774 tcg_gen_addi_i32(tmp, tmp, offset);
10775 tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
10777 tmp2 = tcg_temp_new_i32();
10778 tcg_gen_movi_i32(tmp2, s->base.pc_next | 1);
10779 store_reg(s, 14, tmp2);
10783 /* unconditional branch */
10785 offset = ((int32_t)insn << 21) >> 21;
10786 val += offset << 1;
10791 /* thumb_insn_is_16bit() ensures we can't get here for
10792 * a Thumb2 CPU, so this must be a thumb1 split BL/BLX.
10794 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10796 if (insn & (1 << 11)) {
10797 /* 0b1111_1xxx_xxxx_xxxx : BL suffix */
10798 offset = ((insn & 0x7ff) << 1) | 1;
10799 tmp = load_reg(s, 14);
10800 tcg_gen_addi_i32(tmp, tmp, offset);
10802 tmp2 = tcg_temp_new_i32();
10803 tcg_gen_movi_i32(tmp2, s->base.pc_next | 1);
10804 store_reg(s, 14, tmp2);
10807 /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix */
10808 uint32_t uoffset = ((int32_t)insn << 21) >> 9;
10810 tcg_gen_movi_i32(cpu_R[14], read_pc(s) + uoffset);
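/*
 * Thumb-1 split BL/BLX: this prefix half leaves read_pc() plus the
 * sign-extended 11-bit immediate shifted left 12 in LR; the matching
 * suffix (handled earlier in this function) adds its own 11-bit
 * immediate << 1 to LR to form the branch target and then stores the
 * real return address back into LR.
 */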
10816 unallocated_encoding(s);
10819 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
10821 /* Return true if the insn at dc->base.pc_next might cross a page boundary.
10822 * (False positives are OK, false negatives are not.)
10823 * We know this is a Thumb insn, and our caller ensures we are
10824 * only called if dc->base.pc_next is less than 4 bytes from the page
10825 * boundary, so we cross the page if the first 16 bits indicate
10826 * that this is a 32 bit insn.
10828 uint16_t insn = arm_lduw_code(env, s->base.pc_next, s->sctlr_b);
10830 return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
10833 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
10835 DisasContext *dc = container_of(dcbase, DisasContext, base);
10836 CPUARMState *env = cs->env_ptr;
10837 ARMCPU *cpu = env_archcpu(env);
10838 uint32_t tb_flags = dc->base.tb->flags;
10839 uint32_t condexec, core_mmu_idx;
10841 dc->isar = &cpu->isar;
10845 /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
10846 * there is no secure EL1, so we route exceptions to EL3.
10848 dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
10849 !arm_el_is_aa64(env, 3);
10850 dc->thumb = FIELD_EX32(tb_flags, TBFLAG_A32, THUMB);
10851 dc->sctlr_b = FIELD_EX32(tb_flags, TBFLAG_A32, SCTLR_B);
10852 dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
10853 condexec = FIELD_EX32(tb_flags, TBFLAG_A32, CONDEXEC);
10854 dc->condexec_mask = (condexec & 0xf) << 1;
10855 dc->condexec_cond = condexec >> 4;
10856 core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX);
10857 dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
10858 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
10859 #if !defined(CONFIG_USER_ONLY)
10860 dc->user = (dc->current_el == 0);
10862 dc->ns = FIELD_EX32(tb_flags, TBFLAG_A32, NS);
10863 dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL);
10864 dc->vfp_enabled = FIELD_EX32(tb_flags, TBFLAG_A32, VFPEN);
10865 dc->vec_len = FIELD_EX32(tb_flags, TBFLAG_A32, VECLEN);
10866 if (arm_feature(env, ARM_FEATURE_XSCALE)) {
10867 dc->c15_cpar = FIELD_EX32(tb_flags, TBFLAG_A32, XSCALE_CPAR);
10868 dc->vec_stride = 0;
10870 dc->vec_stride = FIELD_EX32(tb_flags, TBFLAG_A32, VECSTRIDE);
10873 dc->v7m_handler_mode = FIELD_EX32(tb_flags, TBFLAG_A32, HANDLER);
10874 dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
10875 regime_is_secure(env, dc->mmu_idx);
10876 dc->v8m_stackcheck = FIELD_EX32(tb_flags, TBFLAG_A32, STACKCHECK);
10877 dc->v8m_fpccr_s_wrong = FIELD_EX32(tb_flags, TBFLAG_A32, FPCCR_S_WRONG);
10878 dc->v7m_new_fp_ctxt_needed =
10879 FIELD_EX32(tb_flags, TBFLAG_A32, NEW_FP_CTXT_NEEDED);
10880 dc->v7m_lspact = FIELD_EX32(tb_flags, TBFLAG_A32, LSPACT);
10881 dc->cp_regs = cpu->cp_regs;
10882 dc->features = env->features;
10884 /* Single step state. The code-generation logic here is:
10886 * generate code with no special handling for single-stepping (except
10887 * that anything that can make us go to SS_ACTIVE == 1 must end the TB;
10888 * this happens anyway because those changes are all system register or
10890 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
10891 * emit code for one insn
10892 * emit code to clear PSTATE.SS
10893 * emit code to generate software step exception for completed step
10894 * end TB (as usual for having generated an exception)
10895 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
10896 * emit code to generate a software step exception
10899 dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE);
10900 dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS);
10901 dc->is_ldex = false;
10902 if (!arm_feature(env, ARM_FEATURE_M)) {
10903 dc->debug_target_el = FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL);
10906 dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
10908 /* If architectural single step active, limit to 1. */
10909 if (is_singlestepping(dc)) {
10910 dc->base.max_insns = 1;
10913 /* ARM is a fixed-length ISA. Bound the number of insns to execute
10914 to those left on the page. */
10916 int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
10917 dc->base.max_insns = MIN(dc->base.max_insns, bound);
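/*
 * Worked example (assuming 4K pages, so TARGET_PAGE_MASK == ~0xfff):
 * for pc_first == 0x10000ff8, (pc_first | TARGET_PAGE_MASK) is
 * 0xfffffff8, so the negation gives 8 bytes left on the page and a
 * bound of two 4-byte insns.
 */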
10920 cpu_V0 = tcg_temp_new_i64();
10921 cpu_V1 = tcg_temp_new_i64();
10922 /* FIXME: cpu_M0 can probably be the same as cpu_V0. */
10923 cpu_M0 = tcg_temp_new_i64();
10926 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
10928 DisasContext *dc = container_of(dcbase, DisasContext, base);
10930 /* A note on handling of the condexec (IT) bits:
10932 * We want to avoid the overhead of having to write the updated condexec
10933 * bits back to the CPUARMState for every instruction in an IT block. So:
10934 * (1) if the condexec bits are not already zero then we write
10935 * zero back into the CPUARMState now. This avoids complications trying
10936 * to do it at the end of the block. (For example if we don't do this
10937 * it's hard to identify whether we can safely skip writing condexec
10938 * at the end of the TB, which we definitely want to do for the case
10939 * where a TB doesn't do anything with the IT state at all.)
10940 * (2) if we are going to leave the TB then we call gen_set_condexec()
10941 * which will write the correct value into CPUARMState if zero is wrong.
10942 * This is done both for leaving the TB at the end, and for leaving
10943 * it because of an exception we know will happen, which is done in
10944 * gen_exception_insn(). The latter is necessary because we need to
10945 * leave the TB with the PC/IT state just prior to execution of the
10946 * instruction which caused the exception.
10947 * (3) if we leave the TB unexpectedly (eg a data abort on a load)
10948 * then the CPUARMState will be wrong and we need to reset it.
10949 * This is handled in the same way as restoration of the
10950 * PC in these situations; we save the value of the condexec bits
10951 * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
10952 * then uses this to restore them after an exception.
10954 * Note that there are no instructions which can read the condexec
10955 * bits, and none which can write non-static values to them, so
10956 * we don't need to care about whether CPUARMState is correct in the
10960 /* Reset the conditional execution bits immediately. This avoids
10961 complications trying to do it at the end of the block. */
10962 if (dc->condexec_mask || dc->condexec_cond) {
10963 TCGv_i32 tmp = tcg_temp_new_i32();
10964 tcg_gen_movi_i32(tmp, 0);
10965 store_cpu_field(tmp, condexec_bits);
10969 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
10971 DisasContext *dc = container_of(dcbase, DisasContext, base);
10973 tcg_gen_insn_start(dc->base.pc_next,
10974 (dc->condexec_cond << 4) | (dc->condexec_mask >> 1),
10976 dc->insn_start = tcg_last_op();
10979 static bool arm_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
10980 const CPUBreakpoint *bp)
10982 DisasContext *dc = container_of(dcbase, DisasContext, base);
10984 if (bp->flags & BP_CPU) {
10985 gen_set_condexec(dc);
10986 gen_set_pc_im(dc, dc->base.pc_next);
10987 gen_helper_check_breakpoints(cpu_env);
10988 /* End the TB early; it's likely not going to be executed */
10989 dc->base.is_jmp = DISAS_TOO_MANY;
10991 gen_exception_internal_insn(dc, dc->base.pc_next, EXCP_DEBUG);
10992 /* The address covered by the breakpoint must be
10993 included in [tb->pc, tb->pc + tb->size) in order
10994 for it to be properly cleared -- thus we
10995 increment the PC here so that the logic setting
10996 tb->size below does the right thing. */
10997 /* TODO: Advance PC by correct instruction length to
10998 * avoid disassembler error messages */
10999 dc->base.pc_next += 2;
11000 dc->base.is_jmp = DISAS_NORETURN;
11006 static bool arm_pre_translate_insn(DisasContext *dc)
11008 #ifdef CONFIG_USER_ONLY
11009 /* Intercept jump to the magic kernel page. */
11010 if (dc->base.pc_next >= 0xffff0000) {
11011 /* We always get here via a jump, so we know we are not in a
11012 conditional execution block. */
11013 gen_exception_internal(EXCP_KERNEL_TRAP);
11014 dc->base.is_jmp = DISAS_NORETURN;
11019 if (dc->ss_active && !dc->pstate_ss) {
11020 /* Singlestep state is Active-pending.
11021 * If we're in this state at the start of a TB then either
11022 * a) we just took an exception to an EL which is being debugged
11023 * and this is the first insn in the exception handler
11024 * b) debug exceptions were masked and we just unmasked them
11025 * without changing EL (eg by clearing PSTATE.D)
11026 * In either case we're going to take a swstep exception in the
11027 * "did not step an insn" case, and so the syndrome ISV and EX
11028 * bits should be zero.
11030 assert(dc->base.num_insns == 1);
11031 gen_swstep_exception(dc, 0, 0);
11032 dc->base.is_jmp = DISAS_NORETURN;
11039 static void arm_post_translate_insn(DisasContext *dc)
11041 if (dc->condjmp && !dc->base.is_jmp) {
11042 gen_set_label(dc->condlabel);
11045 translator_loop_temp_check(&dc->base);
11048 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
11050 DisasContext *dc = container_of(dcbase, DisasContext, base);
11051 CPUARMState *env = cpu->env_ptr;
11054 if (arm_pre_translate_insn(dc)) {
11058 dc->pc_curr = dc->base.pc_next;
11059 insn = arm_ldl_code(env, dc->base.pc_next, dc->sctlr_b);
11061 dc->base.pc_next += 4;
11062 disas_arm_insn(dc, insn);
11064 arm_post_translate_insn(dc);
11066 /* ARM is a fixed-length ISA. We performed the cross-page check
11067 in init_disas_context by adjusting max_insns. */
11070 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
11072 /* Return true if this Thumb insn is always unconditional,
11073 * even inside an IT block. This is true of only a very few
11074 * instructions: BKPT, HLT, and SG.
11076 * A larger class of instructions are UNPREDICTABLE if used
11077 * inside an IT block; we do not need to detect those here, because
11078 * what we do by default (perform the cc check and update the IT
11079 * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
11080 * choice for those situations.
11082 * insn is either a 16-bit or a 32-bit instruction; the two are
11083 * distinguishable because for the 16-bit case the top 16 bits
11084 * are zeroes, and that isn't a valid 32-bit encoding.
11086 if ((insn & 0xffffff00) == 0xbe00) {
11091 if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
11092 !arm_dc_feature(s, ARM_FEATURE_M)) {
11093 /* HLT: v8A only. This is unconditional even when it is going to
11094 * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
11095 * For v7 cores this was a plain old undefined encoding and so
11096 * honours its cc check. (We might be using the encoding as
11097 * a semihosting trap, but we don't change the cc check behaviour
11098 * on that account, because a debugger connected to a real v7A
11099 * core and emulating semihosting traps by catching the UNDEF
11100 * exception would also only see cases where the cc check passed.
11101 * No guest code should be trying to do a HLT semihosting trap
11102 * in an IT block anyway.
11107 if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
11108 arm_dc_feature(s, ARM_FEATURE_M)) {
11116 static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
11118 DisasContext *dc = container_of(dcbase, DisasContext, base);
11119 CPUARMState *env = cpu->env_ptr;
11123 if (arm_pre_translate_insn(dc)) {
11127 dc->pc_curr = dc->base.pc_next;
11128 insn = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
11129 is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
11130 dc->base.pc_next += 2;
11132 uint32_t insn2 = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
11134 insn = insn << 16 | insn2;
11135 dc->base.pc_next += 2;
11139 if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
11140 uint32_t cond = dc->condexec_cond;
11143 /* Conditionally skip the insn. Note that both 0xe and 0xf mean
11144 * "always"; 0xf is not "never". */
11147 arm_skip_unless(dc, cond);
11152 disas_thumb_insn(dc, insn);
11154 disas_thumb2_insn(dc, insn);
11157 /* Advance the Thumb condexec condition. */
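/*
 * Illustrative trace (assuming the architectural IT mask encoding):
 * after "ITE EQ", trans_IT() leaves condexec_cond == 0x0 and
 * condexec_mask == 0b01100. The advance below runs once for the IT
 * insn itself (cond stays EQ for the first body insn), once more to
 * shift in a 1 (cond becomes NE for the second), and the following
 * shift empties the mask so the state is cleared.
 */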
11158 if (dc->condexec_mask) {
11159 dc->condexec_cond = ((dc->condexec_cond & 0xe) |
11160 ((dc->condexec_mask >> 4) & 1));
11161 dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
11162 if (dc->condexec_mask == 0) {
11163 dc->condexec_cond = 0;
11167 arm_post_translate_insn(dc);
11169 /* Thumb is a variable-length ISA. Stop translation when the next insn
11170 * will touch a new page. This ensures that prefetch aborts occur at the right place.
11173 * We want to stop the TB if the next insn starts in a new page,
11174 * or if it spans between this page and the next. This means that
11175 * if we're looking at the last halfword in the page we need to
11176 * see if it's a 16-bit Thumb insn (which will fit in this TB)
11177 * or a 32-bit Thumb insn (which won't).
11178 * This is to avoid generating a silly TB with a single 16-bit insn
11179 * in it at the end of this page (which would execute correctly
11180 * but isn't very efficient).
11182 if (dc->base.is_jmp == DISAS_NEXT
11183 && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
11184 || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
11185 && insn_crosses_page(env, dc)))) {
11186 dc->base.is_jmp = DISAS_TOO_MANY;
11190 static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
11192 DisasContext *dc = container_of(dcbase, DisasContext, base);
11194 if (tb_cflags(dc->base.tb) & CF_LAST_IO && dc->condjmp) {
11195 /* FIXME: This can theoretically happen with self-modifying code. */
11196 cpu_abort(cpu, "IO on conditional branch instruction");
11199 /* At this stage dc->condjmp will only be set when the skipped
11200 instruction was a conditional branch or trap, and the PC has
11201 already been written. */
11202 gen_set_condexec(dc);
11203 if (dc->base.is_jmp == DISAS_BX_EXCRET) {
11204 /* Exception return branches need some special case code at the
11205 * end of the TB, which is complex enough that it has to
11206 * handle the single-step vs not and the condition-failed
11207 * insn codepath itself.
11209 gen_bx_excret_final_code(dc);
11210 } else if (unlikely(is_singlestepping(dc))) {
11211 /* Unconditional and "condition passed" instruction codepath. */
11212 switch (dc->base.is_jmp) {
11214 gen_ss_advance(dc);
11215 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
11216 default_exception_el(dc));
11219 gen_ss_advance(dc);
11220 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
11223 gen_ss_advance(dc);
11224 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
11227 case DISAS_TOO_MANY:
11229 gen_set_pc_im(dc, dc->base.pc_next);
11232 /* FIXME: Single stepping a WFI insn will not halt the CPU. */
11233 gen_singlestep_exception(dc);
11235 case DISAS_NORETURN:
11239 /* While branches must always occur at the end of an IT block,
11240 there are a few other things that can cause us to terminate
11241 the TB in the middle of an IT block:
11242 - Exception generating instructions (bkpt, swi, undefined).
11244 - Hardware watchpoints.
11245 Hardware breakpoints have already been handled and skip this code.
11247 switch(dc->base.is_jmp) {
11249 case DISAS_TOO_MANY:
11250 gen_goto_tb(dc, 1, dc->base.pc_next);
11256 gen_set_pc_im(dc, dc->base.pc_next);
11259 /* indicate that the hash table must be used to find the next TB */
11260 tcg_gen_exit_tb(NULL, 0);
11262 case DISAS_NORETURN:
11263 /* nothing more to generate */
11267 TCGv_i32 tmp = tcg_const_i32((dc->thumb &&
11268 !(dc->insn & (1U << 31))) ? 2 : 4);
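/*
 * The constant is the length of the WFI insn: 2 for a 16-bit Thumb
 * encoding (top halfword of dc->insn clear), otherwise 4. The helper
 * can use it to point the PC back at the WFI itself if the wait has to
 * be trapped to a higher exception level.
 */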
11270 gen_helper_wfi(cpu_env, tmp);
11271 tcg_temp_free_i32(tmp);
11272 /* The helper doesn't necessarily throw an exception, but we
11273 * must go back to the main loop to check for interrupts anyway.
11275 tcg_gen_exit_tb(NULL, 0);
11279 gen_helper_wfe(cpu_env);
11282 gen_helper_yield(cpu_env);
11285 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
11286 default_exception_el(dc));
11289 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
11292 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
11298 /* "Condition failed" instruction codepath for the branch/trap insn */
11299 gen_set_label(dc->condlabel);
11300 gen_set_condexec(dc);
11301 if (unlikely(is_singlestepping(dc))) {
11302 gen_set_pc_im(dc, dc->base.pc_next);
11303 gen_singlestep_exception(dc);
11305 gen_goto_tb(dc, 1, dc->base.pc_next);
11310 static void arm_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu)
11312 DisasContext *dc = container_of(dcbase, DisasContext, base);
11314 qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
11315 log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
11318 static const TranslatorOps arm_translator_ops = {
11319 .init_disas_context = arm_tr_init_disas_context,
11320 .tb_start = arm_tr_tb_start,
11321 .insn_start = arm_tr_insn_start,
11322 .breakpoint_check = arm_tr_breakpoint_check,
11323 .translate_insn = arm_tr_translate_insn,
11324 .tb_stop = arm_tr_tb_stop,
11325 .disas_log = arm_tr_disas_log,
11328 static const TranslatorOps thumb_translator_ops = {
11329 .init_disas_context = arm_tr_init_disas_context,
11330 .tb_start = arm_tr_tb_start,
11331 .insn_start = arm_tr_insn_start,
11332 .breakpoint_check = arm_tr_breakpoint_check,
11333 .translate_insn = thumb_tr_translate_insn,
11334 .tb_stop = arm_tr_tb_stop,
11335 .disas_log = arm_tr_disas_log,
11338 /* generate intermediate code for basic block 'tb'. */
11339 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
11342 const TranslatorOps *ops = &arm_translator_ops;
11344 if (FIELD_EX32(tb->flags, TBFLAG_A32, THUMB)) {
11345 ops = &thumb_translator_ops;
11347 #ifdef TARGET_AARCH64
11348 if (FIELD_EX32(tb->flags, TBFLAG_ANY, AARCH64_STATE)) {
11349 ops = &aarch64_translator_ops;
11353 translator_loop(ops, &dc.base, cpu, tb, max_insns);
11356 void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
11357 target_ulong *data)
11361 env->condexec_bits = 0;
11362 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
11364 env->regs[15] = data[0];
11365 env->condexec_bits = data[1];
11366 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
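/*
 * The data[] words consumed here mirror what arm_tr_insn_start() emits
 * via tcg_gen_insn_start(): the instruction address, the packed
 * condexec (IT) bits, and the per-insn syndrome fragment. The first
 * group of assignments (condexec forced to zero) is the AArch64 case;
 * the second restores AArch32 state.
 */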