4 * Copyright (c) 2003 Fabrice Bellard
5 * Copyright (c) 2005-2007 CodeSourcery
6 * Copyright (c) 2007 OpenedHand, Ltd.
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21 #include "qemu/osdep.h"
24 #include "internals.h"
25 #include "disas/disas.h"
26 #include "exec/exec-all.h"
28 #include "tcg-op-gvec.h"
30 #include "qemu/bitops.h"
31 #include "qemu/qemu-print.h"
33 #include "hw/semihosting/semihost.h"
35 #include "exec/helper-proto.h"
36 #include "exec/helper-gen.h"
38 #include "trace-tcg.h"
42 #define ENABLE_ARCH_4T arm_dc_feature(s, ARM_FEATURE_V4T)
43 #define ENABLE_ARCH_5 arm_dc_feature(s, ARM_FEATURE_V5)
44 /* currently all emulated v5 cores are also v5TE, so don't bother */
45 #define ENABLE_ARCH_5TE arm_dc_feature(s, ARM_FEATURE_V5)
46 #define ENABLE_ARCH_5J dc_isar_feature(jazelle, s)
47 #define ENABLE_ARCH_6 arm_dc_feature(s, ARM_FEATURE_V6)
48 #define ENABLE_ARCH_6K arm_dc_feature(s, ARM_FEATURE_V6K)
49 #define ENABLE_ARCH_6T2 arm_dc_feature(s, ARM_FEATURE_THUMB2)
50 #define ENABLE_ARCH_7 arm_dc_feature(s, ARM_FEATURE_V7)
51 #define ENABLE_ARCH_8 arm_dc_feature(s, ARM_FEATURE_V8)
53 #define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
55 #include "translate.h"
57 #if defined(CONFIG_USER_ONLY)
60 #define IS_USER(s) (s->user)
63 /* We reuse the same 64-bit temporaries for efficiency. */
64 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
65 static TCGv_i32 cpu_R[16];
66 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
67 TCGv_i64 cpu_exclusive_addr;
68 TCGv_i64 cpu_exclusive_val;
70 /* FIXME: These should be removed. */
71 static TCGv_i32 cpu_F0s, cpu_F1s;
72 static TCGv_i64 cpu_F0d, cpu_F1d;
74 #include "exec/gen-icount.h"
76 static const char * const regnames[] =
77 { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
78 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
80 /* Function prototypes for gen_ functions calling Neon helpers. */
81 typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
84 /* initialize TCG globals. */
85 void arm_translate_init(void)
89 for (i = 0; i < 16; i++) {
90 cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
91 offsetof(CPUARMState, regs[i]),
94 cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
95 cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
96 cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
97 cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
99 cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
100 offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
101 cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
102 offsetof(CPUARMState, exclusive_val), "exclusive_val");
104 a64_translate_init();
107 /* Flags for the disas_set_da_iss info argument:
108 * lower bits hold the Rt register number, higher bits are flags.
110 typedef enum ISSInfo {
113 ISSInvalid = (1 << 5),
114 ISSIsAcqRel = (1 << 6),
115 ISSIsWrite = (1 << 7),
116 ISSIs16Bit = (1 << 8),
119 /* Save the syndrome information for a Data Abort */
120 static void disas_set_da_iss(DisasContext *s, TCGMemOp memop, ISSInfo issinfo)
123 int sas = memop & MO_SIZE;
124 bool sse = memop & MO_SIGN;
125 bool is_acqrel = issinfo & ISSIsAcqRel;
126 bool is_write = issinfo & ISSIsWrite;
127 bool is_16bit = issinfo & ISSIs16Bit;
128 int srt = issinfo & ISSRegMask;
130 if (issinfo & ISSInvalid) {
131 /* Some callsites want to conditionally provide ISS info,
132  * e.g. "only if this was not a writeback"
138 /* For AArch32, insns where the src/dest is R15 never generate
139 * ISS information. Catching that here saves checking at all
145 syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
146 0, 0, 0, is_write, 0, is_16bit);
147 disas_set_insn_syndrome(s, syn);
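/*
 * Illustrative sketch (not part of the translator): how a load/store
 * decoder typically builds the ISSInfo argument passed to
 * disas_set_da_iss().  The flag names are the real ones defined above;
 * the helper name and its parameters are invented for this example.
 */
static inline ISSInfo example_make_issinfo(int rt, bool is_16bit, bool writeback)
{
    ISSInfo issinfo = rt | (is_16bit ? ISSIs16Bit : 0);

    if (writeback) {
        /* Writeback forms do not report ISS, so mark the info invalid. */
        issinfo |= ISSInvalid;
    }
    return issinfo;
}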
150 static inline int get_a32_user_mem_index(DisasContext *s)
152 /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
154 * if PL2, UNPREDICTABLE (we choose to implement as if PL0)
155 * otherwise, access as if at PL0.
157 switch (s->mmu_idx) {
158 case ARMMMUIdx_S1E2: /* this one is UNPREDICTABLE */
159 case ARMMMUIdx_S12NSE0:
160 case ARMMMUIdx_S12NSE1:
161 return arm_to_core_mmu_idx(ARMMMUIdx_S12NSE0);
163 case ARMMMUIdx_S1SE0:
164 case ARMMMUIdx_S1SE1:
165 return arm_to_core_mmu_idx(ARMMMUIdx_S1SE0);
166 case ARMMMUIdx_MUser:
167 case ARMMMUIdx_MPriv:
168 return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
169 case ARMMMUIdx_MUserNegPri:
170 case ARMMMUIdx_MPrivNegPri:
171 return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
172 case ARMMMUIdx_MSUser:
173 case ARMMMUIdx_MSPriv:
174 return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
175 case ARMMMUIdx_MSUserNegPri:
176 case ARMMMUIdx_MSPrivNegPri:
177 return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
180 g_assert_not_reached();
184 static inline TCGv_i32 load_cpu_offset(int offset)
186 TCGv_i32 tmp = tcg_temp_new_i32();
187 tcg_gen_ld_i32(tmp, cpu_env, offset);
191 #define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))
193 static inline void store_cpu_offset(TCGv_i32 var, int offset)
195 tcg_gen_st_i32(var, cpu_env, offset);
196 tcg_temp_free_i32(var);
199 #define store_cpu_field(var, name) \
200 store_cpu_offset(var, offsetof(CPUARMState, name))
202 /* Set a variable to the value of a CPU register. */
203 static void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
207 /* normally, since we updated PC, we need only to add one insn */
209 addr = (long)s->pc + 2;
211 addr = (long)s->pc + 4;
212 tcg_gen_movi_i32(var, addr);
214 tcg_gen_mov_i32(var, cpu_R[reg]);
218 /* Create a new temporary and set it to the value of a CPU register. */
219 static inline TCGv_i32 load_reg(DisasContext *s, int reg)
221 TCGv_i32 tmp = tcg_temp_new_i32();
222 load_reg_var(s, tmp, reg);
226 /* Set a CPU register. The source must be a temporary and will be
228 static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
231 /* In Thumb mode, we must ignore bit 0.
232 * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
233 * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
234 * We choose to ignore [1:0] in ARM mode for all architecture versions.
236 tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
237 s->base.is_jmp = DISAS_JUMP;
239 tcg_gen_mov_i32(cpu_R[reg], var);
240 tcg_temp_free_i32(var);
244 * Variant of store_reg which applies v8M stack-limit checks before updating
245 * SP. If the check fails this will result in an exception being taken.
246 * We disable the stack checks for CONFIG_USER_ONLY because we have
247 * no idea what the stack limits should be in that case.
248 * If stack checking is not being done this just acts like store_reg().
250 static void store_sp_checked(DisasContext *s, TCGv_i32 var)
252 #ifndef CONFIG_USER_ONLY
253 if (s->v8m_stackcheck) {
254 gen_helper_v8m_stackcheck(cpu_env, var);
257 store_reg(s, 13, var);
260 /* Value extensions. */
261 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
262 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
263 #define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
264 #define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
266 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
267 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
270 static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
272 TCGv_i32 tmp_mask = tcg_const_i32(mask);
273 gen_helper_cpsr_write(cpu_env, var, tmp_mask);
274 tcg_temp_free_i32(tmp_mask);
276 /* Set NZCV flags from the high 4 bits of var. */
277 #define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
279 static void gen_exception_internal(int excp)
281 TCGv_i32 tcg_excp = tcg_const_i32(excp);
283 assert(excp_is_internal(excp));
284 gen_helper_exception_internal(cpu_env, tcg_excp);
285 tcg_temp_free_i32(tcg_excp);
288 static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
290 TCGv_i32 tcg_excp = tcg_const_i32(excp);
291 TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
292 TCGv_i32 tcg_el = tcg_const_i32(target_el);
294 gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
297 tcg_temp_free_i32(tcg_el);
298 tcg_temp_free_i32(tcg_syn);
299 tcg_temp_free_i32(tcg_excp);
302 static void gen_step_complete_exception(DisasContext *s)
304 /* We have just completed a step of an insn. Move from Active-not-pending
305 * to Active-pending, and then also take the swstep exception.
306 * This corresponds to making the (IMPDEF) choice to prioritize
307 * swstep exceptions over asynchronous exceptions taken to an exception
308 * level where debug is disabled. This choice has the advantage that
309 * we do not need to maintain internal state corresponding to the
310 * ISV/EX syndrome bits between completion of the step and generation
311 * of the exception, and our syndrome information is always correct.
314 gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
315 default_exception_el(s));
316 s->base.is_jmp = DISAS_NORETURN;
319 static void gen_singlestep_exception(DisasContext *s)
321 /* Generate the right kind of exception for singlestep, which is
322 * either the architectural singlestep or EXCP_DEBUG for QEMU's
323 * gdb singlestepping.
326 gen_step_complete_exception(s);
328 gen_exception_internal(EXCP_DEBUG);
332 static inline bool is_singlestepping(DisasContext *s)
334 /* Return true if we are singlestepping either because of
335 * architectural singlestep or QEMU gdbstub singlestep. This does
336 * not include the command line '-singlestep' mode which is rather
337 * misnamed as it only means "one instruction per TB" and doesn't
338 * affect the code we generate.
340 return s->base.singlestep_enabled || s->ss_active;
343 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
345 TCGv_i32 tmp1 = tcg_temp_new_i32();
346 TCGv_i32 tmp2 = tcg_temp_new_i32();
347 tcg_gen_ext16s_i32(tmp1, a);
348 tcg_gen_ext16s_i32(tmp2, b);
349 tcg_gen_mul_i32(tmp1, tmp1, tmp2);
350 tcg_temp_free_i32(tmp2);
351 tcg_gen_sari_i32(a, a, 16);
352 tcg_gen_sari_i32(b, b, 16);
353 tcg_gen_mul_i32(b, b, a);
354 tcg_gen_mov_i32(a, tmp1);
355 tcg_temp_free_i32(tmp1);
358 /* Byteswap each halfword. */
359 static void gen_rev16(TCGv_i32 var)
361 TCGv_i32 tmp = tcg_temp_new_i32();
362 TCGv_i32 mask = tcg_const_i32(0x00ff00ff);
363 tcg_gen_shri_i32(tmp, var, 8);
364 tcg_gen_and_i32(tmp, tmp, mask);
365 tcg_gen_and_i32(var, var, mask);
366 tcg_gen_shli_i32(var, var, 8);
367 tcg_gen_or_i32(var, var, tmp);
368 tcg_temp_free_i32(mask);
369 tcg_temp_free_i32(tmp);
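/*
 * Illustrative sketch (not part of the translator): the same per-halfword
 * byteswap computed on a host integer, showing the mask-and-shift sequence
 * that gen_rev16() emits as TCG ops.  The function name is invented for
 * this example.
 */
static inline uint32_t example_rev16(uint32_t x)
{
    uint32_t hi = (x >> 8) & 0x00ff00ff;  /* high byte of each halfword */
    uint32_t lo = (x & 0x00ff00ff) << 8;  /* low byte of each halfword */

    return hi | lo;
}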
372 /* Byteswap low halfword and sign extend. */
373 static void gen_revsh(TCGv_i32 var)
375 tcg_gen_ext16u_i32(var, var);
376 tcg_gen_bswap16_i32(var, var);
377 tcg_gen_ext16s_i32(var, var);
380 /* Return (b << 32) + a. Mark inputs as dead. */
381 static TCGv_i64 gen_addq_msw(TCGv_i64 a, TCGv_i32 b)
383 TCGv_i64 tmp64 = tcg_temp_new_i64();
385 tcg_gen_extu_i32_i64(tmp64, b);
386 tcg_temp_free_i32(b);
387 tcg_gen_shli_i64(tmp64, tmp64, 32);
388 tcg_gen_add_i64(a, tmp64, a);
390 tcg_temp_free_i64(tmp64);
394 /* Return (b << 32) - a. Mark inputs as dead. */
395 static TCGv_i64 gen_subq_msw(TCGv_i64 a, TCGv_i32 b)
397 TCGv_i64 tmp64 = tcg_temp_new_i64();
399 tcg_gen_extu_i32_i64(tmp64, b);
400 tcg_temp_free_i32(b);
401 tcg_gen_shli_i64(tmp64, tmp64, 32);
402 tcg_gen_sub_i64(a, tmp64, a);
404 tcg_temp_free_i64(tmp64);
408 /* 32x32->64 multiply. Marks inputs as dead. */
409 static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
411 TCGv_i32 lo = tcg_temp_new_i32();
412 TCGv_i32 hi = tcg_temp_new_i32();
415 tcg_gen_mulu2_i32(lo, hi, a, b);
416 tcg_temp_free_i32(a);
417 tcg_temp_free_i32(b);
419 ret = tcg_temp_new_i64();
420 tcg_gen_concat_i32_i64(ret, lo, hi);
421 tcg_temp_free_i32(lo);
422 tcg_temp_free_i32(hi);
427 static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
429 TCGv_i32 lo = tcg_temp_new_i32();
430 TCGv_i32 hi = tcg_temp_new_i32();
433 tcg_gen_muls2_i32(lo, hi, a, b);
434 tcg_temp_free_i32(a);
435 tcg_temp_free_i32(b);
437 ret = tcg_temp_new_i64();
438 tcg_gen_concat_i32_i64(ret, lo, hi);
439 tcg_temp_free_i32(lo);
440 tcg_temp_free_i32(hi);
445 /* Swap low and high halfwords. */
446 static void gen_swap_half(TCGv_i32 var)
448 TCGv_i32 tmp = tcg_temp_new_i32();
449 tcg_gen_shri_i32(tmp, var, 16);
450 tcg_gen_shli_i32(var, var, 16);
451 tcg_gen_or_i32(var, var, tmp);
452 tcg_temp_free_i32(tmp);
455 /* Dual 16-bit add.  The result is placed in t0; t1 is marked as dead.
456    tmp = (t0 ^ t1) & 0x8000;
457    t0 &= ~0x8000;
458    t1 &= ~0x8000;
459    t0 = (t0 + t1) ^ tmp;
462 static void gen_add16(TCGv_i32 t0, TCGv_i32 t1)
464 TCGv_i32 tmp = tcg_temp_new_i32();
465 tcg_gen_xor_i32(tmp, t0, t1);
466 tcg_gen_andi_i32(tmp, tmp, 0x8000);
467 tcg_gen_andi_i32(t0, t0, ~0x8000);
468 tcg_gen_andi_i32(t1, t1, ~0x8000);
469 tcg_gen_add_i32(t0, t0, t1);
470 tcg_gen_xor_i32(t0, t0, tmp);
471 tcg_temp_free_i32(tmp);
472 tcg_temp_free_i32(t1);
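/*
 * Illustrative sketch (not part of the translator): the dual 16-bit
 * addition above performed on host integers.  Masking out bit 15 of both
 * operands keeps a carry from the low halfword out of the high one, and
 * XORing the saved value back in restores the correct bit 15 of the low
 * halfword's sum.  The function name is invented for this example.
 */
static inline uint32_t example_add16_dual(uint32_t t0, uint32_t t1)
{
    uint32_t tmp = (t0 ^ t1) & 0x8000;

    t0 &= ~0x8000;
    t1 &= ~0x8000;
    return (t0 + t1) ^ tmp;
}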
475 /* Set CF to the top bit of var. */
476 static void gen_set_CF_bit31(TCGv_i32 var)
478 tcg_gen_shri_i32(cpu_CF, var, 31);
481 /* Set N and Z flags from var. */
482 static inline void gen_logic_CC(TCGv_i32 var)
484 tcg_gen_mov_i32(cpu_NF, var);
485 tcg_gen_mov_i32(cpu_ZF, var);
489 static void gen_adc(TCGv_i32 t0, TCGv_i32 t1)
491 tcg_gen_add_i32(t0, t0, t1);
492 tcg_gen_add_i32(t0, t0, cpu_CF);
495 /* dest = T0 + T1 + CF. */
496 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
498 tcg_gen_add_i32(dest, t0, t1);
499 tcg_gen_add_i32(dest, dest, cpu_CF);
502 /* dest = T0 - T1 + CF - 1. */
503 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
505 tcg_gen_sub_i32(dest, t0, t1);
506 tcg_gen_add_i32(dest, dest, cpu_CF);
507 tcg_gen_subi_i32(dest, dest, 1);
510 /* dest = T0 + T1. Compute C, N, V and Z flags */
511 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
513 TCGv_i32 tmp = tcg_temp_new_i32();
514 tcg_gen_movi_i32(tmp, 0);
515 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
516 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
517 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
518 tcg_gen_xor_i32(tmp, t0, t1);
519 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
520 tcg_temp_free_i32(tmp);
521 tcg_gen_mov_i32(dest, cpu_NF);
524 /* dest = T0 + T1 + CF. Compute C, N, V and Z flags */
525 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
527 TCGv_i32 tmp = tcg_temp_new_i32();
528 if (TCG_TARGET_HAS_add2_i32) {
529 tcg_gen_movi_i32(tmp, 0);
530 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
531 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
533 TCGv_i64 q0 = tcg_temp_new_i64();
534 TCGv_i64 q1 = tcg_temp_new_i64();
535 tcg_gen_extu_i32_i64(q0, t0);
536 tcg_gen_extu_i32_i64(q1, t1);
537 tcg_gen_add_i64(q0, q0, q1);
538 tcg_gen_extu_i32_i64(q1, cpu_CF);
539 tcg_gen_add_i64(q0, q0, q1);
540 tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
541 tcg_temp_free_i64(q0);
542 tcg_temp_free_i64(q1);
544 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
545 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
546 tcg_gen_xor_i32(tmp, t0, t1);
547 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
548 tcg_temp_free_i32(tmp);
549 tcg_gen_mov_i32(dest, cpu_NF);
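/*
 * Illustrative sketch (not part of the translator): ADC with flag
 * computation on host integers, mirroring the TCG ops emitted by
 * gen_adc_CC() and the flag representation used here (N is the sign bit
 * of NF, Z is "ZF == 0", CF is 0 or 1, V is the sign bit of VF).  The
 * struct and function names are invented for this example.
 */
typedef struct {
    uint32_t nf, zf, cf, vf;
} ExampleFlags;

static inline uint32_t example_adc_cc(uint32_t t0, uint32_t t1, ExampleFlags *f)
{
    uint64_t sum = (uint64_t)t0 + t1 + f->cf;
    uint32_t res = (uint32_t)sum;

    f->nf = res;
    f->zf = res;
    f->cf = (uint32_t)(sum >> 32);      /* carry out of bit 31 */
    f->vf = (res ^ t0) & ~(t0 ^ t1);    /* signed overflow in the sign bit */
    return res;
}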
552 /* dest = T0 - T1. Compute C, N, V and Z flags */
553 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
556 tcg_gen_sub_i32(cpu_NF, t0, t1);
557 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
558 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
559 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
560 tmp = tcg_temp_new_i32();
561 tcg_gen_xor_i32(tmp, t0, t1);
562 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
563 tcg_temp_free_i32(tmp);
564 tcg_gen_mov_i32(dest, cpu_NF);
567 /* dest = T0 + ~T1 + CF. Compute C, N, V and Z flags */
568 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
570 TCGv_i32 tmp = tcg_temp_new_i32();
571 tcg_gen_not_i32(tmp, t1);
572 gen_adc_CC(dest, t0, tmp);
573 tcg_temp_free_i32(tmp);
576 #define GEN_SHIFT(name) \
577 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) \
579 TCGv_i32 tmp1, tmp2, tmp3; \
580 tmp1 = tcg_temp_new_i32(); \
581 tcg_gen_andi_i32(tmp1, t1, 0xff); \
582 tmp2 = tcg_const_i32(0); \
583 tmp3 = tcg_const_i32(0x1f); \
584 tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0); \
585 tcg_temp_free_i32(tmp3); \
586 tcg_gen_andi_i32(tmp1, tmp1, 0x1f); \
587 tcg_gen_##name##_i32(dest, tmp2, tmp1); \
588 tcg_temp_free_i32(tmp2); \
589 tcg_temp_free_i32(tmp1); \
595 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
598 tmp1 = tcg_temp_new_i32();
599 tcg_gen_andi_i32(tmp1, t1, 0xff);
600 tmp2 = tcg_const_i32(0x1f);
601 tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
602 tcg_temp_free_i32(tmp2);
603 tcg_gen_sar_i32(dest, t0, tmp1);
604 tcg_temp_free_i32(tmp1);
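/*
 * Illustrative sketch (not part of the translator): ARM register-controlled
 * shift semantics on host integers, matching what the GEN_SHIFT expansions
 * and gen_sar() emit: only the bottom byte of the shift count is used,
 * LSL/LSR by more than 31 produce 0, and ASR clamps the count to 31.
 * Assumes an arithmetic right shift on the host, as QEMU does.  The
 * function names are invented for this example.
 */
static inline uint32_t example_lsl_reg(uint32_t val, uint32_t count)
{
    count &= 0xff;
    return count > 31 ? 0 : val << count;
}

static inline uint32_t example_asr_reg(uint32_t val, uint32_t count)
{
    count &= 0xff;
    return (uint32_t)((int32_t)val >> (count > 31 ? 31 : count));
}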
607 static void shifter_out_im(TCGv_i32 var, int shift)
610 tcg_gen_andi_i32(cpu_CF, var, 1);
612 tcg_gen_shri_i32(cpu_CF, var, shift);
614 tcg_gen_andi_i32(cpu_CF, cpu_CF, 1);
619 /* Shift by immediate. Includes special handling for shift == 0. */
620 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
621 int shift, int flags)
627 shifter_out_im(var, 32 - shift);
628 tcg_gen_shli_i32(var, var, shift);
634 tcg_gen_shri_i32(cpu_CF, var, 31);
636 tcg_gen_movi_i32(var, 0);
639 shifter_out_im(var, shift - 1);
640 tcg_gen_shri_i32(var, var, shift);
647 shifter_out_im(var, shift - 1);
650 tcg_gen_sari_i32(var, var, shift);
652 case 3: /* ROR/RRX */
655 shifter_out_im(var, shift - 1);
656 tcg_gen_rotri_i32(var, var, shift); break;
658 TCGv_i32 tmp = tcg_temp_new_i32();
659 tcg_gen_shli_i32(tmp, cpu_CF, 31);
661 shifter_out_im(var, 0);
662 tcg_gen_shri_i32(var, var, 1);
663 tcg_gen_or_i32(var, var, tmp);
664 tcg_temp_free_i32(tmp);
669 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
670 TCGv_i32 shift, int flags)
674 case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
675 case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
676 case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
677 case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
682 gen_shl(var, var, shift);
685 gen_shr(var, var, shift);
688 gen_sar(var, var, shift);
690 case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
691 tcg_gen_rotr_i32(var, var, shift); break;
694 tcg_temp_free_i32(shift);
697 #define PAS_OP(pfx) \
699 case 0: gen_pas_helper(glue(pfx,add16)); break; \
700 case 1: gen_pas_helper(glue(pfx,addsubx)); break; \
701 case 2: gen_pas_helper(glue(pfx,subaddx)); break; \
702 case 3: gen_pas_helper(glue(pfx,sub16)); break; \
703 case 4: gen_pas_helper(glue(pfx,add8)); break; \
704 case 7: gen_pas_helper(glue(pfx,sub8)); break; \
706 static void gen_arm_parallel_addsub(int op1, int op2, TCGv_i32 a, TCGv_i32 b)
711 #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
713 tmp = tcg_temp_new_ptr();
714 tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
716 tcg_temp_free_ptr(tmp);
719 tmp = tcg_temp_new_ptr();
720 tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
722 tcg_temp_free_ptr(tmp);
724 #undef gen_pas_helper
725 #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
738 #undef gen_pas_helper
743 /* For unknown reasons Arm and Thumb-2 use arbitrarily different encodings. */
744 #define PAS_OP(pfx) \
746 case 0: gen_pas_helper(glue(pfx,add8)); break; \
747 case 1: gen_pas_helper(glue(pfx,add16)); break; \
748 case 2: gen_pas_helper(glue(pfx,addsubx)); break; \
749 case 4: gen_pas_helper(glue(pfx,sub8)); break; \
750 case 5: gen_pas_helper(glue(pfx,sub16)); break; \
751 case 6: gen_pas_helper(glue(pfx,subaddx)); break; \
753 static void gen_thumb2_parallel_addsub(int op1, int op2, TCGv_i32 a, TCGv_i32 b)
758 #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
760 tmp = tcg_temp_new_ptr();
761 tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
763 tcg_temp_free_ptr(tmp);
766 tmp = tcg_temp_new_ptr();
767 tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
769 tcg_temp_free_ptr(tmp);
771 #undef gen_pas_helper
772 #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
785 #undef gen_pas_helper
791 * Generate a conditional based on ARM condition code cc.
792 * This is common between ARM and AArch64 targets.
794 void arm_test_cc(DisasCompare *cmp, int cc)
825 case 8: /* hi: C && !Z */
826 case 9: /* ls: !C || Z -> !(C && !Z) */
828 value = tcg_temp_new_i32();
830 /* CF is 1 for C, so -CF is an all-bits-set mask for C;
831 ZF is non-zero for !Z; so AND the two subexpressions. */
832 tcg_gen_neg_i32(value, cpu_CF);
833 tcg_gen_and_i32(value, value, cpu_ZF);
836 case 10: /* ge: N == V -> N ^ V == 0 */
837 case 11: /* lt: N != V -> N ^ V != 0 */
838 /* Since we're only interested in the sign bit, == 0 is >= 0. */
840 value = tcg_temp_new_i32();
842 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
845 case 12: /* gt: !Z && N == V */
846 case 13: /* le: Z || N != V */
848 value = tcg_temp_new_i32();
850 /* (N == V) is equal to the sign bit of ~(NF ^ VF). Propagate
851 * the sign bit then AND with ZF to yield the result. */
852 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
853 tcg_gen_sari_i32(value, value, 31);
854 tcg_gen_andc_i32(value, cpu_ZF, value);
857 case 14: /* always */
858 case 15: /* always */
859 /* Use the ALWAYS condition, which will fold early.
860 * It doesn't matter what we use for the value. */
861 cond = TCG_COND_ALWAYS;
866 fprintf(stderr, "Bad condition code 0x%x\n", cc);
871 cond = tcg_invert_cond(cond);
877 cmp->value_global = global;
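/*
 * Illustrative sketch (not part of the translator): evaluating the "hi"
 * and "gt" conditions directly from the flag representation described
 * above, i.e. the predicates whose TCG form arm_test_cc() builds for
 * cases 8/9 and 12/13.  The function names are invented for this example.
 */
static inline bool example_cond_hi(uint32_t cf, uint32_t zf)
{
    /* C && !Z: CF is 0 or 1, and ZF != 0 means Z is clear */
    return (-cf & zf) != 0;
}

static inline bool example_cond_gt(uint32_t nf, uint32_t zf, uint32_t vf)
{
    /* !Z && N == V: N and V are the sign bits of NF and VF */
    return zf != 0 && ((nf ^ vf) & 0x80000000u) == 0;
}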
880 void arm_free_cc(DisasCompare *cmp)
882 if (!cmp->value_global) {
883 tcg_temp_free_i32(cmp->value);
887 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
889 tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
892 void arm_gen_test_cc(int cc, TCGLabel *label)
895 arm_test_cc(&cmp, cc);
896 arm_jump_cc(&cmp, label);
900 static const uint8_t table_logic_cc[16] = {
919 static inline void gen_set_condexec(DisasContext *s)
921 if (s->condexec_mask) {
922 uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
923 TCGv_i32 tmp = tcg_temp_new_i32();
924 tcg_gen_movi_i32(tmp, val);
925 store_cpu_field(tmp, condexec_bits);
929 static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
931 tcg_gen_movi_i32(cpu_R[15], val);
934 /* Set PC and Thumb state from an immediate address. */
935 static inline void gen_bx_im(DisasContext *s, uint32_t addr)
939 s->base.is_jmp = DISAS_JUMP;
940 if (s->thumb != (addr & 1)) {
941 tmp = tcg_temp_new_i32();
942 tcg_gen_movi_i32(tmp, addr & 1);
943 tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUARMState, thumb));
944 tcg_temp_free_i32(tmp);
946 tcg_gen_movi_i32(cpu_R[15], addr & ~1);
949 /* Set PC and Thumb state from var. var is marked as dead. */
950 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
952 s->base.is_jmp = DISAS_JUMP;
953 tcg_gen_andi_i32(cpu_R[15], var, ~1);
954 tcg_gen_andi_i32(var, var, 1);
955 store_cpu_field(var, thumb);
958 /* Set PC and Thumb state from var. var is marked as dead.
959 * For M-profile CPUs, include logic to detect exception-return
960 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
961 * and BX reg, and no others, and happens only for code in Handler mode.
963 static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
965 /* Generate the same code here as for a simple bx, but flag via
966 * s->base.is_jmp that we need to do the rest of the work later.
969 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
970 (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
971 s->base.is_jmp = DISAS_BX_EXCRET;
975 static inline void gen_bx_excret_final_code(DisasContext *s)
977 /* Generate the code to finish possible exception return and end the TB */
978 TCGLabel *excret_label = gen_new_label();
981 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
982 /* Covers FNC_RETURN and EXC_RETURN magic */
983 min_magic = FNC_RETURN_MIN_MAGIC;
985 /* EXC_RETURN magic only */
986 min_magic = EXC_RETURN_MIN_MAGIC;
989 /* Is the new PC value in the magic range indicating exception return? */
990 tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label);
991 /* No: end the TB as we would for a DISAS_JMP */
992 if (is_singlestepping(s)) {
993 gen_singlestep_exception(s);
995 tcg_gen_exit_tb(NULL, 0);
997 gen_set_label(excret_label);
998 /* Yes: this is an exception return.
999 * At this point in runtime env->regs[15] and env->thumb will hold
1000 * the exception-return magic number, which do_v7m_exception_exit()
1001 * will read. Nothing else will be able to see those values because
1002 * the cpu-exec main loop guarantees that we will always go straight
1003 * from raising the exception to the exception-handling code.
1005 * gen_ss_advance(s) does nothing on M profile currently but
1006 * calling it is conceptually the right thing as we have executed
1007 * this instruction (compare SWI, HVC, SMC handling).
1010 gen_exception_internal(EXCP_EXCEPTION_EXIT);
1013 static inline void gen_bxns(DisasContext *s, int rm)
1015 TCGv_i32 var = load_reg(s, rm);
1017 /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
1018 * we need to sync state before calling it, but:
1019 * - we don't need to do gen_set_pc_im() because the bxns helper will
1020 * always set the PC itself
1021 * - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
1022 * unless it's outside an IT block or the last insn in an IT block,
1023 * so we know that condexec == 0 (already set at the top of the TB)
1024 * is correct in the non-UNPREDICTABLE cases, and we can choose
1025 * "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
1027 gen_helper_v7m_bxns(cpu_env, var);
1028 tcg_temp_free_i32(var);
1029 s->base.is_jmp = DISAS_EXIT;
1032 static inline void gen_blxns(DisasContext *s, int rm)
1034 TCGv_i32 var = load_reg(s, rm);
1036 /* We don't need to sync condexec state, for the same reason as bxns.
1037 * We do however need to set the PC, because the blxns helper reads it.
1038 * The blxns helper may throw an exception.
1040 gen_set_pc_im(s, s->pc);
1041 gen_helper_v7m_blxns(cpu_env, var);
1042 tcg_temp_free_i32(var);
1043 s->base.is_jmp = DISAS_EXIT;
1046 /* Variant of store_reg which uses branch&exchange logic when storing
1047 to r15 in ARM architecture v7 and above. The source must be a temporary
1048 and will be marked as dead. */
1049 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
1051 if (reg == 15 && ENABLE_ARCH_7) {
1054 store_reg(s, reg, var);
1058 /* Variant of store_reg which uses branch&exchange logic when storing
1059 * to r15 in ARM architecture v5T and above. This is used for storing
1060 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
1061 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
1062 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
1064 if (reg == 15 && ENABLE_ARCH_5) {
1065 gen_bx_excret(s, var);
1067 store_reg(s, reg, var);
1071 #ifdef CONFIG_USER_ONLY
1072 #define IS_USER_ONLY 1
1074 #define IS_USER_ONLY 0
1077 /* Abstractions of "generate code to do a guest load/store for
1078 * AArch32", where a vaddr is always 32 bits (and is zero
1079 * extended if we're a 64 bit core) and data is also
1080 * 32 bits unless specifically doing a 64 bit access.
1081 * These functions work like tcg_gen_qemu_{ld,st}* except
1082 * that the address argument is TCGv_i32 rather than TCGv.
1085 static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, TCGMemOp op)
1087 TCGv addr = tcg_temp_new();
1088 tcg_gen_extu_i32_tl(addr, a32);
1090 /* Not needed for user-mode BE32, where we use MO_BE instead. */
1091 if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
1092 tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
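/*
 * Illustrative sketch (not part of the translator): the address adjustment
 * applied above for system-mode BE32 (SCTLR.B set), on a host integer.
 * A byte access has address bits [1:0] inverted, a halfword access has
 * bit 1 inverted, and word or larger accesses are unchanged.  The function
 * name is invented for this example.
 */
static inline uint32_t example_be32_adjust_addr(uint32_t addr, unsigned size_log2)
{
    return size_log2 < 2 ? addr ^ (4 - (1u << size_log2)) : addr;
}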
1097 static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
1098 int index, TCGMemOp opc)
1102 if (arm_dc_feature(s, ARM_FEATURE_M) &&
1103 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
1107 addr = gen_aa32_addr(s, a32, opc);
1108 tcg_gen_qemu_ld_i32(val, addr, index, opc);
1109 tcg_temp_free(addr);
1112 static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
1113 int index, TCGMemOp opc)
1117 if (arm_dc_feature(s, ARM_FEATURE_M) &&
1118 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
1122 addr = gen_aa32_addr(s, a32, opc);
1123 tcg_gen_qemu_st_i32(val, addr, index, opc);
1124 tcg_temp_free(addr);
1127 #define DO_GEN_LD(SUFF, OPC) \
1128 static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
1129 TCGv_i32 a32, int index) \
1131 gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data); \
1133 static inline void gen_aa32_ld##SUFF##_iss(DisasContext *s, \
1135 TCGv_i32 a32, int index, \
1138 gen_aa32_ld##SUFF(s, val, a32, index); \
1139 disas_set_da_iss(s, OPC, issinfo); \
1142 #define DO_GEN_ST(SUFF, OPC) \
1143 static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
1144 TCGv_i32 a32, int index) \
1146 gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data); \
1148 static inline void gen_aa32_st##SUFF##_iss(DisasContext *s, \
1150 TCGv_i32 a32, int index, \
1153 gen_aa32_st##SUFF(s, val, a32, index); \
1154 disas_set_da_iss(s, OPC, issinfo | ISSIsWrite); \
1157 static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val)
1159 /* Not needed for user-mode BE32, where we use MO_BE instead. */
1160 if (!IS_USER_ONLY && s->sctlr_b) {
1161 tcg_gen_rotri_i64(val, val, 32);
1165 static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1166 int index, TCGMemOp opc)
1168 TCGv addr = gen_aa32_addr(s, a32, opc);
1169 tcg_gen_qemu_ld_i64(val, addr, index, opc);
1170 gen_aa32_frob64(s, val);
1171 tcg_temp_free(addr);
1174 static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
1175 TCGv_i32 a32, int index)
1177 gen_aa32_ld_i64(s, val, a32, index, MO_Q | s->be_data);
1180 static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1181 int index, TCGMemOp opc)
1183 TCGv addr = gen_aa32_addr(s, a32, opc);
1185 /* Not needed for user-mode BE32, where we use MO_BE instead. */
1186 if (!IS_USER_ONLY && s->sctlr_b) {
1187 TCGv_i64 tmp = tcg_temp_new_i64();
1188 tcg_gen_rotri_i64(tmp, val, 32);
1189 tcg_gen_qemu_st_i64(tmp, addr, index, opc);
1190 tcg_temp_free_i64(tmp);
1192 tcg_gen_qemu_st_i64(val, addr, index, opc);
1194 tcg_temp_free(addr);
1197 static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
1198 TCGv_i32 a32, int index)
1200 gen_aa32_st_i64(s, val, a32, index, MO_Q | s->be_data);
1203 DO_GEN_LD(8s, MO_SB)
1204 DO_GEN_LD(8u, MO_UB)
1205 DO_GEN_LD(16s, MO_SW)
1206 DO_GEN_LD(16u, MO_UW)
1207 DO_GEN_LD(32u, MO_UL)
1209 DO_GEN_ST(16, MO_UW)
1210 DO_GEN_ST(32, MO_UL)
1212 static inline void gen_hvc(DisasContext *s, int imm16)
1214 /* The pre HVC helper handles cases when HVC gets trapped
1215 * as an undefined insn by runtime configuration (i.e. before
1216 * the insn really executes).
1218 gen_set_pc_im(s, s->pc - 4);
1219 gen_helper_pre_hvc(cpu_env);
1220 /* Otherwise we will treat this as a real exception which
1221 * happens after execution of the insn. (The distinction matters
1222 * for the PC value reported to the exception handler and also
1223 * for single stepping.)
1226 gen_set_pc_im(s, s->pc);
1227 s->base.is_jmp = DISAS_HVC;
1230 static inline void gen_smc(DisasContext *s)
1232 /* As with HVC, we may take an exception either before or after
1233 * the insn executes.
1237 gen_set_pc_im(s, s->pc - 4);
1238 tmp = tcg_const_i32(syn_aa32_smc());
1239 gen_helper_pre_smc(cpu_env, tmp);
1240 tcg_temp_free_i32(tmp);
1241 gen_set_pc_im(s, s->pc);
1242 s->base.is_jmp = DISAS_SMC;
1245 static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
1247 gen_set_condexec(s);
1248 gen_set_pc_im(s, s->pc - offset);
1249 gen_exception_internal(excp);
1250 s->base.is_jmp = DISAS_NORETURN;
1253 static void gen_exception_insn(DisasContext *s, int offset, int excp,
1254 int syn, uint32_t target_el)
1256 gen_set_condexec(s);
1257 gen_set_pc_im(s, s->pc - offset);
1258 gen_exception(excp, syn, target_el);
1259 s->base.is_jmp = DISAS_NORETURN;
1262 static void gen_exception_bkpt_insn(DisasContext *s, int offset, uint32_t syn)
1266 gen_set_condexec(s);
1267 gen_set_pc_im(s, s->pc - offset);
1268 tcg_syn = tcg_const_i32(syn);
1269 gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
1270 tcg_temp_free_i32(tcg_syn);
1271 s->base.is_jmp = DISAS_NORETURN;
1274 /* Force a TB lookup after an instruction that changes the CPU state. */
1275 static inline void gen_lookup_tb(DisasContext *s)
1277 tcg_gen_movi_i32(cpu_R[15], s->pc & ~1);
1278 s->base.is_jmp = DISAS_EXIT;
1281 static inline void gen_hlt(DisasContext *s, int imm)
1283 /* HLT. This has two purposes.
1284 * Architecturally, it is an external halting debug instruction.
1285 * Since QEMU doesn't implement external debug, we treat this as
1286 * the architecture requires when halting debug is disabled: it will UNDEF.
1287 * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1288 * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1289 * must trigger semihosting even for ARMv7 and earlier, where
1290 * HLT was an undefined encoding.
1291 * In system mode, we don't allow userspace access to
1292 * semihosting, to provide some semblance of security
1293 * (and for consistency with our 32-bit semihosting).
1295 if (semihosting_enabled() &&
1296 #ifndef CONFIG_USER_ONLY
1297 s->current_el != 0 &&
1299 (imm == (s->thumb ? 0x3c : 0xf000))) {
1300 gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
1304 gen_exception_insn(s, s->thumb ? 2 : 4, EXCP_UDEF, syn_uncategorized(),
1305 default_exception_el(s));
1308 static inline void gen_add_data_offset(DisasContext *s, unsigned int insn,
1311 int val, rm, shift, shiftop;
1314 if (!(insn & (1 << 25))) {
1317 if (!(insn & (1 << 23)))
1320 tcg_gen_addi_i32(var, var, val);
1322 /* shift/register */
1324 shift = (insn >> 7) & 0x1f;
1325 shiftop = (insn >> 5) & 3;
1326 offset = load_reg(s, rm);
1327 gen_arm_shift_im(offset, shiftop, shift, 0);
1328 if (!(insn & (1 << 23)))
1329 tcg_gen_sub_i32(var, var, offset);
1331 tcg_gen_add_i32(var, var, offset);
1332 tcg_temp_free_i32(offset);
1336 static inline void gen_add_datah_offset(DisasContext *s, unsigned int insn,
1337 int extra, TCGv_i32 var)
1342 if (insn & (1 << 22)) {
1344 val = (insn & 0xf) | ((insn >> 4) & 0xf0);
1345 if (!(insn & (1 << 23)))
1349 tcg_gen_addi_i32(var, var, val);
1353 tcg_gen_addi_i32(var, var, extra);
1355 offset = load_reg(s, rm);
1356 if (!(insn & (1 << 23)))
1357 tcg_gen_sub_i32(var, var, offset);
1359 tcg_gen_add_i32(var, var, offset);
1360 tcg_temp_free_i32(offset);
1364 static TCGv_ptr get_fpstatus_ptr(int neon)
1366 TCGv_ptr statusptr = tcg_temp_new_ptr();
1369 offset = offsetof(CPUARMState, vfp.standard_fp_status);
1371 offset = offsetof(CPUARMState, vfp.fp_status);
1373 tcg_gen_addi_ptr(statusptr, cpu_env, offset);
1377 #define VFP_GEN_ITOF(name) \
1378 static inline void gen_vfp_##name(int dp, int neon) \
1380 TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
1382 gen_helper_vfp_##name##d(cpu_F0d, cpu_F0s, statusptr); \
1384 gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
1386 tcg_temp_free_ptr(statusptr); \
1393 #define VFP_GEN_FTOI(name) \
1394 static inline void gen_vfp_##name(int dp, int neon) \
1396 TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
1398 gen_helper_vfp_##name##d(cpu_F0s, cpu_F0d, statusptr); \
1400 gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
1402 tcg_temp_free_ptr(statusptr); \
1409 #define VFP_GEN_FIX(name, round) \
1410 static inline void gen_vfp_##name(int dp, int shift, int neon) \
1412 TCGv_i32 tmp_shift = tcg_const_i32(shift); \
1413 TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
1415 gen_helper_vfp_##name##d##round(cpu_F0d, cpu_F0d, tmp_shift, \
1418 gen_helper_vfp_##name##s##round(cpu_F0s, cpu_F0s, tmp_shift, \
1421 tcg_temp_free_i32(tmp_shift); \
1422 tcg_temp_free_ptr(statusptr); \
1424 VFP_GEN_FIX(tosl, _round_to_zero)
1425 VFP_GEN_FIX(toul, _round_to_zero)
1430 static inline long vfp_reg_offset(bool dp, unsigned reg)
1433 return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1435 long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
1437 ofs += offsetof(CPU_DoubleU, l.upper);
1439 ofs += offsetof(CPU_DoubleU, l.lower);
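/*
 * Illustrative sketch (not part of the translator): where a
 * single-precision register Sn lives within vfp.zregs[] according to
 * vfp_reg_offset() above, ignoring the host-endian byte selection that
 * CPU_DoubleU handles.  The function and parameter names are invented
 * for this example.
 */
static inline void example_sreg_location(unsigned sreg, unsigned *zreg,
                                         unsigned *dword, unsigned *word)
{
    *zreg = sreg >> 2;         /* four S registers per 128-bit zreg */
    *dword = (sreg >> 1) & 1;  /* which 64-bit element of the zreg */
    *word = sreg & 1;          /* which 32-bit half of that element */
}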
1445 /* Return the offset of a 32-bit piece of a NEON register.
1446 zero is the least significant end of the register. */
1448 neon_reg_offset (int reg, int n)
1452 return vfp_reg_offset(0, sreg);
1455 /* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1456 * where 0 is the least significant end of the register.
1459 neon_element_offset(int reg, int element, TCGMemOp size)
1461 int element_size = 1 << size;
1462 int ofs = element * element_size;
1463 #ifdef HOST_WORDS_BIGENDIAN
1464 /* Calculate the offset assuming fully little-endian,
1465 * then XOR to account for the order of the 8-byte units.
1467 if (element_size < 8) {
1468 ofs ^= 8 - element_size;
1471 return neon_reg_offset(reg, 0) + ofs;
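/*
 * Illustrative sketch (not part of the translator): the element offset
 * computed above, as a standalone calculation relative to the start of
 * the register.  On a big-endian host the byte offset within each 8-byte
 * unit is mirrored with an XOR.  The function name is invented for this
 * example.
 */
static inline int example_neon_elt_ofs(int element, unsigned size_log2,
                                       bool big_endian_host)
{
    int element_size = 1 << size_log2;
    int ofs = element * element_size;

    if (big_endian_host && element_size < 8) {
        ofs ^= 8 - element_size;
    }
    return ofs;
}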
1474 static TCGv_i32 neon_load_reg(int reg, int pass)
1476 TCGv_i32 tmp = tcg_temp_new_i32();
1477 tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
1481 static void neon_load_element(TCGv_i32 var, int reg, int ele, TCGMemOp mop)
1483 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1487 tcg_gen_ld8u_i32(var, cpu_env, offset);
1490 tcg_gen_ld16u_i32(var, cpu_env, offset);
1493 tcg_gen_ld_i32(var, cpu_env, offset);
1496 g_assert_not_reached();
1500 static void neon_load_element64(TCGv_i64 var, int reg, int ele, TCGMemOp mop)
1502 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1506 tcg_gen_ld8u_i64(var, cpu_env, offset);
1509 tcg_gen_ld16u_i64(var, cpu_env, offset);
1512 tcg_gen_ld32u_i64(var, cpu_env, offset);
1515 tcg_gen_ld_i64(var, cpu_env, offset);
1518 g_assert_not_reached();
1522 static void neon_store_reg(int reg, int pass, TCGv_i32 var)
1524 tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
1525 tcg_temp_free_i32(var);
1528 static void neon_store_element(int reg, int ele, TCGMemOp size, TCGv_i32 var)
1530 long offset = neon_element_offset(reg, ele, size);
1534 tcg_gen_st8_i32(var, cpu_env, offset);
1537 tcg_gen_st16_i32(var, cpu_env, offset);
1540 tcg_gen_st_i32(var, cpu_env, offset);
1543 g_assert_not_reached();
1547 static void neon_store_element64(int reg, int ele, TCGMemOp size, TCGv_i64 var)
1549 long offset = neon_element_offset(reg, ele, size);
1553 tcg_gen_st8_i64(var, cpu_env, offset);
1556 tcg_gen_st16_i64(var, cpu_env, offset);
1559 tcg_gen_st32_i64(var, cpu_env, offset);
1562 tcg_gen_st_i64(var, cpu_env, offset);
1565 g_assert_not_reached();
1569 static inline void neon_load_reg64(TCGv_i64 var, int reg)
1571 tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
1574 static inline void neon_store_reg64(TCGv_i64 var, int reg)
1576 tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
1579 static inline void neon_load_reg32(TCGv_i32 var, int reg)
1581 tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
1584 static inline void neon_store_reg32(TCGv_i32 var, int reg)
1586 tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
1589 static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
1591 TCGv_ptr ret = tcg_temp_new_ptr();
1592 tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
1596 #define tcg_gen_ld_f32 tcg_gen_ld_i32
1597 #define tcg_gen_st_f32 tcg_gen_st_i32
1599 #define ARM_CP_RW_BIT (1 << 20)
1601 /* Include the VFP decoder */
1602 #include "translate-vfp.inc.c"
1604 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1606 tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1609 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1611 tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1614 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1616 TCGv_i32 var = tcg_temp_new_i32();
1617 tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1621 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1623 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1624 tcg_temp_free_i32(var);
1627 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1629 iwmmxt_store_reg(cpu_M0, rn);
1632 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1634 iwmmxt_load_reg(cpu_M0, rn);
1637 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1639 iwmmxt_load_reg(cpu_V1, rn);
1640 tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1643 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1645 iwmmxt_load_reg(cpu_V1, rn);
1646 tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1649 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1651 iwmmxt_load_reg(cpu_V1, rn);
1652 tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1655 #define IWMMXT_OP(name) \
1656 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1658 iwmmxt_load_reg(cpu_V1, rn); \
1659 gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1662 #define IWMMXT_OP_ENV(name) \
1663 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1665 iwmmxt_load_reg(cpu_V1, rn); \
1666 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1669 #define IWMMXT_OP_ENV_SIZE(name) \
1670 IWMMXT_OP_ENV(name##b) \
1671 IWMMXT_OP_ENV(name##w) \
1672 IWMMXT_OP_ENV(name##l)
1674 #define IWMMXT_OP_ENV1(name) \
1675 static inline void gen_op_iwmmxt_##name##_M0(void) \
1677 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1691 IWMMXT_OP_ENV_SIZE(unpackl)
1692 IWMMXT_OP_ENV_SIZE(unpackh)
1694 IWMMXT_OP_ENV1(unpacklub)
1695 IWMMXT_OP_ENV1(unpackluw)
1696 IWMMXT_OP_ENV1(unpacklul)
1697 IWMMXT_OP_ENV1(unpackhub)
1698 IWMMXT_OP_ENV1(unpackhuw)
1699 IWMMXT_OP_ENV1(unpackhul)
1700 IWMMXT_OP_ENV1(unpacklsb)
1701 IWMMXT_OP_ENV1(unpacklsw)
1702 IWMMXT_OP_ENV1(unpacklsl)
1703 IWMMXT_OP_ENV1(unpackhsb)
1704 IWMMXT_OP_ENV1(unpackhsw)
1705 IWMMXT_OP_ENV1(unpackhsl)
1707 IWMMXT_OP_ENV_SIZE(cmpeq)
1708 IWMMXT_OP_ENV_SIZE(cmpgtu)
1709 IWMMXT_OP_ENV_SIZE(cmpgts)
1711 IWMMXT_OP_ENV_SIZE(mins)
1712 IWMMXT_OP_ENV_SIZE(minu)
1713 IWMMXT_OP_ENV_SIZE(maxs)
1714 IWMMXT_OP_ENV_SIZE(maxu)
1716 IWMMXT_OP_ENV_SIZE(subn)
1717 IWMMXT_OP_ENV_SIZE(addn)
1718 IWMMXT_OP_ENV_SIZE(subu)
1719 IWMMXT_OP_ENV_SIZE(addu)
1720 IWMMXT_OP_ENV_SIZE(subs)
1721 IWMMXT_OP_ENV_SIZE(adds)
1723 IWMMXT_OP_ENV(avgb0)
1724 IWMMXT_OP_ENV(avgb1)
1725 IWMMXT_OP_ENV(avgw0)
1726 IWMMXT_OP_ENV(avgw1)
1728 IWMMXT_OP_ENV(packuw)
1729 IWMMXT_OP_ENV(packul)
1730 IWMMXT_OP_ENV(packuq)
1731 IWMMXT_OP_ENV(packsw)
1732 IWMMXT_OP_ENV(packsl)
1733 IWMMXT_OP_ENV(packsq)
1735 static void gen_op_iwmmxt_set_mup(void)
1738 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1739 tcg_gen_ori_i32(tmp, tmp, 2);
1740 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1743 static void gen_op_iwmmxt_set_cup(void)
1746 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1747 tcg_gen_ori_i32(tmp, tmp, 1);
1748 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1751 static void gen_op_iwmmxt_setpsr_nz(void)
1753 TCGv_i32 tmp = tcg_temp_new_i32();
1754 gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1755 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1758 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1760 iwmmxt_load_reg(cpu_V1, rn);
1761 tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1762 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1765 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1772 rd = (insn >> 16) & 0xf;
1773 tmp = load_reg(s, rd);
1775 offset = (insn & 0xff) << ((insn >> 7) & 2);
1776 if (insn & (1 << 24)) {
1778 if (insn & (1 << 23))
1779 tcg_gen_addi_i32(tmp, tmp, offset);
1781 tcg_gen_addi_i32(tmp, tmp, -offset);
1782 tcg_gen_mov_i32(dest, tmp);
1783 if (insn & (1 << 21))
1784 store_reg(s, rd, tmp);
1786 tcg_temp_free_i32(tmp);
1787 } else if (insn & (1 << 21)) {
1789 tcg_gen_mov_i32(dest, tmp);
1790 if (insn & (1 << 23))
1791 tcg_gen_addi_i32(tmp, tmp, offset);
1793 tcg_gen_addi_i32(tmp, tmp, -offset);
1794 store_reg(s, rd, tmp);
1795 } else if (!(insn & (1 << 23)))
1800 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1802 int rd = (insn >> 0) & 0xf;
1805 if (insn & (1 << 8)) {
1806 if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1809 tmp = iwmmxt_load_creg(rd);
1812 tmp = tcg_temp_new_i32();
1813 iwmmxt_load_reg(cpu_V0, rd);
1814 tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1816 tcg_gen_andi_i32(tmp, tmp, mask);
1817 tcg_gen_mov_i32(dest, tmp);
1818 tcg_temp_free_i32(tmp);
1822 /* Disassemble an iwMMXt instruction. Returns nonzero if an error occurred
1823    (i.e. an undefined instruction). */
1824 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1827 int rdhi, rdlo, rd0, rd1, i;
1829 TCGv_i32 tmp, tmp2, tmp3;
1831 if ((insn & 0x0e000e00) == 0x0c000000) {
1832 if ((insn & 0x0fe00ff0) == 0x0c400000) {
1834 rdlo = (insn >> 12) & 0xf;
1835 rdhi = (insn >> 16) & 0xf;
1836 if (insn & ARM_CP_RW_BIT) { /* TMRRC */
1837 iwmmxt_load_reg(cpu_V0, wrd);
1838 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1839 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
1840 tcg_gen_extrl_i64_i32(cpu_R[rdhi], cpu_V0);
1841 } else { /* TMCRR */
1842 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1843 iwmmxt_store_reg(cpu_V0, wrd);
1844 gen_op_iwmmxt_set_mup();
1849 wrd = (insn >> 12) & 0xf;
1850 addr = tcg_temp_new_i32();
1851 if (gen_iwmmxt_address(s, insn, addr)) {
1852 tcg_temp_free_i32(addr);
1855 if (insn & ARM_CP_RW_BIT) {
1856 if ((insn >> 28) == 0xf) { /* WLDRW wCx */
1857 tmp = tcg_temp_new_i32();
1858 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1859 iwmmxt_store_creg(wrd, tmp);
1862 if (insn & (1 << 8)) {
1863 if (insn & (1 << 22)) { /* WLDRD */
1864 gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1866 } else { /* WLDRW wRd */
1867 tmp = tcg_temp_new_i32();
1868 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1871 tmp = tcg_temp_new_i32();
1872 if (insn & (1 << 22)) { /* WLDRH */
1873 gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1874 } else { /* WLDRB */
1875 gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1879 tcg_gen_extu_i32_i64(cpu_M0, tmp);
1880 tcg_temp_free_i32(tmp);
1882 gen_op_iwmmxt_movq_wRn_M0(wrd);
1885 if ((insn >> 28) == 0xf) { /* WSTRW wCx */
1886 tmp = iwmmxt_load_creg(wrd);
1887 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1889 gen_op_iwmmxt_movq_M0_wRn(wrd);
1890 tmp = tcg_temp_new_i32();
1891 if (insn & (1 << 8)) {
1892 if (insn & (1 << 22)) { /* WSTRD */
1893 gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1894 } else { /* WSTRW wRd */
1895 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1896 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1899 if (insn & (1 << 22)) { /* WSTRH */
1900 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1901 gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1902 } else { /* WSTRB */
1903 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1904 gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1908 tcg_temp_free_i32(tmp);
1910 tcg_temp_free_i32(addr);
1914 if ((insn & 0x0f000000) != 0x0e000000)
1917 switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1918 case 0x000: /* WOR */
1919 wrd = (insn >> 12) & 0xf;
1920 rd0 = (insn >> 0) & 0xf;
1921 rd1 = (insn >> 16) & 0xf;
1922 gen_op_iwmmxt_movq_M0_wRn(rd0);
1923 gen_op_iwmmxt_orq_M0_wRn(rd1);
1924 gen_op_iwmmxt_setpsr_nz();
1925 gen_op_iwmmxt_movq_wRn_M0(wrd);
1926 gen_op_iwmmxt_set_mup();
1927 gen_op_iwmmxt_set_cup();
1929 case 0x011: /* TMCR */
1932 rd = (insn >> 12) & 0xf;
1933 wrd = (insn >> 16) & 0xf;
1935 case ARM_IWMMXT_wCID:
1936 case ARM_IWMMXT_wCASF:
1938 case ARM_IWMMXT_wCon:
1939 gen_op_iwmmxt_set_cup();
1941 case ARM_IWMMXT_wCSSF:
1942 tmp = iwmmxt_load_creg(wrd);
1943 tmp2 = load_reg(s, rd);
1944 tcg_gen_andc_i32(tmp, tmp, tmp2);
1945 tcg_temp_free_i32(tmp2);
1946 iwmmxt_store_creg(wrd, tmp);
1948 case ARM_IWMMXT_wCGR0:
1949 case ARM_IWMMXT_wCGR1:
1950 case ARM_IWMMXT_wCGR2:
1951 case ARM_IWMMXT_wCGR3:
1952 gen_op_iwmmxt_set_cup();
1953 tmp = load_reg(s, rd);
1954 iwmmxt_store_creg(wrd, tmp);
1960 case 0x100: /* WXOR */
1961 wrd = (insn >> 12) & 0xf;
1962 rd0 = (insn >> 0) & 0xf;
1963 rd1 = (insn >> 16) & 0xf;
1964 gen_op_iwmmxt_movq_M0_wRn(rd0);
1965 gen_op_iwmmxt_xorq_M0_wRn(rd1);
1966 gen_op_iwmmxt_setpsr_nz();
1967 gen_op_iwmmxt_movq_wRn_M0(wrd);
1968 gen_op_iwmmxt_set_mup();
1969 gen_op_iwmmxt_set_cup();
1971 case 0x111: /* TMRC */
1974 rd = (insn >> 12) & 0xf;
1975 wrd = (insn >> 16) & 0xf;
1976 tmp = iwmmxt_load_creg(wrd);
1977 store_reg(s, rd, tmp);
1979 case 0x300: /* WANDN */
1980 wrd = (insn >> 12) & 0xf;
1981 rd0 = (insn >> 0) & 0xf;
1982 rd1 = (insn >> 16) & 0xf;
1983 gen_op_iwmmxt_movq_M0_wRn(rd0);
1984 tcg_gen_neg_i64(cpu_M0, cpu_M0);
1985 gen_op_iwmmxt_andq_M0_wRn(rd1);
1986 gen_op_iwmmxt_setpsr_nz();
1987 gen_op_iwmmxt_movq_wRn_M0(wrd);
1988 gen_op_iwmmxt_set_mup();
1989 gen_op_iwmmxt_set_cup();
1991 case 0x200: /* WAND */
1992 wrd = (insn >> 12) & 0xf;
1993 rd0 = (insn >> 0) & 0xf;
1994 rd1 = (insn >> 16) & 0xf;
1995 gen_op_iwmmxt_movq_M0_wRn(rd0);
1996 gen_op_iwmmxt_andq_M0_wRn(rd1);
1997 gen_op_iwmmxt_setpsr_nz();
1998 gen_op_iwmmxt_movq_wRn_M0(wrd);
1999 gen_op_iwmmxt_set_mup();
2000 gen_op_iwmmxt_set_cup();
2002 case 0x810: case 0xa10: /* WMADD */
2003 wrd = (insn >> 12) & 0xf;
2004 rd0 = (insn >> 0) & 0xf;
2005 rd1 = (insn >> 16) & 0xf;
2006 gen_op_iwmmxt_movq_M0_wRn(rd0);
2007 if (insn & (1 << 21))
2008 gen_op_iwmmxt_maddsq_M0_wRn(rd1);
2010 gen_op_iwmmxt_madduq_M0_wRn(rd1);
2011 gen_op_iwmmxt_movq_wRn_M0(wrd);
2012 gen_op_iwmmxt_set_mup();
2014 case 0x10e: case 0x50e: case 0x90e: case 0xd0e: /* WUNPCKIL */
2015 wrd = (insn >> 12) & 0xf;
2016 rd0 = (insn >> 16) & 0xf;
2017 rd1 = (insn >> 0) & 0xf;
2018 gen_op_iwmmxt_movq_M0_wRn(rd0);
2019 switch ((insn >> 22) & 3) {
2021 gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
2024 gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
2027 gen_op_iwmmxt_unpackll_M0_wRn(rd1);
2032 gen_op_iwmmxt_movq_wRn_M0(wrd);
2033 gen_op_iwmmxt_set_mup();
2034 gen_op_iwmmxt_set_cup();
2036 case 0x10c: case 0x50c: case 0x90c: case 0xd0c: /* WUNPCKIH */
2037 wrd = (insn >> 12) & 0xf;
2038 rd0 = (insn >> 16) & 0xf;
2039 rd1 = (insn >> 0) & 0xf;
2040 gen_op_iwmmxt_movq_M0_wRn(rd0);
2041 switch ((insn >> 22) & 3) {
2043 gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
2046 gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
2049 gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
2054 gen_op_iwmmxt_movq_wRn_M0(wrd);
2055 gen_op_iwmmxt_set_mup();
2056 gen_op_iwmmxt_set_cup();
2058 case 0x012: case 0x112: case 0x412: case 0x512: /* WSAD */
2059 wrd = (insn >> 12) & 0xf;
2060 rd0 = (insn >> 16) & 0xf;
2061 rd1 = (insn >> 0) & 0xf;
2062 gen_op_iwmmxt_movq_M0_wRn(rd0);
2063 if (insn & (1 << 22))
2064 gen_op_iwmmxt_sadw_M0_wRn(rd1);
2066 gen_op_iwmmxt_sadb_M0_wRn(rd1);
2067 if (!(insn & (1 << 20)))
2068 gen_op_iwmmxt_addl_M0_wRn(wrd);
2069 gen_op_iwmmxt_movq_wRn_M0(wrd);
2070 gen_op_iwmmxt_set_mup();
2072 case 0x010: case 0x110: case 0x210: case 0x310: /* WMUL */
2073 wrd = (insn >> 12) & 0xf;
2074 rd0 = (insn >> 16) & 0xf;
2075 rd1 = (insn >> 0) & 0xf;
2076 gen_op_iwmmxt_movq_M0_wRn(rd0);
2077 if (insn & (1 << 21)) {
2078 if (insn & (1 << 20))
2079 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
2081 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
2083 if (insn & (1 << 20))
2084 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
2086 gen_op_iwmmxt_mululw_M0_wRn(rd1);
2088 gen_op_iwmmxt_movq_wRn_M0(wrd);
2089 gen_op_iwmmxt_set_mup();
2091 case 0x410: case 0x510: case 0x610: case 0x710: /* WMAC */
2092 wrd = (insn >> 12) & 0xf;
2093 rd0 = (insn >> 16) & 0xf;
2094 rd1 = (insn >> 0) & 0xf;
2095 gen_op_iwmmxt_movq_M0_wRn(rd0);
2096 if (insn & (1 << 21))
2097 gen_op_iwmmxt_macsw_M0_wRn(rd1);
2099 gen_op_iwmmxt_macuw_M0_wRn(rd1);
2100 if (!(insn & (1 << 20))) {
2101 iwmmxt_load_reg(cpu_V1, wrd);
2102 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
2104 gen_op_iwmmxt_movq_wRn_M0(wrd);
2105 gen_op_iwmmxt_set_mup();
2107 case 0x006: case 0x406: case 0x806: case 0xc06: /* WCMPEQ */
2108 wrd = (insn >> 12) & 0xf;
2109 rd0 = (insn >> 16) & 0xf;
2110 rd1 = (insn >> 0) & 0xf;
2111 gen_op_iwmmxt_movq_M0_wRn(rd0);
2112 switch ((insn >> 22) & 3) {
2114 gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
2117 gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
2120 gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
2125 gen_op_iwmmxt_movq_wRn_M0(wrd);
2126 gen_op_iwmmxt_set_mup();
2127 gen_op_iwmmxt_set_cup();
2129 case 0x800: case 0x900: case 0xc00: case 0xd00: /* WAVG2 */
2130 wrd = (insn >> 12) & 0xf;
2131 rd0 = (insn >> 16) & 0xf;
2132 rd1 = (insn >> 0) & 0xf;
2133 gen_op_iwmmxt_movq_M0_wRn(rd0);
2134 if (insn & (1 << 22)) {
2135 if (insn & (1 << 20))
2136 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
2138 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
2140 if (insn & (1 << 20))
2141 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
2143 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
2145 gen_op_iwmmxt_movq_wRn_M0(wrd);
2146 gen_op_iwmmxt_set_mup();
2147 gen_op_iwmmxt_set_cup();
2149 case 0x802: case 0x902: case 0xa02: case 0xb02: /* WALIGNR */
2150 wrd = (insn >> 12) & 0xf;
2151 rd0 = (insn >> 16) & 0xf;
2152 rd1 = (insn >> 0) & 0xf;
2153 gen_op_iwmmxt_movq_M0_wRn(rd0);
2154 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
2155 tcg_gen_andi_i32(tmp, tmp, 7);
2156 iwmmxt_load_reg(cpu_V1, rd1);
2157 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2158 tcg_temp_free_i32(tmp);
2159 gen_op_iwmmxt_movq_wRn_M0(wrd);
2160 gen_op_iwmmxt_set_mup();
2162 case 0x601: case 0x605: case 0x609: case 0x60d: /* TINSR */
2163 if (((insn >> 6) & 3) == 3)
2165 rd = (insn >> 12) & 0xf;
2166 wrd = (insn >> 16) & 0xf;
2167 tmp = load_reg(s, rd);
2168 gen_op_iwmmxt_movq_M0_wRn(wrd);
2169 switch ((insn >> 6) & 3) {
2171 tmp2 = tcg_const_i32(0xff);
2172 tmp3 = tcg_const_i32((insn & 7) << 3);
2175 tmp2 = tcg_const_i32(0xffff);
2176 tmp3 = tcg_const_i32((insn & 3) << 4);
2179 tmp2 = tcg_const_i32(0xffffffff);
2180 tmp3 = tcg_const_i32((insn & 1) << 5);
2186 gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
2187 tcg_temp_free_i32(tmp3);
2188 tcg_temp_free_i32(tmp2);
2189 tcg_temp_free_i32(tmp);
2190 gen_op_iwmmxt_movq_wRn_M0(wrd);
2191 gen_op_iwmmxt_set_mup();
2193 case 0x107: case 0x507: case 0x907: case 0xd07: /* TEXTRM */
2194 rd = (insn >> 12) & 0xf;
2195 wrd = (insn >> 16) & 0xf;
2196 if (rd == 15 || ((insn >> 22) & 3) == 3)
2198 gen_op_iwmmxt_movq_M0_wRn(wrd);
2199 tmp = tcg_temp_new_i32();
2200 switch ((insn >> 22) & 3) {
2202 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
2203 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
2205 tcg_gen_ext8s_i32(tmp, tmp);
2207 tcg_gen_andi_i32(tmp, tmp, 0xff);
2211 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
2212 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
2214 tcg_gen_ext16s_i32(tmp, tmp);
2216 tcg_gen_andi_i32(tmp, tmp, 0xffff);
2220 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
2221 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
2224 store_reg(s, rd, tmp);
2226 case 0x117: case 0x517: case 0x917: case 0xd17: /* TEXTRC */
2227 if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2229 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2230 switch ((insn >> 22) & 3) {
2232 tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
2235 tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
2238 tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
2241 tcg_gen_shli_i32(tmp, tmp, 28);
2243 tcg_temp_free_i32(tmp);
2245 case 0x401: case 0x405: case 0x409: case 0x40d: /* TBCST */
2246 if (((insn >> 6) & 3) == 3)
2248 rd = (insn >> 12) & 0xf;
2249 wrd = (insn >> 16) & 0xf;
2250 tmp = load_reg(s, rd);
2251 switch ((insn >> 6) & 3) {
2253 gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
2256 gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
2259 gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
2262 tcg_temp_free_i32(tmp);
2263 gen_op_iwmmxt_movq_wRn_M0(wrd);
2264 gen_op_iwmmxt_set_mup();
2266 case 0x113: case 0x513: case 0x913: case 0xd13: /* TANDC */
2267 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2269 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2270 tmp2 = tcg_temp_new_i32();
2271 tcg_gen_mov_i32(tmp2, tmp);
2272 switch ((insn >> 22) & 3) {
2274 for (i = 0; i < 7; i ++) {
2275 tcg_gen_shli_i32(tmp2, tmp2, 4);
2276 tcg_gen_and_i32(tmp, tmp, tmp2);
2280 for (i = 0; i < 3; i ++) {
2281 tcg_gen_shli_i32(tmp2, tmp2, 8);
2282 tcg_gen_and_i32(tmp, tmp, tmp2);
2286 tcg_gen_shli_i32(tmp2, tmp2, 16);
2287 tcg_gen_and_i32(tmp, tmp, tmp2);
2291 tcg_temp_free_i32(tmp2);
2292 tcg_temp_free_i32(tmp);
2294 case 0x01c: case 0x41c: case 0x81c: case 0xc1c: /* WACC */
2295 wrd = (insn >> 12) & 0xf;
2296 rd0 = (insn >> 16) & 0xf;
2297 gen_op_iwmmxt_movq_M0_wRn(rd0);
2298 switch ((insn >> 22) & 3) {
2300 gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
2303 gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
2306 gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2311 gen_op_iwmmxt_movq_wRn_M0(wrd);
2312 gen_op_iwmmxt_set_mup();
2314 case 0x115: case 0x515: case 0x915: case 0xd15: /* TORC */
2315 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2317 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2318 tmp2 = tcg_temp_new_i32();
2319 tcg_gen_mov_i32(tmp2, tmp);
2320 switch ((insn >> 22) & 3) {
2322 for (i = 0; i < 7; i ++) {
2323 tcg_gen_shli_i32(tmp2, tmp2, 4);
2324 tcg_gen_or_i32(tmp, tmp, tmp2);
2328 for (i = 0; i < 3; i ++) {
2329 tcg_gen_shli_i32(tmp2, tmp2, 8);
2330 tcg_gen_or_i32(tmp, tmp, tmp2);
2334 tcg_gen_shli_i32(tmp2, tmp2, 16);
2335 tcg_gen_or_i32(tmp, tmp, tmp2);
2339 tcg_temp_free_i32(tmp2);
2340 tcg_temp_free_i32(tmp);
2342 case 0x103: case 0x503: case 0x903: case 0xd03: /* TMOVMSK */
2343 rd = (insn >> 12) & 0xf;
2344 rd0 = (insn >> 16) & 0xf;
2345 if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2347 gen_op_iwmmxt_movq_M0_wRn(rd0);
2348 tmp = tcg_temp_new_i32();
2349 switch ((insn >> 22) & 3) {
2351 gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2354 gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2357 gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2360 store_reg(s, rd, tmp);
2362 case 0x106: case 0x306: case 0x506: case 0x706: /* WCMPGT */
2363 case 0x906: case 0xb06: case 0xd06: case 0xf06:
2364 wrd = (insn >> 12) & 0xf;
2365 rd0 = (insn >> 16) & 0xf;
2366 rd1 = (insn >> 0) & 0xf;
2367 gen_op_iwmmxt_movq_M0_wRn(rd0);
2368 switch ((insn >> 22) & 3) {
2370 if (insn & (1 << 21))
2371 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2373 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2376 if (insn & (1 << 21))
2377 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2379 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2382 if (insn & (1 << 21))
2383 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2385 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2390 gen_op_iwmmxt_movq_wRn_M0(wrd);
2391 gen_op_iwmmxt_set_mup();
2392 gen_op_iwmmxt_set_cup();
2394 case 0x00e: case 0x20e: case 0x40e: case 0x60e: /* WUNPCKEL */
2395 case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2396 wrd = (insn >> 12) & 0xf;
2397 rd0 = (insn >> 16) & 0xf;
2398 gen_op_iwmmxt_movq_M0_wRn(rd0);
2399 switch ((insn >> 22) & 3) {
2401 if (insn & (1 << 21))
2402 gen_op_iwmmxt_unpacklsb_M0();
2404 gen_op_iwmmxt_unpacklub_M0();
2407 if (insn & (1 << 21))
2408 gen_op_iwmmxt_unpacklsw_M0();
2410 gen_op_iwmmxt_unpackluw_M0();
2413 if (insn & (1 << 21))
2414 gen_op_iwmmxt_unpacklsl_M0();
2416 gen_op_iwmmxt_unpacklul_M0();
2421 gen_op_iwmmxt_movq_wRn_M0(wrd);
2422 gen_op_iwmmxt_set_mup();
2423 gen_op_iwmmxt_set_cup();
2425 case 0x00c: case 0x20c: case 0x40c: case 0x60c: /* WUNPCKEH */
2426 case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2427 wrd = (insn >> 12) & 0xf;
2428 rd0 = (insn >> 16) & 0xf;
2429 gen_op_iwmmxt_movq_M0_wRn(rd0);
2430 switch ((insn >> 22) & 3) {
2432 if (insn & (1 << 21))
2433 gen_op_iwmmxt_unpackhsb_M0();
2435 gen_op_iwmmxt_unpackhub_M0();
2438 if (insn & (1 << 21))
2439 gen_op_iwmmxt_unpackhsw_M0();
2441 gen_op_iwmmxt_unpackhuw_M0();
2444 if (insn & (1 << 21))
2445 gen_op_iwmmxt_unpackhsl_M0();
2447 gen_op_iwmmxt_unpackhul_M0();
2452 gen_op_iwmmxt_movq_wRn_M0(wrd);
2453 gen_op_iwmmxt_set_mup();
2454 gen_op_iwmmxt_set_cup();
2456 case 0x204: case 0x604: case 0xa04: case 0xe04: /* WSRL */
2457 case 0x214: case 0x614: case 0xa14: case 0xe14:
2458 if (((insn >> 22) & 3) == 0)
2460 wrd = (insn >> 12) & 0xf;
2461 rd0 = (insn >> 16) & 0xf;
2462 gen_op_iwmmxt_movq_M0_wRn(rd0);
2463 tmp = tcg_temp_new_i32();
2464 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2465 tcg_temp_free_i32(tmp);
2468 switch ((insn >> 22) & 3) {
2470 gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2473 gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2476 gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2479 tcg_temp_free_i32(tmp);
2480 gen_op_iwmmxt_movq_wRn_M0(wrd);
2481 gen_op_iwmmxt_set_mup();
2482 gen_op_iwmmxt_set_cup();
2484 case 0x004: case 0x404: case 0x804: case 0xc04: /* WSRA */
2485 case 0x014: case 0x414: case 0x814: case 0xc14:
2486 if (((insn >> 22) & 3) == 0)
2488 wrd = (insn >> 12) & 0xf;
2489 rd0 = (insn >> 16) & 0xf;
2490 gen_op_iwmmxt_movq_M0_wRn(rd0);
2491 tmp = tcg_temp_new_i32();
2492 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2493 tcg_temp_free_i32(tmp);
2496 switch ((insn >> 22) & 3) {
2498 gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2501 gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2504 gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2507 tcg_temp_free_i32(tmp);
2508 gen_op_iwmmxt_movq_wRn_M0(wrd);
2509 gen_op_iwmmxt_set_mup();
2510 gen_op_iwmmxt_set_cup();
2512 case 0x104: case 0x504: case 0x904: case 0xd04: /* WSLL */
2513 case 0x114: case 0x514: case 0x914: case 0xd14:
2514 if (((insn >> 22) & 3) == 0)
2516 wrd = (insn >> 12) & 0xf;
2517 rd0 = (insn >> 16) & 0xf;
2518 gen_op_iwmmxt_movq_M0_wRn(rd0);
2519 tmp = tcg_temp_new_i32();
2520 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2521 tcg_temp_free_i32(tmp);
2524 switch ((insn >> 22) & 3) {
2526 gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2529 gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2532 gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2535 tcg_temp_free_i32(tmp);
2536 gen_op_iwmmxt_movq_wRn_M0(wrd);
2537 gen_op_iwmmxt_set_mup();
2538 gen_op_iwmmxt_set_cup();
2540 case 0x304: case 0x704: case 0xb04: case 0xf04: /* WROR */
2541 case 0x314: case 0x714: case 0xb14: case 0xf14:
2542 if (((insn >> 22) & 3) == 0)
2544 wrd = (insn >> 12) & 0xf;
2545 rd0 = (insn >> 16) & 0xf;
2546 gen_op_iwmmxt_movq_M0_wRn(rd0);
2547 tmp = tcg_temp_new_i32();
2548 switch ((insn >> 22) & 3) {
2550 if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2551 tcg_temp_free_i32(tmp);
2554 gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2557 if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2558 tcg_temp_free_i32(tmp);
2561 gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2564 if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2565 tcg_temp_free_i32(tmp);
2568 gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2571 tcg_temp_free_i32(tmp);
2572 gen_op_iwmmxt_movq_wRn_M0(wrd);
2573 gen_op_iwmmxt_set_mup();
2574 gen_op_iwmmxt_set_cup();
2576 case 0x116: case 0x316: case 0x516: case 0x716: /* WMIN */
2577 case 0x916: case 0xb16: case 0xd16: case 0xf16:
2578 wrd = (insn >> 12) & 0xf;
2579 rd0 = (insn >> 16) & 0xf;
2580 rd1 = (insn >> 0) & 0xf;
2581 gen_op_iwmmxt_movq_M0_wRn(rd0);
2582 switch ((insn >> 22) & 3) {
2584 if (insn & (1 << 21))
2585 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2587 gen_op_iwmmxt_minub_M0_wRn(rd1);
2590 if (insn & (1 << 21))
2591 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2593 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2596 if (insn & (1 << 21))
2597 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2599 gen_op_iwmmxt_minul_M0_wRn(rd1);
2604 gen_op_iwmmxt_movq_wRn_M0(wrd);
2605 gen_op_iwmmxt_set_mup();
2607 case 0x016: case 0x216: case 0x416: case 0x616: /* WMAX */
2608 case 0x816: case 0xa16: case 0xc16: case 0xe16:
2609 wrd = (insn >> 12) & 0xf;
2610 rd0 = (insn >> 16) & 0xf;
2611 rd1 = (insn >> 0) & 0xf;
2612 gen_op_iwmmxt_movq_M0_wRn(rd0);
2613 switch ((insn >> 22) & 3) {
2615 if (insn & (1 << 21))
2616 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2618 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2621 if (insn & (1 << 21))
2622 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2624 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2627 if (insn & (1 << 21))
2628 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2630 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2635 gen_op_iwmmxt_movq_wRn_M0(wrd);
2636 gen_op_iwmmxt_set_mup();
2638 case 0x002: case 0x102: case 0x202: case 0x302: /* WALIGNI */
2639 case 0x402: case 0x502: case 0x602: case 0x702:
2640 wrd = (insn >> 12) & 0xf;
2641 rd0 = (insn >> 16) & 0xf;
2642 rd1 = (insn >> 0) & 0xf;
2643 gen_op_iwmmxt_movq_M0_wRn(rd0);
2644 tmp = tcg_const_i32((insn >> 20) & 3);
2645 iwmmxt_load_reg(cpu_V1, rd1);
2646 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2647 tcg_temp_free_i32(tmp);
2648 gen_op_iwmmxt_movq_wRn_M0(wrd);
2649 gen_op_iwmmxt_set_mup();
2651 case 0x01a: case 0x11a: case 0x21a: case 0x31a: /* WSUB */
2652 case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2653 case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2654 case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2655 wrd = (insn >> 12) & 0xf;
2656 rd0 = (insn >> 16) & 0xf;
2657 rd1 = (insn >> 0) & 0xf;
2658 gen_op_iwmmxt_movq_M0_wRn(rd0);
2659 switch ((insn >> 20) & 0xf) {
2661 gen_op_iwmmxt_subnb_M0_wRn(rd1);
2664 gen_op_iwmmxt_subub_M0_wRn(rd1);
2667 gen_op_iwmmxt_subsb_M0_wRn(rd1);
2670 gen_op_iwmmxt_subnw_M0_wRn(rd1);
2673 gen_op_iwmmxt_subuw_M0_wRn(rd1);
2676 gen_op_iwmmxt_subsw_M0_wRn(rd1);
2679 gen_op_iwmmxt_subnl_M0_wRn(rd1);
2682 gen_op_iwmmxt_subul_M0_wRn(rd1);
2685 gen_op_iwmmxt_subsl_M0_wRn(rd1);
2690 gen_op_iwmmxt_movq_wRn_M0(wrd);
2691 gen_op_iwmmxt_set_mup();
2692 gen_op_iwmmxt_set_cup();
2694 case 0x01e: case 0x11e: case 0x21e: case 0x31e: /* WSHUFH */
2695 case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2696 case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2697 case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2698 wrd = (insn >> 12) & 0xf;
2699 rd0 = (insn >> 16) & 0xf;
2700 gen_op_iwmmxt_movq_M0_wRn(rd0);
2701 tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2702 gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2703 tcg_temp_free_i32(tmp);
2704 gen_op_iwmmxt_movq_wRn_M0(wrd);
2705 gen_op_iwmmxt_set_mup();
2706 gen_op_iwmmxt_set_cup();
2708 case 0x018: case 0x118: case 0x218: case 0x318: /* WADD */
2709 case 0x418: case 0x518: case 0x618: case 0x718:
2710 case 0x818: case 0x918: case 0xa18: case 0xb18:
2711 case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2712 wrd = (insn >> 12) & 0xf;
2713 rd0 = (insn >> 16) & 0xf;
2714 rd1 = (insn >> 0) & 0xf;
2715 gen_op_iwmmxt_movq_M0_wRn(rd0);
2716 switch ((insn >> 20) & 0xf) {
2718 gen_op_iwmmxt_addnb_M0_wRn(rd1);
2721 gen_op_iwmmxt_addub_M0_wRn(rd1);
2724 gen_op_iwmmxt_addsb_M0_wRn(rd1);
2727 gen_op_iwmmxt_addnw_M0_wRn(rd1);
2730 gen_op_iwmmxt_adduw_M0_wRn(rd1);
2733 gen_op_iwmmxt_addsw_M0_wRn(rd1);
2736 gen_op_iwmmxt_addnl_M0_wRn(rd1);
2739 gen_op_iwmmxt_addul_M0_wRn(rd1);
2742 gen_op_iwmmxt_addsl_M0_wRn(rd1);
2747 gen_op_iwmmxt_movq_wRn_M0(wrd);
2748 gen_op_iwmmxt_set_mup();
2749 gen_op_iwmmxt_set_cup();
2751 case 0x008: case 0x108: case 0x208: case 0x308: /* WPACK */
2752 case 0x408: case 0x508: case 0x608: case 0x708:
2753 case 0x808: case 0x908: case 0xa08: case 0xb08:
2754 case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2755 if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2757 wrd = (insn >> 12) & 0xf;
2758 rd0 = (insn >> 16) & 0xf;
2759 rd1 = (insn >> 0) & 0xf;
2760 gen_op_iwmmxt_movq_M0_wRn(rd0);
2761 switch ((insn >> 22) & 3) {
2763 if (insn & (1 << 21))
2764 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2766 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2769 if (insn & (1 << 21))
2770 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2772 gen_op_iwmmxt_packul_M0_wRn(rd1);
2775 if (insn & (1 << 21))
2776 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2778 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2781 gen_op_iwmmxt_movq_wRn_M0(wrd);
2782 gen_op_iwmmxt_set_mup();
2783 gen_op_iwmmxt_set_cup();
2785 case 0x201: case 0x203: case 0x205: case 0x207:
2786 case 0x209: case 0x20b: case 0x20d: case 0x20f:
2787 case 0x211: case 0x213: case 0x215: case 0x217:
2788 case 0x219: case 0x21b: case 0x21d: case 0x21f:
2789 wrd = (insn >> 5) & 0xf;
2790 rd0 = (insn >> 12) & 0xf;
2791 rd1 = (insn >> 0) & 0xf;
2792 if (rd0 == 0xf || rd1 == 0xf)
2794 gen_op_iwmmxt_movq_M0_wRn(wrd);
2795 tmp = load_reg(s, rd0);
2796 tmp2 = load_reg(s, rd1);
2797 switch ((insn >> 16) & 0xf) {
2798 case 0x0: /* TMIA */
2799 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2801 case 0x8: /* TMIAPH */
2802 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2804 case 0xc: case 0xd: case 0xe: case 0xf: /* TMIAxy */
2805 if (insn & (1 << 16))
2806 tcg_gen_shri_i32(tmp, tmp, 16);
2807 if (insn & (1 << 17))
2808 tcg_gen_shri_i32(tmp2, tmp2, 16);
2809 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2812 tcg_temp_free_i32(tmp2);
2813 tcg_temp_free_i32(tmp);
2816 tcg_temp_free_i32(tmp2);
2817 tcg_temp_free_i32(tmp);
2818 gen_op_iwmmxt_movq_wRn_M0(wrd);
2819 gen_op_iwmmxt_set_mup();
2828 /* Disassemble an XScale DSP instruction. Returns nonzero if an error occurred
2829    (i.e. an undefined instruction). */
2830 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2832 int acc, rd0, rd1, rdhi, rdlo;
2835 if ((insn & 0x0ff00f10) == 0x0e200010) {
2836 /* Multiply with Internal Accumulate Format */
2837 rd0 = (insn >> 12) & 0xf;
2839 acc = (insn >> 5) & 7;
2844 tmp = load_reg(s, rd0);
2845 tmp2 = load_reg(s, rd1);
2846 switch ((insn >> 16) & 0xf) {
2848 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2850 case 0x8: /* MIAPH */
2851 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2853 case 0xc: /* MIABB */
2854 case 0xd: /* MIABT */
2855 case 0xe: /* MIATB */
2856 case 0xf: /* MIATT */
2857 if (insn & (1 << 16))
2858 tcg_gen_shri_i32(tmp, tmp, 16);
2859 if (insn & (1 << 17))
2860 tcg_gen_shri_i32(tmp2, tmp2, 16);
2861 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2866 tcg_temp_free_i32(tmp2);
2867 tcg_temp_free_i32(tmp);
2869 gen_op_iwmmxt_movq_wRn_M0(acc);
2873 if ((insn & 0x0fe00ff8) == 0x0c400000) {
2874 /* Internal Accumulator Access Format */
2875 rdhi = (insn >> 16) & 0xf;
2876 rdlo = (insn >> 12) & 0xf;
2882 if (insn & ARM_CP_RW_BIT) { /* MRA */
2883 iwmmxt_load_reg(cpu_V0, acc);
2884 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2885 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
2886 tcg_gen_extrl_i64_i32(cpu_R[rdhi], cpu_V0);
2887 tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2889 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2890 iwmmxt_store_reg(cpu_V0, acc);
2898 #define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
2899 #define VFP_SREG(insn, bigbit, smallbit) \
2900 ((VFP_REG_SHR(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1))
2901 #define VFP_DREG(reg, insn, bigbit, smallbit) do { \
2902 if (arm_dc_feature(s, ARM_FEATURE_VFP3)) { \
2903 reg = (((insn) >> (bigbit)) & 0x0f) \
2904 | (((insn) >> ((smallbit) - 4)) & 0x10); \
2906 if (insn & (1 << (smallbit))) \
2908 reg = ((insn) >> (bigbit)) & 0x0f; \
2911 #define VFP_SREG_D(insn) VFP_SREG(insn, 12, 22)
2912 #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
2913 #define VFP_SREG_N(insn) VFP_SREG(insn, 16, 7)
2914 #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
2915 #define VFP_SREG_M(insn) VFP_SREG(insn, 0, 5)
2916 #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
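/*
 * Illustrative note on the macros above: for VFP_DREG_D the 4-bit Vd
 * field is insn bits [15:12] and the "D" bit is bit 22.  With VFP3
 * (32 double registers) the register number is Vd | (D << 4); on
 * pre-VFP3 cores a set D bit is treated as an illegal encoding and
 * only Vd (0..15) is used.  VFP_SREG packs the same two fields the
 * other way round, (Vd << 1) | D, since two single-precision registers
 * live in each double.
 */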
2918 static void gen_neon_dup_low16(TCGv_i32 var)
2920 TCGv_i32 tmp = tcg_temp_new_i32();
2921 tcg_gen_ext16u_i32(var, var);
2922 tcg_gen_shli_i32(tmp, var, 16);
2923 tcg_gen_or_i32(var, var, tmp);
2924 tcg_temp_free_i32(tmp);
2927 static void gen_neon_dup_high16(TCGv_i32 var)
2929 TCGv_i32 tmp = tcg_temp_new_i32();
2930 tcg_gen_andi_i32(var, var, 0xffff0000);
2931 tcg_gen_shri_i32(tmp, var, 16);
2932 tcg_gen_or_i32(var, var, tmp);
2933 tcg_temp_free_i32(tmp);
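/*
 * Example (illustrative values only): with var = 0xAAAABBBB,
 * gen_neon_dup_low16 leaves 0xBBBBBBBB and gen_neon_dup_high16 leaves
 * 0xAAAAAAAA, i.e. the chosen 16-bit half is replicated into both
 * halves of the 32-bit value.
 */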
2937 * Disassemble a VFP instruction. Returns nonzero if an error occurred
2938 * (i.e. an undefined instruction).
2940 static int disas_vfp_insn(DisasContext *s, uint32_t insn)
2942 if (!arm_dc_feature(s, ARM_FEATURE_VFP)) {
2947 * If the decodetree decoder handles this insn it will always
2948 * emit code to either execute the insn or generate an appropriate
2949 * exception; so we don't need to ever return non-zero to tell
2950 * the calling code to emit an UNDEF exception.
2952 if (extract32(insn, 28, 4) == 0xf) {
2953 if (disas_vfp_uncond(s, insn)) {
2957 if (disas_vfp(s, insn)) {
2961 /* If the decodetree decoder didn't handle this insn, it must be UNDEF */
2965 static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
2967 #ifndef CONFIG_USER_ONLY
2968 return (s->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) ||
2969 ((s->pc - 1) & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
2975 static void gen_goto_ptr(void)
2977 tcg_gen_lookup_and_goto_ptr();
2980 /* This will end the TB but doesn't guarantee we'll return to
2981 * cpu_loop_exec. Any live exit_requests will be processed as we
2982 * enter the next TB.
2984 static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
2986 if (use_goto_tb(s, dest)) {
2988 gen_set_pc_im(s, dest);
2989 tcg_gen_exit_tb(s->base.tb, n);
2991 gen_set_pc_im(s, dest);
2994 s->base.is_jmp = DISAS_NORETURN;
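/*
 * In outline: when the destination is on the same guest page as the TB
 * (or as the current instruction) we chain directly via the goto_tb /
 * tcg_gen_exit_tb mechanism, which lets the translated blocks be
 * patched to jump straight to their successor; otherwise we just
 * update the PC and fall back to gen_goto_ptr(), which does a TB-hash
 * lookup at run time.
 */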
2997 static inline void gen_jmp (DisasContext *s, uint32_t dest)
2999 if (unlikely(is_singlestepping(s))) {
3000 /* An indirect jump so that we still trigger the debug exception. */
3005 gen_goto_tb(s, 0, dest);
3009 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
3012 tcg_gen_sari_i32(t0, t0, 16);
3016 tcg_gen_sari_i32(t1, t1, 16);
3019 tcg_gen_mul_i32(t0, t0, t1);
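/*
 * gen_mulxy selects which signed halfword of each operand feeds the
 * 32-bit multiply: a nonzero x/y takes the top half (arithmetic shift
 * right by 16), otherwise the bottom half is sign-extended.  This is
 * what distinguishes the BB/BT/TB/TT forms of the 16x16 multiplies.
 */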
3022 /* Return the mask of PSR bits set by a MSR instruction. */
3023 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
3028 if (flags & (1 << 0))
3030 if (flags & (1 << 1))
3032 if (flags & (1 << 2))
3034 if (flags & (1 << 3))
3037 /* Mask out undefined bits. */
3038 mask &= ~CPSR_RESERVED;
3039 if (!arm_dc_feature(s, ARM_FEATURE_V4T)) {
3042 if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
3043 mask &= ~CPSR_Q; /* V5TE in reality */
3045 if (!arm_dc_feature(s, ARM_FEATURE_V6)) {
3046 mask &= ~(CPSR_E | CPSR_GE);
3048 if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
3051 /* Mask out execution state and reserved bits. */
3053 mask &= ~(CPSR_EXEC | CPSR_RESERVED);
3055 /* Mask out privileged bits. */
3061 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
3062 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
3066 /* ??? This is also undefined in system mode. */
3070 tmp = load_cpu_field(spsr);
3071 tcg_gen_andi_i32(tmp, tmp, ~mask);
3072 tcg_gen_andi_i32(t0, t0, mask);
3073 tcg_gen_or_i32(tmp, tmp, t0);
3074 store_cpu_field(tmp, spsr);
3076 gen_set_cpsr(t0, mask);
3078 tcg_temp_free_i32(t0);
3083 /* Returns nonzero if access to the PSR is not permitted. */
3084 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
3087 tmp = tcg_temp_new_i32();
3088 tcg_gen_movi_i32(tmp, val);
3089 return gen_set_psr(s, mask, spsr, tmp);
3092 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
3093 int *tgtmode, int *regno)
3095 /* Decode the r and sysm fields of MSR/MRS banked accesses into
3096 * the target mode and register number, and identify the various
3097 * unpredictable cases.
3098 * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
3099 * + executed in user mode
3100 * + using R15 as the src/dest register
3101 * + accessing an unimplemented register
3102 * + accessing a register that's inaccessible at current PL/security state*
3103 * + accessing a register that you could access with a different insn
3104 * We choose to UNDEF in all these cases.
3105 * Since we don't know which of the various AArch32 modes we are in
3106 * we have to defer some checks to runtime.
3107 * Accesses to Monitor mode registers from Secure EL1 (which implies
3108 * that EL3 is AArch64) must trap to EL3.
3110 * If the access checks fail this function will emit code to take
3111 * an exception and return false. Otherwise it will return true,
3112 * and set *tgtmode and *regno appropriately.
3114 int exc_target = default_exception_el(s);
3116 /* These instructions are present only in ARMv8, or in ARMv7 with the
3117 * Virtualization Extensions.
3119 if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
3120 !arm_dc_feature(s, ARM_FEATURE_EL2)) {
3124 if (IS_USER(s) || rn == 15) {
3128 /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
3129 * of registers into (r, sysm).
3132 /* SPSRs for other modes */
3134 case 0xe: /* SPSR_fiq */
3135 *tgtmode = ARM_CPU_MODE_FIQ;
3137 case 0x10: /* SPSR_irq */
3138 *tgtmode = ARM_CPU_MODE_IRQ;
3140 case 0x12: /* SPSR_svc */
3141 *tgtmode = ARM_CPU_MODE_SVC;
3143 case 0x14: /* SPSR_abt */
3144 *tgtmode = ARM_CPU_MODE_ABT;
3146 case 0x16: /* SPSR_und */
3147 *tgtmode = ARM_CPU_MODE_UND;
3149 case 0x1c: /* SPSR_mon */
3150 *tgtmode = ARM_CPU_MODE_MON;
3152 case 0x1e: /* SPSR_hyp */
3153 *tgtmode = ARM_CPU_MODE_HYP;
3155 default: /* unallocated */
3158 /* We arbitrarily assign SPSR a register number of 16. */
3161 /* general purpose registers for other modes */
3163 case 0x0 ... 0x6: /* 0b00xxx : r8_usr ... r14_usr */
3164 *tgtmode = ARM_CPU_MODE_USR;
3167 case 0x8 ... 0xe: /* 0b01xxx : r8_fiq ... r14_fiq */
3168 *tgtmode = ARM_CPU_MODE_FIQ;
3171 case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
3172 *tgtmode = ARM_CPU_MODE_IRQ;
3173 *regno = sysm & 1 ? 13 : 14;
3175 case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
3176 *tgtmode = ARM_CPU_MODE_SVC;
3177 *regno = sysm & 1 ? 13 : 14;
3179 case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
3180 *tgtmode = ARM_CPU_MODE_ABT;
3181 *regno = sysm & 1 ? 13 : 14;
3183 case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
3184 *tgtmode = ARM_CPU_MODE_UND;
3185 *regno = sysm & 1 ? 13 : 14;
3187 case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
3188 *tgtmode = ARM_CPU_MODE_MON;
3189 *regno = sysm & 1 ? 13 : 14;
3191 case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
3192 *tgtmode = ARM_CPU_MODE_HYP;
3193 /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
3194 *regno = sysm & 1 ? 13 : 17;
3196 default: /* unallocated */
3201 /* Catch the 'accessing inaccessible register' cases we can detect
3202 * at translate time.
3205 case ARM_CPU_MODE_MON:
3206 if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
3209 if (s->current_el == 1) {
3210 /* If we're in Secure EL1 (which implies that EL3 is AArch64)
3211 * then accesses to Mon registers trap to EL3
3217 case ARM_CPU_MODE_HYP:
3219 * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
3220 * (and so we can forbid accesses from EL2 or below). elr_hyp
3221 * can be accessed also from Hyp mode, so forbid accesses from
3224 if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
3225 (s->current_el < 3 && *regno != 17)) {
3236 /* If we get here then some access check did not pass */
3237 gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(), exc_target);
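/*
 * Worked examples of the (r, sysm) decode above, for illustration:
 * r=0, sysm=0x08..0x0e selects r8_fiq..r14_fiq; r=0, sysm=0x10 selects
 * r14_irq and sysm=0x11 selects r13_irq; r=1, sysm=0x0e selects
 * SPSR_fiq.  Anything unallocated, or any access the checks above can
 * reject at translate time, ends up at the UNDEF path.
 */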
3241 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
3243 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
3244 int tgtmode = 0, regno = 0;
3246 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, ®no)) {
3250 /* Sync state because msr_banked() can raise exceptions */
3251 gen_set_condexec(s);
3252 gen_set_pc_im(s, s->pc - 4);
3253 tcg_reg = load_reg(s, rn);
3254 tcg_tgtmode = tcg_const_i32(tgtmode);
3255 tcg_regno = tcg_const_i32(regno);
3256 gen_helper_msr_banked(cpu_env, tcg_reg, tcg_tgtmode, tcg_regno);
3257 tcg_temp_free_i32(tcg_tgtmode);
3258 tcg_temp_free_i32(tcg_regno);
3259 tcg_temp_free_i32(tcg_reg);
3260 s->base.is_jmp = DISAS_UPDATE;
3263 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
3265 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
3266 int tgtmode = 0, regno = 0;
3268 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, ®no)) {
3272 /* Sync state because mrs_banked() can raise exceptions */
3273 gen_set_condexec(s);
3274 gen_set_pc_im(s, s->pc - 4);
3275 tcg_reg = tcg_temp_new_i32();
3276 tcg_tgtmode = tcg_const_i32(tgtmode);
3277 tcg_regno = tcg_const_i32(regno);
3278 gen_helper_mrs_banked(tcg_reg, cpu_env, tcg_tgtmode, tcg_regno);
3279 tcg_temp_free_i32(tcg_tgtmode);
3280 tcg_temp_free_i32(tcg_regno);
3281 store_reg(s, rn, tcg_reg);
3282 s->base.is_jmp = DISAS_UPDATE;
3285 /* Store value to PC as for an exception return (i.e. don't
3286 * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
3287 * will do the masking based on the new value of the Thumb bit.
3289 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
3291 tcg_gen_mov_i32(cpu_R[15], pc);
3292 tcg_temp_free_i32(pc);
3295 /* Generate a v6 exception return. Marks both values as dead. */
3296 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
3298 store_pc_exc_ret(s, pc);
3299 /* The cpsr_write_eret helper will mask the low bits of PC
3300 * appropriately depending on the new Thumb bit, so it must
3301 * be called after storing the new PC.
3303 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
3306 gen_helper_cpsr_write_eret(cpu_env, cpsr);
3307 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
3310 tcg_temp_free_i32(cpsr);
3311 /* Must exit loop to check un-masked IRQs */
3312 s->base.is_jmp = DISAS_EXIT;
3315 /* Generate an old-style exception return. Marks pc as dead. */
3316 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
3318 gen_rfe(s, pc, load_cpu_field(spsr));
3322 * For WFI we will halt the vCPU until an IRQ. For WFE and YIELD we
3323 * only call the helper when running single threaded TCG code to ensure
3324 * the next round-robin scheduled vCPU gets a crack. In MTTCG mode we
3325 * just skip this instruction. Currently the SEV/SEVL instructions
3326 * which are *one* of many ways to wake the CPU from WFE are not
3327 * implemented so we can't sleep like WFI does.
3329 static void gen_nop_hint(DisasContext *s, int val)
3332 /* When running in MTTCG we don't generate jumps to the yield and
3333 * WFE helpers as it won't affect the scheduling of other vCPUs.
3334 * If we wanted to more completely model WFE/SEV so we don't busy
3335 * spin unnecessarily we would need to do something more involved.
3338 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
3339 gen_set_pc_im(s, s->pc);
3340 s->base.is_jmp = DISAS_YIELD;
3344 gen_set_pc_im(s, s->pc);
3345 s->base.is_jmp = DISAS_WFI;
3348 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
3349 gen_set_pc_im(s, s->pc);
3350 s->base.is_jmp = DISAS_WFE;
3355 /* TODO: Implement SEV, SEVL and WFE. May help SMP performance. */
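/*
 * For reference (the standard ARM hint immediates, an assumption about
 * the values decoded above): 1 is YIELD, 2 is WFE, 3 is WFI, and 4/5
 * (SEV/SEVL) are accepted as NOPs here since our WFE never actually
 * sleeps.
 */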
3361 #define CPU_V001 cpu_V0, cpu_V0, cpu_V1
3363 static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
3366 case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
3367 case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
3368 case 2: tcg_gen_add_i32(t0, t0, t1); break;
3373 static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
3376 case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
3377 case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
3378 case 2: tcg_gen_sub_i32(t0, t1, t0); break;
3383 /* 32-bit pairwise ops end up the same as the elementwise versions. */
3384 #define gen_helper_neon_pmax_s32 tcg_gen_smax_i32
3385 #define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
3386 #define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
3387 #define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
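/*
 * The aliasing above works because for 32-bit elements a pairwise
 * max/min is just the max/min of two scalar values: the per-pass Neon
 * code below hands the helper the two members of each pair, so the
 * ordinary elementwise i32 ops give the right answer.
 */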
3389 #define GEN_NEON_INTEGER_OP_ENV(name) do { \
3390 switch ((size << 1) | u) { \
3392 gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \
3395 gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \
3398 gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \
3401 gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \
3404 gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \
3407 gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \
3409 default: return 1; \
3412 #define GEN_NEON_INTEGER_OP(name) do { \
3413 switch ((size << 1) | u) { \
3415 gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \
3418 gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \
3421 gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \
3424 gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \
3427 gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \
3430 gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \
3432 default: return 1; \
3435 static TCGv_i32 neon_load_scratch(int scratch)
3437 TCGv_i32 tmp = tcg_temp_new_i32();
3438 tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3442 static void neon_store_scratch(int scratch, TCGv_i32 var)
3444 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3445 tcg_temp_free_i32(var);
3448 static inline TCGv_i32 neon_get_scalar(int size, int reg)
3452 tmp = neon_load_reg(reg & 7, reg >> 4);
3454 gen_neon_dup_high16(tmp);
3456 gen_neon_dup_low16(tmp);
3459 tmp = neon_load_reg(reg & 15, reg >> 4);
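/*
 * Note on neon_get_scalar: "reg" packs the register number and the
 * scalar index together.  For 16-bit scalars bits [2:0] select the D
 * register, bit 4 selects which 32-bit word of it to load and bit 3
 * selects the halfword, which is then duplicated across the word; for
 * 32-bit scalars bits [3:0] select the register and bit 4 the word.
 */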
3464 static int gen_neon_unzip(int rd, int rm, int size, int q)
3468 if (!q && size == 2) {
3471 pd = vfp_reg_ptr(true, rd);
3472 pm = vfp_reg_ptr(true, rm);
3476 gen_helper_neon_qunzip8(pd, pm);
3479 gen_helper_neon_qunzip16(pd, pm);
3482 gen_helper_neon_qunzip32(pd, pm);
3490 gen_helper_neon_unzip8(pd, pm);
3493 gen_helper_neon_unzip16(pd, pm);
3499 tcg_temp_free_ptr(pd);
3500 tcg_temp_free_ptr(pm);
3504 static int gen_neon_zip(int rd, int rm, int size, int q)
3508 if (!q && size == 2) {
3511 pd = vfp_reg_ptr(true, rd);
3512 pm = vfp_reg_ptr(true, rm);
3516 gen_helper_neon_qzip8(pd, pm);
3519 gen_helper_neon_qzip16(pd, pm);
3522 gen_helper_neon_qzip32(pd, pm);
3530 gen_helper_neon_zip8(pd, pm);
3533 gen_helper_neon_zip16(pd, pm);
3539 tcg_temp_free_ptr(pd);
3540 tcg_temp_free_ptr(pm);
3544 static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
3548 rd = tcg_temp_new_i32();
3549 tmp = tcg_temp_new_i32();
3551 tcg_gen_shli_i32(rd, t0, 8);
3552 tcg_gen_andi_i32(rd, rd, 0xff00ff00);
3553 tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
3554 tcg_gen_or_i32(rd, rd, tmp);
3556 tcg_gen_shri_i32(t1, t1, 8);
3557 tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
3558 tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
3559 tcg_gen_or_i32(t1, t1, tmp);
3560 tcg_gen_mov_i32(t0, rd);
3562 tcg_temp_free_i32(tmp);
3563 tcg_temp_free_i32(rd);
3566 static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
3570 rd = tcg_temp_new_i32();
3571 tmp = tcg_temp_new_i32();
3573 tcg_gen_shli_i32(rd, t0, 16);
3574 tcg_gen_andi_i32(tmp, t1, 0xffff);
3575 tcg_gen_or_i32(rd, rd, tmp);
3576 tcg_gen_shri_i32(t1, t1, 16);
3577 tcg_gen_andi_i32(tmp, t0, 0xffff0000);
3578 tcg_gen_or_i32(t1, t1, tmp);
3579 tcg_gen_mov_i32(t0, rd);
3581 tcg_temp_free_i32(tmp);
3582 tcg_temp_free_i32(rd);
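/*
 * Example of the halfword transposition above (illustrative values):
 * with t0 = 0xAAAA1111 and t1 = 0xBBBB2222, gen_neon_trn_u16 leaves
 * t0 = 0x11112222 and t1 = 0xAAAABBBB; the byte variant does the same
 * interleave at byte granularity.  These implement one 32-bit pass of
 * VTRN.
 */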
3590 } const neon_ls_element_type[11] = {
3604 /* Translate a NEON load/store element instruction. Return nonzero if the
3605 instruction is invalid. */
3606 static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
3626 /* FIXME: this access check should not take precedence over UNDEF
3627 * for invalid encodings; we will generate incorrect syndrome information
3628 * for attempts to execute invalid vfp/neon encodings with FP disabled.
3630 if (s->fp_excp_el) {
3631 gen_exception_insn(s, 4, EXCP_UDEF,
3632 syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
3636 if (!s->vfp_enabled)
3638 VFP_DREG_D(rd, insn);
3639 rn = (insn >> 16) & 0xf;
3641 load = (insn & (1 << 21)) != 0;
3642 endian = s->be_data;
3643 mmu_idx = get_mem_index(s);
3644 if ((insn & (1 << 23)) == 0) {
3645 /* Load store all elements. */
3646 op = (insn >> 8) & 0xf;
3647 size = (insn >> 6) & 3;
3650 /* Catch UNDEF cases for bad values of align field */
3653 if (((insn >> 5) & 1) == 1) {
3658 if (((insn >> 4) & 3) == 3) {
3665 nregs = neon_ls_element_type[op].nregs;
3666 interleave = neon_ls_element_type[op].interleave;
3667 spacing = neon_ls_element_type[op].spacing;
3668 if (size == 3 && (interleave | spacing) != 1) {
3671 /* For our purposes, bytes are always little-endian. */
3675 /* Consecutive little-endian elements from a single register
3676 * can be promoted to a larger little-endian operation.
3678 if (interleave == 1 && endian == MO_LE) {
3681 tmp64 = tcg_temp_new_i64();
3682 addr = tcg_temp_new_i32();
3683 tmp2 = tcg_const_i32(1 << size);
3684 load_reg_var(s, addr, rn);
3685 for (reg = 0; reg < nregs; reg++) {
3686 for (n = 0; n < 8 >> size; n++) {
3688 for (xs = 0; xs < interleave; xs++) {
3689 int tt = rd + reg + spacing * xs;
3692 gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size);
3693 neon_store_element64(tt, n, size, tmp64);
3695 neon_load_element64(tmp64, tt, n, size);
3696 gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size);
3698 tcg_gen_add_i32(addr, addr, tmp2);
3702 tcg_temp_free_i32(addr);
3703 tcg_temp_free_i32(tmp2);
3704 tcg_temp_free_i64(tmp64);
3705 stride = nregs * interleave * 8;
3707 size = (insn >> 10) & 3;
3709 /* Load single element to all lanes. */
3710 int a = (insn >> 4) & 1;
3714 size = (insn >> 6) & 3;
3715 nregs = ((insn >> 8) & 3) + 1;
3718 if (nregs != 4 || a == 0) {
3721 /* For VLD4 size==3 a == 1 means 32 bits at 16 byte alignment */
3724 if (nregs == 1 && a == 1 && size == 0) {
3727 if (nregs == 3 && a == 1) {
3730 addr = tcg_temp_new_i32();
3731 load_reg_var(s, addr, rn);
3733 /* VLD1 to all lanes: bit 5 indicates how many Dregs to write.
3734 * VLD2/3/4 to all lanes: bit 5 indicates register stride.
3736 stride = (insn & (1 << 5)) ? 2 : 1;
3737 vec_size = nregs == 1 ? stride * 8 : 8;
3739 tmp = tcg_temp_new_i32();
3740 for (reg = 0; reg < nregs; reg++) {
3741 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
3743 if ((rd & 1) && vec_size == 16) {
3744 /* We cannot write 16 bytes at once because the
3745 * destination is unaligned.
3747 tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0),
3749 tcg_gen_gvec_mov(0, neon_reg_offset(rd + 1, 0),
3750 neon_reg_offset(rd, 0), 8, 8);
3752 tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0),
3753 vec_size, vec_size, tmp);
3755 tcg_gen_addi_i32(addr, addr, 1 << size);
3758 tcg_temp_free_i32(tmp);
3759 tcg_temp_free_i32(addr);
3760 stride = (1 << size) * nregs;
3762 /* Single element. */
3763 int idx = (insn >> 4) & 0xf;
3767 reg_idx = (insn >> 5) & 7;
3771 reg_idx = (insn >> 6) & 3;
3772 stride = (insn & (1 << 5)) ? 2 : 1;
3775 reg_idx = (insn >> 7) & 1;
3776 stride = (insn & (1 << 6)) ? 2 : 1;
3781 nregs = ((insn >> 8) & 3) + 1;
3782 /* Catch the UNDEF cases. This is unavoidably a bit messy. */
3785 if (((idx & (1 << size)) != 0) ||
3786 (size == 2 && ((idx & 3) == 1 || (idx & 3) == 2))) {
3791 if ((idx & 1) != 0) {
3796 if (size == 2 && (idx & 2) != 0) {
3801 if ((size == 2) && ((idx & 3) == 3)) {
3808 if ((rd + stride * (nregs - 1)) > 31) {
3809 /* Attempts to write off the end of the register file
3810 * are UNPREDICTABLE; we choose to UNDEF because otherwise
3811 * the neon_load_reg() would write off the end of the array.
3815 tmp = tcg_temp_new_i32();
3816 addr = tcg_temp_new_i32();
3817 load_reg_var(s, addr, rn);
3818 for (reg = 0; reg < nregs; reg++) {
3820 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
3822 neon_store_element(rd, reg_idx, size, tmp);
3823 } else { /* Store */
3824 neon_load_element(tmp, rd, reg_idx, size);
3825 gen_aa32_st_i32(s, tmp, addr, get_mem_index(s),
3829 tcg_gen_addi_i32(addr, addr, 1 << size);
3831 tcg_temp_free_i32(addr);
3832 tcg_temp_free_i32(tmp);
3833 stride = nregs * (1 << size);
3839 base = load_reg(s, rn);
3841 tcg_gen_addi_i32(base, base, stride);
3844 index = load_reg(s, rm);
3845 tcg_gen_add_i32(base, base, index);
3846 tcg_temp_free_i32(index);
3848 store_reg(s, rn, base);
3853 static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
3856 case 0: gen_helper_neon_narrow_u8(dest, src); break;
3857 case 1: gen_helper_neon_narrow_u16(dest, src); break;
3858 case 2: tcg_gen_extrl_i64_i32(dest, src); break;
3863 static inline void gen_neon_narrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3866 case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
3867 case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
3868 case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
3873 static inline void gen_neon_narrow_satu(int size, TCGv_i32 dest, TCGv_i64 src)
3876 case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
3877 case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
3878 case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
3883 static inline void gen_neon_unarrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3886 case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
3887 case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
3888 case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
3893 static inline void gen_neon_shift_narrow(int size, TCGv_i32 var, TCGv_i32 shift,
3899 case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
3900 case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
3905 case 1: gen_helper_neon_rshl_s16(var, var, shift); break;
3906 case 2: gen_helper_neon_rshl_s32(var, var, shift); break;
3913 case 1: gen_helper_neon_shl_u16(var, var, shift); break;
3914 case 2: gen_helper_neon_shl_u32(var, var, shift); break;
3919 case 1: gen_helper_neon_shl_s16(var, var, shift); break;
3920 case 2: gen_helper_neon_shl_s32(var, var, shift); break;
3927 static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u)
3931 case 0: gen_helper_neon_widen_u8(dest, src); break;
3932 case 1: gen_helper_neon_widen_u16(dest, src); break;
3933 case 2: tcg_gen_extu_i32_i64(dest, src); break;
3938 case 0: gen_helper_neon_widen_s8(dest, src); break;
3939 case 1: gen_helper_neon_widen_s16(dest, src); break;
3940 case 2: tcg_gen_ext_i32_i64(dest, src); break;
3944 tcg_temp_free_i32(src);
3947 static inline void gen_neon_addl(int size)
3950 case 0: gen_helper_neon_addl_u16(CPU_V001); break;
3951 case 1: gen_helper_neon_addl_u32(CPU_V001); break;
3952 case 2: tcg_gen_add_i64(CPU_V001); break;
3957 static inline void gen_neon_subl(int size)
3960 case 0: gen_helper_neon_subl_u16(CPU_V001); break;
3961 case 1: gen_helper_neon_subl_u32(CPU_V001); break;
3962 case 2: tcg_gen_sub_i64(CPU_V001); break;
3967 static inline void gen_neon_negl(TCGv_i64 var, int size)
3970 case 0: gen_helper_neon_negl_u16(var, var); break;
3971 case 1: gen_helper_neon_negl_u32(var, var); break;
3973 tcg_gen_neg_i64(var, var);
3979 static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
3982 case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
3983 case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
3988 static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
3993 switch ((size << 1) | u) {
3994 case 0: gen_helper_neon_mull_s8(dest, a, b); break;
3995 case 1: gen_helper_neon_mull_u8(dest, a, b); break;
3996 case 2: gen_helper_neon_mull_s16(dest, a, b); break;
3997 case 3: gen_helper_neon_mull_u16(dest, a, b); break;
3999 tmp = gen_muls_i64_i32(a, b);
4000 tcg_gen_mov_i64(dest, tmp);
4001 tcg_temp_free_i64(tmp);
4004 tmp = gen_mulu_i64_i32(a, b);
4005 tcg_gen_mov_i64(dest, tmp);
4006 tcg_temp_free_i64(tmp);
4011 /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
4012 Don't forget to clean them now. */
4014 tcg_temp_free_i32(a);
4015 tcg_temp_free_i32(b);
4019 static void gen_neon_narrow_op(int op, int u, int size,
4020 TCGv_i32 dest, TCGv_i64 src)
4024 gen_neon_unarrow_sats(size, dest, src);
4026 gen_neon_narrow(size, dest, src);
4030 gen_neon_narrow_satu(size, dest, src);
4032 gen_neon_narrow_sats(size, dest, src);
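/*
 * Descriptive note on gen_neon_narrow_op: the (op, u) pair selects one
 * of the four narrowing flavours used for VMOVN/VQMOVN/VQMOVUN.  With
 * op set, u chooses between the signed-to-unsigned saturating narrow
 * and the plain narrow; with op clear, u chooses between the unsigned
 * and signed saturating narrows.
 */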
4037 /* Symbolic constants for op fields for Neon 3-register same-length.
4038 * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
4041 #define NEON_3R_VHADD 0
4042 #define NEON_3R_VQADD 1
4043 #define NEON_3R_VRHADD 2
4044 #define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
4045 #define NEON_3R_VHSUB 4
4046 #define NEON_3R_VQSUB 5
4047 #define NEON_3R_VCGT 6
4048 #define NEON_3R_VCGE 7
4049 #define NEON_3R_VSHL 8
4050 #define NEON_3R_VQSHL 9
4051 #define NEON_3R_VRSHL 10
4052 #define NEON_3R_VQRSHL 11
4053 #define NEON_3R_VMAX 12
4054 #define NEON_3R_VMIN 13
4055 #define NEON_3R_VABD 14
4056 #define NEON_3R_VABA 15
4057 #define NEON_3R_VADD_VSUB 16
4058 #define NEON_3R_VTST_VCEQ 17
4059 #define NEON_3R_VML 18 /* VMLA, VMLS */
4060 #define NEON_3R_VMUL 19
4061 #define NEON_3R_VPMAX 20
4062 #define NEON_3R_VPMIN 21
4063 #define NEON_3R_VQDMULH_VQRDMULH 22
4064 #define NEON_3R_VPADD_VQRDMLAH 23
4065 #define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
4066 #define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS, VQRDMLSH */
4067 #define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
4068 #define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
4069 #define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
4070 #define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
4071 #define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
4072 #define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */
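/* Each entry in this table has bit n set if the insn allows size value n
 * (otherwise the op UNDEFs); entries of 0xf marked "size field encodes op
 * type" accept every size because the size bits select the operation
 * rather than the element width.
 */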
4074 static const uint8_t neon_3r_sizes[] = {
4075 [NEON_3R_VHADD] = 0x7,
4076 [NEON_3R_VQADD] = 0xf,
4077 [NEON_3R_VRHADD] = 0x7,
4078 [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
4079 [NEON_3R_VHSUB] = 0x7,
4080 [NEON_3R_VQSUB] = 0xf,
4081 [NEON_3R_VCGT] = 0x7,
4082 [NEON_3R_VCGE] = 0x7,
4083 [NEON_3R_VSHL] = 0xf,
4084 [NEON_3R_VQSHL] = 0xf,
4085 [NEON_3R_VRSHL] = 0xf,
4086 [NEON_3R_VQRSHL] = 0xf,
4087 [NEON_3R_VMAX] = 0x7,
4088 [NEON_3R_VMIN] = 0x7,
4089 [NEON_3R_VABD] = 0x7,
4090 [NEON_3R_VABA] = 0x7,
4091 [NEON_3R_VADD_VSUB] = 0xf,
4092 [NEON_3R_VTST_VCEQ] = 0x7,
4093 [NEON_3R_VML] = 0x7,
4094 [NEON_3R_VMUL] = 0x7,
4095 [NEON_3R_VPMAX] = 0x7,
4096 [NEON_3R_VPMIN] = 0x7,
4097 [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
4098 [NEON_3R_VPADD_VQRDMLAH] = 0x7,
4099 [NEON_3R_SHA] = 0xf, /* size field encodes op type */
4100 [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */
4101 [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
4102 [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
4103 [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
4104 [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
4105 [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
4106 [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
4109 /* Symbolic constants for op fields for Neon 2-register miscellaneous.
4110 * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
4113 #define NEON_2RM_VREV64 0
4114 #define NEON_2RM_VREV32 1
4115 #define NEON_2RM_VREV16 2
4116 #define NEON_2RM_VPADDL 4
4117 #define NEON_2RM_VPADDL_U 5
4118 #define NEON_2RM_AESE 6 /* Includes AESD */
4119 #define NEON_2RM_AESMC 7 /* Includes AESIMC */
4120 #define NEON_2RM_VCLS 8
4121 #define NEON_2RM_VCLZ 9
4122 #define NEON_2RM_VCNT 10
4123 #define NEON_2RM_VMVN 11
4124 #define NEON_2RM_VPADAL 12
4125 #define NEON_2RM_VPADAL_U 13
4126 #define NEON_2RM_VQABS 14
4127 #define NEON_2RM_VQNEG 15
4128 #define NEON_2RM_VCGT0 16
4129 #define NEON_2RM_VCGE0 17
4130 #define NEON_2RM_VCEQ0 18
4131 #define NEON_2RM_VCLE0 19
4132 #define NEON_2RM_VCLT0 20
4133 #define NEON_2RM_SHA1H 21
4134 #define NEON_2RM_VABS 22
4135 #define NEON_2RM_VNEG 23
4136 #define NEON_2RM_VCGT0_F 24
4137 #define NEON_2RM_VCGE0_F 25
4138 #define NEON_2RM_VCEQ0_F 26
4139 #define NEON_2RM_VCLE0_F 27
4140 #define NEON_2RM_VCLT0_F 28
4141 #define NEON_2RM_VABS_F 30
4142 #define NEON_2RM_VNEG_F 31
4143 #define NEON_2RM_VSWP 32
4144 #define NEON_2RM_VTRN 33
4145 #define NEON_2RM_VUZP 34
4146 #define NEON_2RM_VZIP 35
4147 #define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
4148 #define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
4149 #define NEON_2RM_VSHLL 38
4150 #define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
4151 #define NEON_2RM_VRINTN 40
4152 #define NEON_2RM_VRINTX 41
4153 #define NEON_2RM_VRINTA 42
4154 #define NEON_2RM_VRINTZ 43
4155 #define NEON_2RM_VCVT_F16_F32 44
4156 #define NEON_2RM_VRINTM 45
4157 #define NEON_2RM_VCVT_F32_F16 46
4158 #define NEON_2RM_VRINTP 47
4159 #define NEON_2RM_VCVTAU 48
4160 #define NEON_2RM_VCVTAS 49
4161 #define NEON_2RM_VCVTNU 50
4162 #define NEON_2RM_VCVTNS 51
4163 #define NEON_2RM_VCVTPU 52
4164 #define NEON_2RM_VCVTPS 53
4165 #define NEON_2RM_VCVTMU 54
4166 #define NEON_2RM_VCVTMS 55
4167 #define NEON_2RM_VRECPE 56
4168 #define NEON_2RM_VRSQRTE 57
4169 #define NEON_2RM_VRECPE_F 58
4170 #define NEON_2RM_VRSQRTE_F 59
4171 #define NEON_2RM_VCVT_FS 60
4172 #define NEON_2RM_VCVT_FU 61
4173 #define NEON_2RM_VCVT_SF 62
4174 #define NEON_2RM_VCVT_UF 63
4176 static int neon_2rm_is_float_op(int op)
4179 * Return true if this neon 2reg-misc op is float-to-float.
4180 * This is not a property of the operation but of our code --
4181 * what we are asking here is "does the code for this case in
4182 * the Neon for-each-pass loop use cpu_F0s?".
4184 return ((op >= NEON_2RM_VRINTN && op <= NEON_2RM_VRINTZ) ||
4185 op == NEON_2RM_VRINTM ||
4186 (op >= NEON_2RM_VRINTP && op <= NEON_2RM_VCVTMS) ||
4187 op >= NEON_2RM_VRECPE_F);
4190 static bool neon_2rm_is_v8_op(int op)
4192 /* Return true if this neon 2reg-misc op is ARMv8 and up */
4194 case NEON_2RM_VRINTN:
4195 case NEON_2RM_VRINTA:
4196 case NEON_2RM_VRINTM:
4197 case NEON_2RM_VRINTP:
4198 case NEON_2RM_VRINTZ:
4199 case NEON_2RM_VRINTX:
4200 case NEON_2RM_VCVTAU:
4201 case NEON_2RM_VCVTAS:
4202 case NEON_2RM_VCVTNU:
4203 case NEON_2RM_VCVTNS:
4204 case NEON_2RM_VCVTPU:
4205 case NEON_2RM_VCVTPS:
4206 case NEON_2RM_VCVTMU:
4207 case NEON_2RM_VCVTMS:
4214 /* Each entry in this array has bit n set if the insn allows
4215 * size value n (otherwise it will UNDEF). Since unallocated
4216 * op values will have no bits set they always UNDEF.
4218 static const uint8_t neon_2rm_sizes[] = {
4219 [NEON_2RM_VREV64] = 0x7,
4220 [NEON_2RM_VREV32] = 0x3,
4221 [NEON_2RM_VREV16] = 0x1,
4222 [NEON_2RM_VPADDL] = 0x7,
4223 [NEON_2RM_VPADDL_U] = 0x7,
4224 [NEON_2RM_AESE] = 0x1,
4225 [NEON_2RM_AESMC] = 0x1,
4226 [NEON_2RM_VCLS] = 0x7,
4227 [NEON_2RM_VCLZ] = 0x7,
4228 [NEON_2RM_VCNT] = 0x1,
4229 [NEON_2RM_VMVN] = 0x1,
4230 [NEON_2RM_VPADAL] = 0x7,
4231 [NEON_2RM_VPADAL_U] = 0x7,
4232 [NEON_2RM_VQABS] = 0x7,
4233 [NEON_2RM_VQNEG] = 0x7,
4234 [NEON_2RM_VCGT0] = 0x7,
4235 [NEON_2RM_VCGE0] = 0x7,
4236 [NEON_2RM_VCEQ0] = 0x7,
4237 [NEON_2RM_VCLE0] = 0x7,
4238 [NEON_2RM_VCLT0] = 0x7,
4239 [NEON_2RM_SHA1H] = 0x4,
4240 [NEON_2RM_VABS] = 0x7,
4241 [NEON_2RM_VNEG] = 0x7,
4242 [NEON_2RM_VCGT0_F] = 0x4,
4243 [NEON_2RM_VCGE0_F] = 0x4,
4244 [NEON_2RM_VCEQ0_F] = 0x4,
4245 [NEON_2RM_VCLE0_F] = 0x4,
4246 [NEON_2RM_VCLT0_F] = 0x4,
4247 [NEON_2RM_VABS_F] = 0x4,
4248 [NEON_2RM_VNEG_F] = 0x4,
4249 [NEON_2RM_VSWP] = 0x1,
4250 [NEON_2RM_VTRN] = 0x7,
4251 [NEON_2RM_VUZP] = 0x7,
4252 [NEON_2RM_VZIP] = 0x7,
4253 [NEON_2RM_VMOVN] = 0x7,
4254 [NEON_2RM_VQMOVN] = 0x7,
4255 [NEON_2RM_VSHLL] = 0x7,
4256 [NEON_2RM_SHA1SU1] = 0x4,
4257 [NEON_2RM_VRINTN] = 0x4,
4258 [NEON_2RM_VRINTX] = 0x4,
4259 [NEON_2RM_VRINTA] = 0x4,
4260 [NEON_2RM_VRINTZ] = 0x4,
4261 [NEON_2RM_VCVT_F16_F32] = 0x2,
4262 [NEON_2RM_VRINTM] = 0x4,
4263 [NEON_2RM_VCVT_F32_F16] = 0x2,
4264 [NEON_2RM_VRINTP] = 0x4,
4265 [NEON_2RM_VCVTAU] = 0x4,
4266 [NEON_2RM_VCVTAS] = 0x4,
4267 [NEON_2RM_VCVTNU] = 0x4,
4268 [NEON_2RM_VCVTNS] = 0x4,
4269 [NEON_2RM_VCVTPU] = 0x4,
4270 [NEON_2RM_VCVTPS] = 0x4,
4271 [NEON_2RM_VCVTMU] = 0x4,
4272 [NEON_2RM_VCVTMS] = 0x4,
4273 [NEON_2RM_VRECPE] = 0x4,
4274 [NEON_2RM_VRSQRTE] = 0x4,
4275 [NEON_2RM_VRECPE_F] = 0x4,
4276 [NEON_2RM_VRSQRTE_F] = 0x4,
4277 [NEON_2RM_VCVT_FS] = 0x4,
4278 [NEON_2RM_VCVT_FU] = 0x4,
4279 [NEON_2RM_VCVT_SF] = 0x4,
4280 [NEON_2RM_VCVT_UF] = 0x4,
4284 /* Expand v8.1 simd helper. */
4285 static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn,
4286 int q, int rd, int rn, int rm)
4288 if (dc_isar_feature(aa32_rdm, s)) {
4289 int opr_sz = (1 + q) * 8;
4290 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
4291 vfp_reg_offset(1, rn),
4292 vfp_reg_offset(1, rm), cpu_env,
4293 opr_sz, opr_sz, 0, fn);
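/*
 * A sketch of how do_v81_helper is used (assuming the v8.1-RDM gvec
 * helpers referenced elsewhere in this file): the VQRDMLAH/VQRDMLSH
 * paths call it roughly as
 *
 *     return do_v81_helper(s, gen_helper_gvec_qrdmlah_s16, q, rd, rn, rm);
 *
 * so the feature check and the gvec expansion stay in one place and an
 * unsupported core simply falls back to the UNDEF return.
 */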
4299 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4301 tcg_gen_vec_sar8i_i64(a, a, shift);
4302 tcg_gen_vec_add8_i64(d, d, a);
4305 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4307 tcg_gen_vec_sar16i_i64(a, a, shift);
4308 tcg_gen_vec_add16_i64(d, d, a);
4311 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4313 tcg_gen_sari_i32(a, a, shift);
4314 tcg_gen_add_i32(d, d, a);
4317 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4319 tcg_gen_sari_i64(a, a, shift);
4320 tcg_gen_add_i64(d, d, a);
4323 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4325 tcg_gen_sari_vec(vece, a, a, sh);
4326 tcg_gen_add_vec(vece, d, d, a);
4329 static const TCGOpcode vecop_list_ssra[] = {
4330 INDEX_op_sari_vec, INDEX_op_add_vec, 0
4333 const GVecGen2i ssra_op[4] = {
4334 { .fni8 = gen_ssra8_i64,
4335 .fniv = gen_ssra_vec,
4337 .opt_opc = vecop_list_ssra,
4339 { .fni8 = gen_ssra16_i64,
4340 .fniv = gen_ssra_vec,
4342 .opt_opc = vecop_list_ssra,
4344 { .fni4 = gen_ssra32_i32,
4345 .fniv = gen_ssra_vec,
4347 .opt_opc = vecop_list_ssra,
4349 { .fni8 = gen_ssra64_i64,
4350 .fniv = gen_ssra_vec,
4351 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4352 .opt_opc = vecop_list_ssra,
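/*
 * Worked example of the accumulating shifts expanded above and below:
 * SSRA does d[i] += a[i] >> shift with an arithmetic shift, USRA the
 * same with a logical shift.  For an 8-bit lane with a = 0x80 (-128)
 * and shift = 1, SSRA adds -64 while USRA adds +64.
 */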
4357 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4359 tcg_gen_vec_shr8i_i64(a, a, shift);
4360 tcg_gen_vec_add8_i64(d, d, a);
4363 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4365 tcg_gen_vec_shr16i_i64(a, a, shift);
4366 tcg_gen_vec_add16_i64(d, d, a);
4369 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4371 tcg_gen_shri_i32(a, a, shift);
4372 tcg_gen_add_i32(d, d, a);
4375 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4377 tcg_gen_shri_i64(a, a, shift);
4378 tcg_gen_add_i64(d, d, a);
4381 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4383 tcg_gen_shri_vec(vece, a, a, sh);
4384 tcg_gen_add_vec(vece, d, d, a);
4387 static const TCGOpcode vecop_list_usra[] = {
4388 INDEX_op_shri_vec, INDEX_op_add_vec, 0
4391 const GVecGen2i usra_op[4] = {
4392 { .fni8 = gen_usra8_i64,
4393 .fniv = gen_usra_vec,
4395 .opt_opc = vecop_list_usra,
4397 { .fni8 = gen_usra16_i64,
4398 .fniv = gen_usra_vec,
4400 .opt_opc = vecop_list_usra,
4402 { .fni4 = gen_usra32_i32,
4403 .fniv = gen_usra_vec,
4405 .opt_opc = vecop_list_usra,
4407 { .fni8 = gen_usra64_i64,
4408 .fniv = gen_usra_vec,
4409 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4411 .opt_opc = vecop_list_usra,
4415 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4417 uint64_t mask = dup_const(MO_8, 0xff >> shift);
4418 TCGv_i64 t = tcg_temp_new_i64();
4420 tcg_gen_shri_i64(t, a, shift);
4421 tcg_gen_andi_i64(t, t, mask);
4422 tcg_gen_andi_i64(d, d, ~mask);
4423 tcg_gen_or_i64(d, d, t);
4424 tcg_temp_free_i64(t);
4427 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4429 uint64_t mask = dup_const(MO_16, 0xffff >> shift);
4430 TCGv_i64 t = tcg_temp_new_i64();
4432 tcg_gen_shri_i64(t, a, shift);
4433 tcg_gen_andi_i64(t, t, mask);
4434 tcg_gen_andi_i64(d, d, ~mask);
4435 tcg_gen_or_i64(d, d, t);
4436 tcg_temp_free_i64(t);
4439 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4441 tcg_gen_shri_i32(a, a, shift);
4442 tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
4445 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4447 tcg_gen_shri_i64(a, a, shift);
4448 tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
4451 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4454 tcg_gen_mov_vec(d, a);
4456 TCGv_vec t = tcg_temp_new_vec_matching(d);
4457 TCGv_vec m = tcg_temp_new_vec_matching(d);
4459 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
4460 tcg_gen_shri_vec(vece, t, a, sh);
4461 tcg_gen_and_vec(vece, d, d, m);
4462 tcg_gen_or_vec(vece, d, d, t);
4464 tcg_temp_free_vec(t);
4465 tcg_temp_free_vec(m);
4469 static const TCGOpcode vecop_list_sri[] = { INDEX_op_shri_vec, 0 };
4471 const GVecGen2i sri_op[4] = {
4472 { .fni8 = gen_shr8_ins_i64,
4473 .fniv = gen_shr_ins_vec,
4475 .opt_opc = vecop_list_sri,
4477 { .fni8 = gen_shr16_ins_i64,
4478 .fniv = gen_shr_ins_vec,
4480 .opt_opc = vecop_list_sri,
4482 { .fni4 = gen_shr32_ins_i32,
4483 .fniv = gen_shr_ins_vec,
4485 .opt_opc = vecop_list_sri,
4487 { .fni8 = gen_shr64_ins_i64,
4488 .fniv = gen_shr_ins_vec,
4489 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4491 .opt_opc = vecop_list_sri,
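/*
 * SRI (shift right and insert), expanded above: each destination lane
 * keeps its top "shift" bits, and the remaining low bits are replaced
 * by the source lane shifted right by "shift"; the expansions mask off
 * the low bits of the destination before ORing in the shifted source.
 */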
4495 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4497 uint64_t mask = dup_const(MO_8, 0xff << shift);
4498 TCGv_i64 t = tcg_temp_new_i64();
4500 tcg_gen_shli_i64(t, a, shift);
4501 tcg_gen_andi_i64(t, t, mask);
4502 tcg_gen_andi_i64(d, d, ~mask);
4503 tcg_gen_or_i64(d, d, t);
4504 tcg_temp_free_i64(t);
4507 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4509 uint64_t mask = dup_const(MO_16, 0xffff << shift);
4510 TCGv_i64 t = tcg_temp_new_i64();
4512 tcg_gen_shli_i64(t, a, shift);
4513 tcg_gen_andi_i64(t, t, mask);
4514 tcg_gen_andi_i64(d, d, ~mask);
4515 tcg_gen_or_i64(d, d, t);
4516 tcg_temp_free_i64(t);
4519 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4521 tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
4524 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4526 tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
4529 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4532 tcg_gen_mov_vec(d, a);
4534 TCGv_vec t = tcg_temp_new_vec_matching(d);
4535 TCGv_vec m = tcg_temp_new_vec_matching(d);
4537 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
4538 tcg_gen_shli_vec(vece, t, a, sh);
4539 tcg_gen_and_vec(vece, d, d, m);
4540 tcg_gen_or_vec(vece, d, d, t);
4542 tcg_temp_free_vec(t);
4543 tcg_temp_free_vec(m);
4547 static const TCGOpcode vecop_list_sli[] = { INDEX_op_shli_vec, 0 };
4549 const GVecGen2i sli_op[4] = {
4550 { .fni8 = gen_shl8_ins_i64,
4551 .fniv = gen_shl_ins_vec,
4553 .opt_opc = vecop_list_sli,
4555 { .fni8 = gen_shl16_ins_i64,
4556 .fniv = gen_shl_ins_vec,
4558 .opt_opc = vecop_list_sli,
4560 { .fni4 = gen_shl32_ins_i32,
4561 .fniv = gen_shl_ins_vec,
4563 .opt_opc = vecop_list_sli,
4565 { .fni8 = gen_shl64_ins_i64,
4566 .fniv = gen_shl_ins_vec,
4567 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4569 .opt_opc = vecop_list_sli,
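/*
 * SLI (shift left and insert) is the mirror image: each destination
 * lane keeps its low "shift" bits and the rest comes from the source
 * shifted left, so for a 32-bit lane with shift = 8 the result is
 * (a << 8) | (d & 0xff).
 */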
4573 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4575 gen_helper_neon_mul_u8(a, a, b);
4576 gen_helper_neon_add_u8(d, d, a);
4579 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4581 gen_helper_neon_mul_u8(a, a, b);
4582 gen_helper_neon_sub_u8(d, d, a);
4585 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4587 gen_helper_neon_mul_u16(a, a, b);
4588 gen_helper_neon_add_u16(d, d, a);
4591 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4593 gen_helper_neon_mul_u16(a, a, b);
4594 gen_helper_neon_sub_u16(d, d, a);
4597 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4599 tcg_gen_mul_i32(a, a, b);
4600 tcg_gen_add_i32(d, d, a);
4603 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4605 tcg_gen_mul_i32(a, a, b);
4606 tcg_gen_sub_i32(d, d, a);
4609 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4611 tcg_gen_mul_i64(a, a, b);
4612 tcg_gen_add_i64(d, d, a);
4615 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4617 tcg_gen_mul_i64(a, a, b);
4618 tcg_gen_sub_i64(d, d, a);
4621 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4623 tcg_gen_mul_vec(vece, a, a, b);
4624 tcg_gen_add_vec(vece, d, d, a);
4627 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4629 tcg_gen_mul_vec(vece, a, a, b);
4630 tcg_gen_sub_vec(vece, d, d, a);
4633 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
4634 * these tables are shared with AArch64 which does support them.
4637 static const TCGOpcode vecop_list_mla[] = {
4638 INDEX_op_mul_vec, INDEX_op_add_vec, 0
4641 static const TCGOpcode vecop_list_mls[] = {
4642 INDEX_op_mul_vec, INDEX_op_sub_vec, 0
4645 const GVecGen3 mla_op[4] = {
4646 { .fni4 = gen_mla8_i32,
4647 .fniv = gen_mla_vec,
4649 .opt_opc = vecop_list_mla,
4651 { .fni4 = gen_mla16_i32,
4652 .fniv = gen_mla_vec,
4654 .opt_opc = vecop_list_mla,
4656 { .fni4 = gen_mla32_i32,
4657 .fniv = gen_mla_vec,
4659 .opt_opc = vecop_list_mla,
4661 { .fni8 = gen_mla64_i64,
4662 .fniv = gen_mla_vec,
4663 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4665 .opt_opc = vecop_list_mla,
4669 const GVecGen3 mls_op[4] = {
4670 { .fni4 = gen_mls8_i32,
4671 .fniv = gen_mls_vec,
4673 .opt_opc = vecop_list_mls,
4675 { .fni4 = gen_mls16_i32,
4676 .fniv = gen_mls_vec,
4678 .opt_opc = vecop_list_mls,
4680 { .fni4 = gen_mls32_i32,
4681 .fniv = gen_mls_vec,
4683 .opt_opc = vecop_list_mls,
4685 { .fni8 = gen_mls64_i64,
4686 .fniv = gen_mls_vec,
4687 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4689 .opt_opc = vecop_list_mls,
4693 /* CMTST : per-element test "(X & Y) != 0", producing all-ones when true. */
4694 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4696 tcg_gen_and_i32(d, a, b);
4697 tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
4698 tcg_gen_neg_i32(d, d);
4701 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4703 tcg_gen_and_i64(d, a, b);
4704 tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
4705 tcg_gen_neg_i64(d, d);
4708 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4710 tcg_gen_and_vec(vece, d, a, b);
4711 tcg_gen_dupi_vec(vece, a, 0);
4712 tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
4715 static const TCGOpcode vecop_list_cmtst[] = { INDEX_op_cmp_vec, 0 };
4717 const GVecGen3 cmtst_op[4] = {
4718 { .fni4 = gen_helper_neon_tst_u8,
4719 .fniv = gen_cmtst_vec,
4720 .opt_opc = vecop_list_cmtst,
4722 { .fni4 = gen_helper_neon_tst_u16,
4723 .fniv = gen_cmtst_vec,
4724 .opt_opc = vecop_list_cmtst,
4726 { .fni4 = gen_cmtst_i32,
4727 .fniv = gen_cmtst_vec,
4728 .opt_opc = vecop_list_cmtst,
4730 { .fni8 = gen_cmtst_i64,
4731 .fniv = gen_cmtst_vec,
4732 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4733 .opt_opc = vecop_list_cmtst,
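/*
 * The gen_uqadd_vec/gen_sqadd_vec/gen_uqsub_vec/gen_sqsub_vec expanders
 * below all use the same trick to maintain the QC flag: compute both
 * the wrapping result and the saturating result, compare the two, and
 * OR any per-lane mismatch into the "sat" operand (which the caller
 * points at the QC flag storage).
 */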
4737 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4738 TCGv_vec a, TCGv_vec b)
4740 TCGv_vec x = tcg_temp_new_vec_matching(t);
4741 tcg_gen_add_vec(vece, x, a, b);
4742 tcg_gen_usadd_vec(vece, t, a, b);
4743 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4744 tcg_gen_or_vec(vece, sat, sat, x);
4745 tcg_temp_free_vec(x);
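/* Saturation is detected by comparing the wrapping add with the saturating
 * add: any lane that differs sets bits in 'sat', which the callers point at
 * the vfp.qc accumulator. The sqadd/uqsub/sqsub expanders below use the
 * same trick.
 */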
4748 static const TCGOpcode vecop_list_uqadd[] = {
4749 INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4752 const GVecGen4 uqadd_op[4] = {
4753 { .fniv = gen_uqadd_vec,
4754 .fno = gen_helper_gvec_uqadd_b,
4756 .opt_opc = vecop_list_uqadd,
4758 { .fniv = gen_uqadd_vec,
4759 .fno = gen_helper_gvec_uqadd_h,
4761 .opt_opc = vecop_list_uqadd,
4763 { .fniv = gen_uqadd_vec,
4764 .fno = gen_helper_gvec_uqadd_s,
4766 .opt_opc = vecop_list_uqadd,
4768 { .fniv = gen_uqadd_vec,
4769 .fno = gen_helper_gvec_uqadd_d,
4771 .opt_opc = vecop_list_uqadd,
4775 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4776 TCGv_vec a, TCGv_vec b)
4778 TCGv_vec x = tcg_temp_new_vec_matching(t);
4779 tcg_gen_add_vec(vece, x, a, b);
4780 tcg_gen_ssadd_vec(vece, t, a, b);
4781 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4782 tcg_gen_or_vec(vece, sat, sat, x);
4783 tcg_temp_free_vec(x);
4786 static const TCGOpcode vecop_list_sqadd[] = {
4787 INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4790 const GVecGen4 sqadd_op[4] = {
4791 { .fniv = gen_sqadd_vec,
4792 .fno = gen_helper_gvec_sqadd_b,
4793 .opt_opc = vecop_list_sqadd,
4796 { .fniv = gen_sqadd_vec,
4797 .fno = gen_helper_gvec_sqadd_h,
4798 .opt_opc = vecop_list_sqadd,
4801 { .fniv = gen_sqadd_vec,
4802 .fno = gen_helper_gvec_sqadd_s,
4803 .opt_opc = vecop_list_sqadd,
4806 { .fniv = gen_sqadd_vec,
4807 .fno = gen_helper_gvec_sqadd_d,
4808 .opt_opc = vecop_list_sqadd,
4813 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4814 TCGv_vec a, TCGv_vec b)
4816 TCGv_vec x = tcg_temp_new_vec_matching(t);
4817 tcg_gen_sub_vec(vece, x, a, b);
4818 tcg_gen_ussub_vec(vece, t, a, b);
4819 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4820 tcg_gen_or_vec(vece, sat, sat, x);
4821 tcg_temp_free_vec(x);
4824 static const TCGOpcode vecop_list_uqsub[] = {
4825 INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4828 const GVecGen4 uqsub_op[4] = {
4829 { .fniv = gen_uqsub_vec,
4830 .fno = gen_helper_gvec_uqsub_b,
4831 .opt_opc = vecop_list_uqsub,
4834 { .fniv = gen_uqsub_vec,
4835 .fno = gen_helper_gvec_uqsub_h,
4836 .opt_opc = vecop_list_uqsub,
4839 { .fniv = gen_uqsub_vec,
4840 .fno = gen_helper_gvec_uqsub_s,
4841 .opt_opc = vecop_list_uqsub,
4844 { .fniv = gen_uqsub_vec,
4845 .fno = gen_helper_gvec_uqsub_d,
4846 .opt_opc = vecop_list_uqsub,
4851 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4852 TCGv_vec a, TCGv_vec b)
4854 TCGv_vec x = tcg_temp_new_vec_matching(t);
4855 tcg_gen_sub_vec(vece, x, a, b);
4856 tcg_gen_sssub_vec(vece, t, a, b);
4857 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4858 tcg_gen_or_vec(vece, sat, sat, x);
4859 tcg_temp_free_vec(x);
4862 static const TCGOpcode vecop_list_sqsub[] = {
4863 INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4866 const GVecGen4 sqsub_op[4] = {
4867 { .fniv = gen_sqsub_vec,
4868 .fno = gen_helper_gvec_sqsub_b,
4869 .opt_opc = vecop_list_sqsub,
4872 { .fniv = gen_sqsub_vec,
4873 .fno = gen_helper_gvec_sqsub_h,
4874 .opt_opc = vecop_list_sqsub,
4877 { .fniv = gen_sqsub_vec,
4878 .fno = gen_helper_gvec_sqsub_s,
4879 .opt_opc = vecop_list_sqsub,
4882 { .fniv = gen_sqsub_vec,
4883 .fno = gen_helper_gvec_sqsub_d,
4884 .opt_opc = vecop_list_sqsub,
4889 /* Translate a NEON data processing instruction. Return nonzero if the
4890 instruction is invalid.
4891 We process data in a mixture of 32-bit and 64-bit chunks.
4892 Mostly we use 32-bit chunks so we can use normal scalar instructions. */
4894 static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
4898 int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs;
4907 TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
4908 TCGv_ptr ptr1, ptr2, ptr3;
4911 /* FIXME: this access check should not take precedence over UNDEF
4912 * for invalid encodings; we will generate incorrect syndrome information
4913 * for attempts to execute invalid vfp/neon encodings with FP disabled.
4915 if (s->fp_excp_el) {
4916 gen_exception_insn(s, 4, EXCP_UDEF,
4917 syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
4921 if (!s->vfp_enabled)
4923 q = (insn & (1 << 6)) != 0;
4924 u = (insn >> 24) & 1;
4925 VFP_DREG_D(rd, insn);
4926 VFP_DREG_N(rn, insn);
4927 VFP_DREG_M(rm, insn);
4928 size = (insn >> 20) & 3;
4929 vec_size = q ? 16 : 8;
4930 rd_ofs = neon_reg_offset(rd, 0);
4931 rn_ofs = neon_reg_offset(rn, 0);
4932 rm_ofs = neon_reg_offset(rm, 0);
4934 if ((insn & (1 << 23)) == 0) {
4935 /* Three register same length. */
4936 op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
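/* op is a 5-bit index built from insn bits [11:8] and bit [4]. */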
4937 /* Catch invalid op and bad size combinations: UNDEF */
4938 if ((neon_3r_sizes[op] & (1 << size)) == 0) {
4941 /* All insns of this form UNDEF for either this condition or the
4942 * superset of cases "Q==1"; we catch the latter later.
4944 if (q && ((rd | rn | rm) & 1)) {
4949 /* The SHA-1/SHA-256 3-register instructions require special
4950 * treatment here, as their size field is overloaded as an
4951 * op type selector, and they all consume their input in a single pass. */
4957 if (!u) { /* SHA-1 */
4958 if (!dc_isar_feature(aa32_sha1, s)) {
4961 ptr1 = vfp_reg_ptr(true, rd);
4962 ptr2 = vfp_reg_ptr(true, rn);
4963 ptr3 = vfp_reg_ptr(true, rm);
4964 tmp4 = tcg_const_i32(size);
4965 gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp4);
4966 tcg_temp_free_i32(tmp4);
4967 } else { /* SHA-256 */
4968 if (!dc_isar_feature(aa32_sha2, s) || size == 3) {
4971 ptr1 = vfp_reg_ptr(true, rd);
4972 ptr2 = vfp_reg_ptr(true, rn);
4973 ptr3 = vfp_reg_ptr(true, rm);
4976 gen_helper_crypto_sha256h(ptr1, ptr2, ptr3);
4979 gen_helper_crypto_sha256h2(ptr1, ptr2, ptr3);
4982 gen_helper_crypto_sha256su1(ptr1, ptr2, ptr3);
4986 tcg_temp_free_ptr(ptr1);
4987 tcg_temp_free_ptr(ptr2);
4988 tcg_temp_free_ptr(ptr3);
4991 case NEON_3R_VPADD_VQRDMLAH:
4998 return do_v81_helper(s, gen_helper_gvec_qrdmlah_s16,
5001 return do_v81_helper(s, gen_helper_gvec_qrdmlah_s32,
5006 case NEON_3R_VFM_VQRDMLSH:
5017 return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s16,
5020 return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s32,
5025 case NEON_3R_LOGIC: /* Logic ops. */
5026 switch ((u << 2) | size) {
5028 tcg_gen_gvec_and(0, rd_ofs, rn_ofs, rm_ofs,
5029 vec_size, vec_size);
5032 tcg_gen_gvec_andc(0, rd_ofs, rn_ofs, rm_ofs,
5033 vec_size, vec_size);
5036 tcg_gen_gvec_or(0, rd_ofs, rn_ofs, rm_ofs,
5037 vec_size, vec_size);
5040 tcg_gen_gvec_orc(0, rd_ofs, rn_ofs, rm_ofs,
5041 vec_size, vec_size);
5044 tcg_gen_gvec_xor(0, rd_ofs, rn_ofs, rm_ofs,
5045 vec_size, vec_size);
5048 tcg_gen_gvec_bitsel(MO_8, rd_ofs, rd_ofs, rn_ofs, rm_ofs,
5049 vec_size, vec_size);
5052 tcg_gen_gvec_bitsel(MO_8, rd_ofs, rm_ofs, rn_ofs, rd_ofs,
5053 vec_size, vec_size);
5056 tcg_gen_gvec_bitsel(MO_8, rd_ofs, rm_ofs, rd_ofs, rn_ofs,
5057 vec_size, vec_size);
5062 case NEON_3R_VADD_VSUB:
5064 tcg_gen_gvec_sub(size, rd_ofs, rn_ofs, rm_ofs,
5065 vec_size, vec_size);
5067 tcg_gen_gvec_add(size, rd_ofs, rn_ofs, rm_ofs,
5068 vec_size, vec_size);
5073 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
5074 rn_ofs, rm_ofs, vec_size, vec_size,
5075 (u ? uqadd_op : sqadd_op) + size);
5079 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
5080 rn_ofs, rm_ofs, vec_size, vec_size,
5081 (u ? uqsub_op : sqsub_op) + size);
5084 case NEON_3R_VMUL: /* VMUL */
5086 /* Polynomial case allows only P8 and is handled below. */
5091 tcg_gen_gvec_mul(size, rd_ofs, rn_ofs, rm_ofs,
5092 vec_size, vec_size);
5097 case NEON_3R_VML: /* VMLA, VMLS */
5098 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
5099 u ? &mls_op[size] : &mla_op[size]);
5102 case NEON_3R_VTST_VCEQ:
5104 tcg_gen_gvec_cmp(TCG_COND_EQ, size, rd_ofs, rn_ofs, rm_ofs,
5105 vec_size, vec_size);
5107 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs,
5108 vec_size, vec_size, &cmtst_op[size]);
5113 tcg_gen_gvec_cmp(u ? TCG_COND_GTU : TCG_COND_GT, size,
5114 rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
5118 tcg_gen_gvec_cmp(u ? TCG_COND_GEU : TCG_COND_GE, size,
5119 rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
5124 tcg_gen_gvec_umax(size, rd_ofs, rn_ofs, rm_ofs,
5125 vec_size, vec_size);
5127 tcg_gen_gvec_smax(size, rd_ofs, rn_ofs, rm_ofs,
5128 vec_size, vec_size);
5133 tcg_gen_gvec_umin(size, rd_ofs, rn_ofs, rm_ofs,
5134 vec_size, vec_size);
5136 tcg_gen_gvec_smin(size, rd_ofs, rn_ofs, rm_ofs,
5137 vec_size, vec_size);
5143 /* 64-bit element instructions. */
5144 for (pass = 0; pass < (q ? 2 : 1); pass++) {
5145 neon_load_reg64(cpu_V0, rn + pass);
5146 neon_load_reg64(cpu_V1, rm + pass);
5150 gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
5152 gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0);
5157 gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5160 gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5166 gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
5168 gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
5171 case NEON_3R_VQRSHL:
5173 gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
5176 gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
5183 neon_store_reg64(cpu_V0, rd + pass);
5192 case NEON_3R_VQRSHL:
5195 /* Shift instruction operands are reversed. */
5201 case NEON_3R_VPADD_VQRDMLAH:
5206 case NEON_3R_FLOAT_ARITH:
5207 pairwise = (u && size < 2); /* if VPADD (float) */
5209 case NEON_3R_FLOAT_MINMAX:
5210 pairwise = u; /* if VPMIN/VPMAX (float) */
5212 case NEON_3R_FLOAT_CMP:
5214 /* no encoding for U=0 C=1x */
5218 case NEON_3R_FLOAT_ACMP:
5223 case NEON_3R_FLOAT_MISC:
5224 /* VMAXNM/VMINNM in ARMv8 */
5225 if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) {
5229 case NEON_3R_VFM_VQRDMLSH:
5230 if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) {
5238 if (pairwise && q) {
5239 /* All the pairwise insns UNDEF if Q is set */
5243 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5248 tmp = neon_load_reg(rn, 0);
5249 tmp2 = neon_load_reg(rn, 1);
5251 tmp = neon_load_reg(rm, 0);
5252 tmp2 = neon_load_reg(rm, 1);
5256 tmp = neon_load_reg(rn, pass);
5257 tmp2 = neon_load_reg(rm, pass);
5261 GEN_NEON_INTEGER_OP(hadd);
5263 case NEON_3R_VRHADD:
5264 GEN_NEON_INTEGER_OP(rhadd);
5267 GEN_NEON_INTEGER_OP(hsub);
5270 GEN_NEON_INTEGER_OP(shl);
5273 GEN_NEON_INTEGER_OP_ENV(qshl);
5276 GEN_NEON_INTEGER_OP(rshl);
5278 case NEON_3R_VQRSHL:
5279 GEN_NEON_INTEGER_OP_ENV(qrshl);
5282 GEN_NEON_INTEGER_OP(abd);
5285 GEN_NEON_INTEGER_OP(abd);
5286 tcg_temp_free_i32(tmp2);
5287 tmp2 = neon_load_reg(rd, pass);
5288 gen_neon_add(size, tmp, tmp2);
5291 /* VMUL.P8; other cases already eliminated. */
5292 gen_helper_neon_mul_p8(tmp, tmp, tmp2);
5295 GEN_NEON_INTEGER_OP(pmax);
5298 GEN_NEON_INTEGER_OP(pmin);
5300 case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high. */
5301 if (!u) { /* VQDMULH */
5304 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
5307 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
5311 } else { /* VQRDMULH */
5314 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
5317 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
5323 case NEON_3R_VPADD_VQRDMLAH:
5325 case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
5326 case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
5327 case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
5331 case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
5333 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5334 switch ((u << 2) | size) {
5337 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5340 gen_helper_vfp_subs(tmp, tmp, tmp2, fpstatus);
5343 gen_helper_neon_abd_f32(tmp, tmp, tmp2, fpstatus);
5348 tcg_temp_free_ptr(fpstatus);
5351 case NEON_3R_FLOAT_MULTIPLY:
5353 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5354 gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
5356 tcg_temp_free_i32(tmp2);
5357 tmp2 = neon_load_reg(rd, pass);
5359 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5361 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
5364 tcg_temp_free_ptr(fpstatus);
5367 case NEON_3R_FLOAT_CMP:
5369 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5371 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
5374 gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
5376 gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
5379 tcg_temp_free_ptr(fpstatus);
5382 case NEON_3R_FLOAT_ACMP:
5384 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5386 gen_helper_neon_acge_f32(tmp, tmp, tmp2, fpstatus);
5388 gen_helper_neon_acgt_f32(tmp, tmp, tmp2, fpstatus);
5390 tcg_temp_free_ptr(fpstatus);
5393 case NEON_3R_FLOAT_MINMAX:
5395 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5397 gen_helper_vfp_maxs(tmp, tmp, tmp2, fpstatus);
5399 gen_helper_vfp_mins(tmp, tmp, tmp2, fpstatus);
5401 tcg_temp_free_ptr(fpstatus);
5404 case NEON_3R_FLOAT_MISC:
5407 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5409 gen_helper_vfp_maxnums(tmp, tmp, tmp2, fpstatus);
5411 gen_helper_vfp_minnums(tmp, tmp, tmp2, fpstatus);
5413 tcg_temp_free_ptr(fpstatus);
5416 gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
5418 gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
5422 case NEON_3R_VFM_VQRDMLSH:
5424 /* VFMA, VFMS: fused multiply-add */
5425 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5426 TCGv_i32 tmp3 = neon_load_reg(rd, pass);
5429 gen_helper_vfp_negs(tmp, tmp);
5431 gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus);
5432 tcg_temp_free_i32(tmp3);
5433 tcg_temp_free_ptr(fpstatus);
5439 tcg_temp_free_i32(tmp2);
5441 /* Save the result. For elementwise operations we can put it
5442 straight into the destination register. For pairwise operations
5443 we have to be careful to avoid clobbering the source operands. */
5444 if (pairwise && rd == rm) {
5445 neon_store_scratch(pass, tmp);
5447 neon_store_reg(rd, pass, tmp);
5451 if (pairwise && rd == rm) {
5452 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5453 tmp = neon_load_scratch(pass);
5454 neon_store_reg(rd, pass, tmp);
5457 /* End of 3 register same size operations. */
5458 } else if (insn & (1 << 4)) {
5459 if ((insn & 0x00380080) != 0) {
5460 /* Two registers and shift. */
5461 op = (insn >> 8) & 0xf;
5462 if (insn & (1 << 7)) {
5470 while ((insn & (1 << (size + 19))) == 0)
5473 shift = (insn >> 16) & ((1 << (3 + size)) - 1);
5475 /* Shift by immediate:
5476 VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */
5477 if (q && ((rd | rm) & 1)) {
5480 if (!u && (op == 4 || op == 6)) {
5483 /* Right shifts are encoded as N - shift, where N is the
5484 element size in bits. */
5486 shift = shift - (1 << (size + 3));
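/* e.g. for 8-bit elements, VSHR #5 is encoded with imm6 == 11: we extracted
 * 3 above, so shift becomes 3 - 8 = -5 and -shift is the architectural
 * shift amount.
 */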
5491 /* Right shift comes here negative. */
5493 /* Shifts larger than the element size are architecturally
5494 * valid. Unsigned results in all zeros; signed results in all sign bits. */
5498 tcg_gen_gvec_sari(size, rd_ofs, rm_ofs,
5499 MIN(shift, (8 << size) - 1),
5500 vec_size, vec_size);
5501 } else if (shift >= 8 << size) {
5502 tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0);
5504 tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift,
5505 vec_size, vec_size);
5510 /* Right shift comes here negative. */
5512 /* Shifts larger than the element size are architecturally
5513 * valid. Unsigned results in all zeros; signed results in all sign bits. */
5517 tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
5518 MIN(shift, (8 << size) - 1),
5520 } else if (shift >= 8 << size) {
5523 tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
5524 shift, &usra_op[size]);
5532 /* Right shift comes here negative. */
5534 /* Shift out of range leaves destination unchanged. */
5535 if (shift < 8 << size) {
5536 tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
5537 shift, &sri_op[size]);
5541 case 5: /* VSHL, VSLI */
5543 /* Shift out of range leaves destination unchanged. */
5544 if (shift < 8 << size) {
5545 tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size,
5546 vec_size, shift, &sli_op[size]);
5549 /* Shifts larger than the element size are
5550 * architecturally valid and result in zero. */
5552 if (shift >= 8 << size) {
5553 tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0);
5555 tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift,
5556 vec_size, vec_size);
5568 /* To avoid excessive duplication of ops we implement shift
5569 * by immediate using the variable shift operations.
5571 imm = dup_const(size, shift);
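/* dup_const() replicates the (possibly negative) count into every lane of a
 * 64-bit constant, e.g. size == 1 with shift == -5 gives 0xfffbfffbfffbfffb;
 * the variable-shift helpers treat negative counts as right shifts.
 */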
5573 for (pass = 0; pass < count; pass++) {
5575 neon_load_reg64(cpu_V0, rm + pass);
5576 tcg_gen_movi_i64(cpu_V1, imm);
5581 gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, cpu_V1);
5583 gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1);
5585 case 6: /* VQSHLU */
5586 gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
5591 gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5594 gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5599 g_assert_not_reached();
5603 neon_load_reg64(cpu_V1, rd + pass);
5604 tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
5606 neon_store_reg64(cpu_V0, rd + pass);
5607 } else { /* size < 3 */
5608 /* Operands in T0 and T1. */
5609 tmp = neon_load_reg(rm, pass);
5610 tmp2 = tcg_temp_new_i32();
5611 tcg_gen_movi_i32(tmp2, imm);
5615 GEN_NEON_INTEGER_OP(rshl);
5617 case 6: /* VQSHLU */
5620 gen_helper_neon_qshlu_s8(tmp, cpu_env,
5624 gen_helper_neon_qshlu_s16(tmp, cpu_env,
5628 gen_helper_neon_qshlu_s32(tmp, cpu_env,
5636 GEN_NEON_INTEGER_OP_ENV(qshl);
5639 g_assert_not_reached();
5641 tcg_temp_free_i32(tmp2);
5645 tmp2 = neon_load_reg(rd, pass);
5646 gen_neon_add(size, tmp, tmp2);
5647 tcg_temp_free_i32(tmp2);
5649 neon_store_reg(rd, pass, tmp);
5652 } else if (op < 10) {
5653 /* Shift by immediate and narrow:
5654 VSHRN, VRSHRN, VQSHRN, VQRSHRN. */
5655 int input_unsigned = (op == 8) ? !u : u;
5659 shift = shift - (1 << (size + 3));
5662 tmp64 = tcg_const_i64(shift);
5663 neon_load_reg64(cpu_V0, rm);
5664 neon_load_reg64(cpu_V1, rm + 1);
5665 for (pass = 0; pass < 2; pass++) {
5673 if (input_unsigned) {
5674 gen_helper_neon_rshl_u64(cpu_V0, in, tmp64);
5676 gen_helper_neon_rshl_s64(cpu_V0, in, tmp64);
5679 if (input_unsigned) {
5680 gen_helper_neon_shl_u64(cpu_V0, in, tmp64);
5682 gen_helper_neon_shl_s64(cpu_V0, in, tmp64);
5685 tmp = tcg_temp_new_i32();
5686 gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5687 neon_store_reg(rd, pass, tmp);
5689 tcg_temp_free_i64(tmp64);
5692 imm = (uint16_t)shift;
5696 imm = (uint32_t)shift;
5698 tmp2 = tcg_const_i32(imm);
5699 tmp4 = neon_load_reg(rm + 1, 0);
5700 tmp5 = neon_load_reg(rm + 1, 1);
5701 for (pass = 0; pass < 2; pass++) {
5703 tmp = neon_load_reg(rm, 0);
5707 gen_neon_shift_narrow(size, tmp, tmp2, q,
5710 tmp3 = neon_load_reg(rm, 1);
5714 gen_neon_shift_narrow(size, tmp3, tmp2, q,
5716 tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
5717 tcg_temp_free_i32(tmp);
5718 tcg_temp_free_i32(tmp3);
5719 tmp = tcg_temp_new_i32();
5720 gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5721 neon_store_reg(rd, pass, tmp);
5723 tcg_temp_free_i32(tmp2);
5725 } else if (op == 10) {
5727 if (q || (rd & 1)) {
5730 tmp = neon_load_reg(rm, 0);
5731 tmp2 = neon_load_reg(rm, 1);
5732 for (pass = 0; pass < 2; pass++) {
5736 gen_neon_widen(cpu_V0, tmp, size, u);
5739 /* The shift is less than the width of the source
5740 type, so we can just shift the whole register. */
5741 tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
5742 /* Widen the result of shift: we need to clear
5743 * the potential overflow bits resulting from
5744 * left bits of the narrow input appearing as
5745 * right bits of the left neighbour narrow input. */
5747 if (size < 2 || !u) {
5750 imm = (0xffu >> (8 - shift));
5752 } else if (size == 1) {
5753 imm = 0xffff >> (16 - shift);
5756 imm = 0xffffffff >> (32 - shift);
5759 imm64 = imm | (((uint64_t)imm) << 32);
5763 tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
5766 neon_store_reg64(cpu_V0, rd + pass);
5768 } else if (op >= 14) {
5769 /* VCVT fixed-point. */
5770 if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
5773 /* We have already masked out the must-be-1 top bit of imm6,
5774 * hence this 32-shift where the ARM ARM has 64-imm6.
5777 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5778 tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass));
5781 gen_vfp_ulto(0, shift, 1);
5783 gen_vfp_slto(0, shift, 1);
5786 gen_vfp_toul(0, shift, 1);
5788 gen_vfp_tosl(0, shift, 1);
5790 tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, pass));
5795 } else { /* (insn & 0x00380080) == 0 */
5796 int invert, reg_ofs, vec_size;
5798 if (q && (rd & 1)) {
5802 op = (insn >> 8) & 0xf;
5803 /* One register and immediate. */
5804 imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
5805 invert = (insn & (1 << 5)) != 0;
5806 /* Note that op = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.
5807 * We choose not to special-case this and will behave as if a
5808 * valid constant encoding of 0 had been given.
5827 imm = (imm << 8) | (imm << 24);
5830 imm = (imm << 8) | 0xff;
5833 imm = (imm << 16) | 0xffff;
5836 imm |= (imm << 8) | (imm << 16) | (imm << 24);
5845 imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
5846 | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
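/* Standard 8-bit FP immediate expansion: abcdefgh becomes
 * a NOT(b) bbbbb cdefgh followed by 19 zero bits,
 * e.g. imm == 0x70 expands to 0x3f800000 (1.0f).
 */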
5853 reg_ofs = neon_reg_offset(rd, 0);
5854 vec_size = q ? 16 : 8;
5856 if (op & 1 && op < 12) {
5858 /* The immediate value has already been inverted,
5859 * so BIC becomes AND.
5861 tcg_gen_gvec_andi(MO_32, reg_ofs, reg_ofs, imm,
5862 vec_size, vec_size);
5864 tcg_gen_gvec_ori(MO_32, reg_ofs, reg_ofs, imm,
5865 vec_size, vec_size);
5869 if (op == 14 && invert) {
5870 TCGv_i64 t64 = tcg_temp_new_i64();
5872 for (pass = 0; pass <= q; ++pass) {
5876 for (n = 0; n < 8; n++) {
5877 if (imm & (1 << (n + pass * 8))) {
5878 val |= 0xffull << (n * 8);
5881 tcg_gen_movi_i64(t64, val);
5882 neon_store_reg64(t64, rd + pass);
5884 tcg_temp_free_i64(t64);
5886 tcg_gen_gvec_dup32i(reg_ofs, vec_size, vec_size, imm);
5890 } else { /* (insn & 0x00800010 == 0x00800000) */
5892 op = (insn >> 8) & 0xf;
5893 if ((insn & (1 << 6)) == 0) {
5894 /* Three registers of different lengths. */
5898 /* undefreq: bit 0 : UNDEF if size == 0
5899 * bit 1 : UNDEF if size == 1
5900 * bit 2 : UNDEF if size == 2
5901 * bit 3 : UNDEF if U == 1
5902 * Note that [2:0] set implies 'always UNDEF'
5905 /* prewiden, src1_wide, src2_wide, undefreq */
5906 static const int neon_3reg_wide[16][4] = {
5907 {1, 0, 0, 0}, /* VADDL */
5908 {1, 1, 0, 0}, /* VADDW */
5909 {1, 0, 0, 0}, /* VSUBL */
5910 {1, 1, 0, 0}, /* VSUBW */
5911 {0, 1, 1, 0}, /* VADDHN */
5912 {0, 0, 0, 0}, /* VABAL */
5913 {0, 1, 1, 0}, /* VSUBHN */
5914 {0, 0, 0, 0}, /* VABDL */
5915 {0, 0, 0, 0}, /* VMLAL */
5916 {0, 0, 0, 9}, /* VQDMLAL */
5917 {0, 0, 0, 0}, /* VMLSL */
5918 {0, 0, 0, 9}, /* VQDMLSL */
5919 {0, 0, 0, 0}, /* Integer VMULL */
5920 {0, 0, 0, 1}, /* VQDMULL */
5921 {0, 0, 0, 0xa}, /* Polynomial VMULL */
5922 {0, 0, 0, 7}, /* Reserved: always UNDEF */
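/* e.g. VQDMLAL's undefreq of 9 (bits 0 and 3) means UNDEF for
 * size == 0 or U == 1.
 */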
5925 prewiden = neon_3reg_wide[op][0];
5926 src1_wide = neon_3reg_wide[op][1];
5927 src2_wide = neon_3reg_wide[op][2];
5928 undefreq = neon_3reg_wide[op][3];
5930 if ((undefreq & (1 << size)) ||
5931 ((undefreq & 8) && u)) {
5934 if ((src1_wide && (rn & 1)) ||
5935 (src2_wide && (rm & 1)) ||
5936 (!src2_wide && (rd & 1))) {
5940 /* Handle VMULL.P64 (Polynomial 64x64 to 128 bit multiply)
5941 * outside the loop below as it only performs a single pass.
5943 if (op == 14 && size == 2) {
5944 TCGv_i64 tcg_rn, tcg_rm, tcg_rd;
5946 if (!dc_isar_feature(aa32_pmull, s)) {
5949 tcg_rn = tcg_temp_new_i64();
5950 tcg_rm = tcg_temp_new_i64();
5951 tcg_rd = tcg_temp_new_i64();
5952 neon_load_reg64(tcg_rn, rn);
5953 neon_load_reg64(tcg_rm, rm);
5954 gen_helper_neon_pmull_64_lo(tcg_rd, tcg_rn, tcg_rm);
5955 neon_store_reg64(tcg_rd, rd);
5956 gen_helper_neon_pmull_64_hi(tcg_rd, tcg_rn, tcg_rm);
5957 neon_store_reg64(tcg_rd, rd + 1);
5958 tcg_temp_free_i64(tcg_rn);
5959 tcg_temp_free_i64(tcg_rm);
5960 tcg_temp_free_i64(tcg_rd);
5964 /* Avoid overlapping operands. Wide source operands are
5965 always aligned so will never overlap with wide
5966 destinations in problematic ways. */
5967 if (rd == rm && !src2_wide) {
5968 tmp = neon_load_reg(rm, 1);
5969 neon_store_scratch(2, tmp);
5970 } else if (rd == rn && !src1_wide) {
5971 tmp = neon_load_reg(rn, 1);
5972 neon_store_scratch(2, tmp);
5975 for (pass = 0; pass < 2; pass++) {
5977 neon_load_reg64(cpu_V0, rn + pass);
5980 if (pass == 1 && rd == rn) {
5981 tmp = neon_load_scratch(2);
5983 tmp = neon_load_reg(rn, pass);
5986 gen_neon_widen(cpu_V0, tmp, size, u);
5990 neon_load_reg64(cpu_V1, rm + pass);
5993 if (pass == 1 && rd == rm) {
5994 tmp2 = neon_load_scratch(2);
5996 tmp2 = neon_load_reg(rm, pass);
5999 gen_neon_widen(cpu_V1, tmp2, size, u);
6003 case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
6004 gen_neon_addl(size);
6006 case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
6007 gen_neon_subl(size);
6009 case 5: case 7: /* VABAL, VABDL */
6010 switch ((size << 1) | u) {
6012 gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
6015 gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
6018 gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
6021 gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
6024 gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
6027 gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
6031 tcg_temp_free_i32(tmp2);
6032 tcg_temp_free_i32(tmp);
6034 case 8: case 9: case 10: case 11: case 12: case 13:
6035 /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
6036 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
6038 case 14: /* Polynomial VMULL */
6039 gen_helper_neon_mull_p8(cpu_V0, tmp, tmp2);
6040 tcg_temp_free_i32(tmp2);
6041 tcg_temp_free_i32(tmp);
6043 default: /* 15 is RESERVED: caught earlier */
6048 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6049 neon_store_reg64(cpu_V0, rd + pass);
6050 } else if (op == 5 || (op >= 8 && op <= 11)) {
6052 neon_load_reg64(cpu_V1, rd + pass);
6054 case 10: /* VMLSL */
6055 gen_neon_negl(cpu_V0, size);
6057 case 5: case 8: /* VABAL, VMLAL */
6058 gen_neon_addl(size);
6060 case 9: case 11: /* VQDMLAL, VQDMLSL */
6061 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6063 gen_neon_negl(cpu_V0, size);
6065 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
6070 neon_store_reg64(cpu_V0, rd + pass);
6071 } else if (op == 4 || op == 6) {
6072 /* Narrowing operation. */
6073 tmp = tcg_temp_new_i32();
6077 gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
6080 gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
6083 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
6084 tcg_gen_extrl_i64_i32(tmp, cpu_V0);
6091 gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
6094 gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
6097 tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
6098 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
6099 tcg_gen_extrl_i64_i32(tmp, cpu_V0);
6107 neon_store_reg(rd, 0, tmp3);
6108 neon_store_reg(rd, 1, tmp);
6111 /* Write back the result. */
6112 neon_store_reg64(cpu_V0, rd + pass);
6116 /* Two registers and a scalar. NB that for ops of this form
6117 * the ARM ARM labels bit 24 as Q, but it is in our variable 'u', not 'q'. */
6124 case 1: /* Float VMLA scalar */
6125 case 5: /* Floating point VMLS scalar */
6126 case 9: /* Floating point VMUL scalar */
6131 case 0: /* Integer VMLA scalar */
6132 case 4: /* Integer VMLS scalar */
6133 case 8: /* Integer VMUL scalar */
6134 case 12: /* VQDMULH scalar */
6135 case 13: /* VQRDMULH scalar */
6136 if (u && ((rd | rn) & 1)) {
6139 tmp = neon_get_scalar(size, rm);
6140 neon_store_scratch(0, tmp);
6141 for (pass = 0; pass < (u ? 4 : 2); pass++) {
6142 tmp = neon_load_scratch(0);
6143 tmp2 = neon_load_reg(rn, pass);
6146 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
6148 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
6150 } else if (op == 13) {
6152 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
6154 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
6156 } else if (op & 1) {
6157 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6158 gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
6159 tcg_temp_free_ptr(fpstatus);
6162 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
6163 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
6164 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
6168 tcg_temp_free_i32(tmp2);
6171 tmp2 = neon_load_reg(rd, pass);
6174 gen_neon_add(size, tmp, tmp2);
6178 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6179 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
6180 tcg_temp_free_ptr(fpstatus);
6184 gen_neon_rsb(size, tmp, tmp2);
6188 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6189 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
6190 tcg_temp_free_ptr(fpstatus);
6196 tcg_temp_free_i32(tmp2);
6198 neon_store_reg(rd, pass, tmp);
6201 case 3: /* VQDMLAL scalar */
6202 case 7: /* VQDMLSL scalar */
6203 case 11: /* VQDMULL scalar */
6208 case 2: /* VMLAL scalar */
6209 case 6: /* VMLSL scalar */
6210 case 10: /* VMULL scalar */
6214 tmp2 = neon_get_scalar(size, rm);
6215 /* We need a copy of tmp2 because gen_neon_mull
6216 * deletes it during pass 0. */
6217 tmp4 = tcg_temp_new_i32();
6218 tcg_gen_mov_i32(tmp4, tmp2);
6219 tmp3 = neon_load_reg(rn, 1);
6221 for (pass = 0; pass < 2; pass++) {
6223 tmp = neon_load_reg(rn, 0);
6228 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
6230 neon_load_reg64(cpu_V1, rd + pass);
6234 gen_neon_negl(cpu_V0, size);
6237 gen_neon_addl(size);
6240 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6242 gen_neon_negl(cpu_V0, size);
6244 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
6250 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6255 neon_store_reg64(cpu_V0, rd + pass);
6258 case 14: /* VQRDMLAH scalar */
6259 case 15: /* VQRDMLSH scalar */
6261 NeonGenThreeOpEnvFn *fn;
6263 if (!dc_isar_feature(aa32_rdm, s)) {
6266 if (u && ((rd | rn) & 1)) {
6271 fn = gen_helper_neon_qrdmlah_s16;
6273 fn = gen_helper_neon_qrdmlah_s32;
6277 fn = gen_helper_neon_qrdmlsh_s16;
6279 fn = gen_helper_neon_qrdmlsh_s32;
6283 tmp2 = neon_get_scalar(size, rm);
6284 for (pass = 0; pass < (u ? 4 : 2); pass++) {
6285 tmp = neon_load_reg(rn, pass);
6286 tmp3 = neon_load_reg(rd, pass);
6287 fn(tmp, cpu_env, tmp, tmp2, tmp3);
6288 tcg_temp_free_i32(tmp3);
6289 neon_store_reg(rd, pass, tmp);
6291 tcg_temp_free_i32(tmp2);
6295 g_assert_not_reached();
6298 } else { /* size == 3 */
6301 imm = (insn >> 8) & 0xf;
6306 if (q && ((rd | rn | rm) & 1)) {
6311 neon_load_reg64(cpu_V0, rn);
6313 neon_load_reg64(cpu_V1, rn + 1);
6315 } else if (imm == 8) {
6316 neon_load_reg64(cpu_V0, rn + 1);
6318 neon_load_reg64(cpu_V1, rm);
6321 tmp64 = tcg_temp_new_i64();
6323 neon_load_reg64(cpu_V0, rn);
6324 neon_load_reg64(tmp64, rn + 1);
6326 neon_load_reg64(cpu_V0, rn + 1);
6327 neon_load_reg64(tmp64, rm);
6329 tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
6330 tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
6331 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6333 neon_load_reg64(cpu_V1, rm);
6335 neon_load_reg64(cpu_V1, rm + 1);
6338 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6339 tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
6340 tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
6341 tcg_temp_free_i64(tmp64);
6344 neon_load_reg64(cpu_V0, rn);
6345 tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
6346 neon_load_reg64(cpu_V1, rm);
6347 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6348 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6350 neon_store_reg64(cpu_V0, rd);
6352 neon_store_reg64(cpu_V1, rd + 1);
6354 } else if ((insn & (1 << 11)) == 0) {
6355 /* Two register misc. */
6356 op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
6357 size = (insn >> 18) & 3;
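/* op here is insn bits [17:16]:[10:7]; it indexes neon_2rm_sizes[] below. */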
6358 /* UNDEF for unknown op values and bad op-size combinations */
6359 if ((neon_2rm_sizes[op] & (1 << size)) == 0) {
6362 if (neon_2rm_is_v8_op(op) &&
6363 !arm_dc_feature(s, ARM_FEATURE_V8)) {
6366 if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) &&
6367 q && ((rm | rd) & 1)) {
6371 case NEON_2RM_VREV64:
6372 for (pass = 0; pass < (q ? 2 : 1); pass++) {
6373 tmp = neon_load_reg(rm, pass * 2);
6374 tmp2 = neon_load_reg(rm, pass * 2 + 1);
6376 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6377 case 1: gen_swap_half(tmp); break;
6378 case 2: /* no-op */ break;
6381 neon_store_reg(rd, pass * 2 + 1, tmp);
6383 neon_store_reg(rd, pass * 2, tmp2);
6386 case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break;
6387 case 1: gen_swap_half(tmp2); break;
6390 neon_store_reg(rd, pass * 2, tmp2);
6394 case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
6395 case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
6396 for (pass = 0; pass < q + 1; pass++) {
6397 tmp = neon_load_reg(rm, pass * 2);
6398 gen_neon_widen(cpu_V0, tmp, size, op & 1);
6399 tmp = neon_load_reg(rm, pass * 2 + 1);
6400 gen_neon_widen(cpu_V1, tmp, size, op & 1);
6402 case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
6403 case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
6404 case 2: tcg_gen_add_i64(CPU_V001); break;
6407 if (op >= NEON_2RM_VPADAL) {
6409 neon_load_reg64(cpu_V1, rd + pass);
6410 gen_neon_addl(size);
6412 neon_store_reg64(cpu_V0, rd + pass);
6418 for (n = 0; n < (q ? 4 : 2); n += 2) {
6419 tmp = neon_load_reg(rm, n);
6420 tmp2 = neon_load_reg(rd, n + 1);
6421 neon_store_reg(rm, n, tmp2);
6422 neon_store_reg(rd, n + 1, tmp);
6429 if (gen_neon_unzip(rd, rm, size, q)) {
6434 if (gen_neon_zip(rd, rm, size, q)) {
6438 case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
6439 /* also VQMOVUN; op field and mnemonics don't line up */
6444 for (pass = 0; pass < 2; pass++) {
6445 neon_load_reg64(cpu_V0, rm + pass);
6446 tmp = tcg_temp_new_i32();
6447 gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size,
6452 neon_store_reg(rd, 0, tmp2);
6453 neon_store_reg(rd, 1, tmp);
6457 case NEON_2RM_VSHLL:
6458 if (q || (rd & 1)) {
6461 tmp = neon_load_reg(rm, 0);
6462 tmp2 = neon_load_reg(rm, 1);
6463 for (pass = 0; pass < 2; pass++) {
6466 gen_neon_widen(cpu_V0, tmp, size, 1);
6467 tcg_gen_shli_i64(cpu_V0, cpu_V0, 8 << size);
6468 neon_store_reg64(cpu_V0, rd + pass);
6471 case NEON_2RM_VCVT_F16_F32:
6476 if (!dc_isar_feature(aa32_fp16_spconv, s) ||
6480 tmp = tcg_temp_new_i32();
6481 tmp2 = tcg_temp_new_i32();
6482 fpst = get_fpstatus_ptr(true);
6483 ahp = get_ahp_flag();
6484 tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0));
6485 gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, fpst, ahp);
6486 tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 1));
6487 gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, fpst, ahp);
6488 tcg_gen_shli_i32(tmp2, tmp2, 16);
6489 tcg_gen_or_i32(tmp2, tmp2, tmp);
6490 tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 2));
6491 gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, fpst, ahp);
6492 tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 3));
6493 neon_store_reg(rd, 0, tmp2);
6494 tmp2 = tcg_temp_new_i32();
6495 gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, fpst, ahp);
6496 tcg_gen_shli_i32(tmp2, tmp2, 16);
6497 tcg_gen_or_i32(tmp2, tmp2, tmp);
6498 neon_store_reg(rd, 1, tmp2);
6499 tcg_temp_free_i32(tmp);
6500 tcg_temp_free_i32(ahp);
6501 tcg_temp_free_ptr(fpst);
6504 case NEON_2RM_VCVT_F32_F16:
6508 if (!dc_isar_feature(aa32_fp16_spconv, s) ||
6512 fpst = get_fpstatus_ptr(true);
6513 ahp = get_ahp_flag();
6514 tmp3 = tcg_temp_new_i32();
6515 tmp = neon_load_reg(rm, 0);
6516 tmp2 = neon_load_reg(rm, 1);
6517 tcg_gen_ext16u_i32(tmp3, tmp);
6518 gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);
6519 tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 0));
6520 tcg_gen_shri_i32(tmp3, tmp, 16);
6521 gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);
6522 tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 1));
6523 tcg_temp_free_i32(tmp);
6524 tcg_gen_ext16u_i32(tmp3, tmp2);
6525 gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);
6526 tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 2));
6527 tcg_gen_shri_i32(tmp3, tmp2, 16);
6528 gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);
6529 tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 3));
6530 tcg_temp_free_i32(tmp2);
6531 tcg_temp_free_i32(tmp3);
6532 tcg_temp_free_i32(ahp);
6533 tcg_temp_free_ptr(fpst);
6536 case NEON_2RM_AESE: case NEON_2RM_AESMC:
6537 if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) {
6540 ptr1 = vfp_reg_ptr(true, rd);
6541 ptr2 = vfp_reg_ptr(true, rm);
6543 /* Bit 6 is the lowest opcode bit; it distinguishes between
6544 * encryption (AESE/AESMC) and decryption (AESD/AESIMC)
6546 tmp3 = tcg_const_i32(extract32(insn, 6, 1));
6548 if (op == NEON_2RM_AESE) {
6549 gen_helper_crypto_aese(ptr1, ptr2, tmp3);
6551 gen_helper_crypto_aesmc(ptr1, ptr2, tmp3);
6553 tcg_temp_free_ptr(ptr1);
6554 tcg_temp_free_ptr(ptr2);
6555 tcg_temp_free_i32(tmp3);
6557 case NEON_2RM_SHA1H:
6558 if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) {
6561 ptr1 = vfp_reg_ptr(true, rd);
6562 ptr2 = vfp_reg_ptr(true, rm);
6564 gen_helper_crypto_sha1h(ptr1, ptr2);
6566 tcg_temp_free_ptr(ptr1);
6567 tcg_temp_free_ptr(ptr2);
6569 case NEON_2RM_SHA1SU1:
6570 if ((rm | rd) & 1) {
6573 /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
6575 if (!dc_isar_feature(aa32_sha2, s)) {
6578 } else if (!dc_isar_feature(aa32_sha1, s)) {
6581 ptr1 = vfp_reg_ptr(true, rd);
6582 ptr2 = vfp_reg_ptr(true, rm);
6584 gen_helper_crypto_sha256su0(ptr1, ptr2);
6586 gen_helper_crypto_sha1su1(ptr1, ptr2);
6588 tcg_temp_free_ptr(ptr1);
6589 tcg_temp_free_ptr(ptr2);
6593 tcg_gen_gvec_not(0, rd_ofs, rm_ofs, vec_size, vec_size);
6596 tcg_gen_gvec_neg(size, rd_ofs, rm_ofs, vec_size, vec_size);
6599 tcg_gen_gvec_abs(size, rd_ofs, rm_ofs, vec_size, vec_size);
6604 for (pass = 0; pass < (q ? 4 : 2); pass++) {
6605 if (neon_2rm_is_float_op(op)) {
6606 tcg_gen_ld_f32(cpu_F0s, cpu_env,
6607 neon_reg_offset(rm, pass));
6610 tmp = neon_load_reg(rm, pass);
6613 case NEON_2RM_VREV32:
6615 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6616 case 1: gen_swap_half(tmp); break;
6620 case NEON_2RM_VREV16:
6625 case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
6626 case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
6627 case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
6633 case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
6634 case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
6635 case 2: tcg_gen_clzi_i32(tmp, tmp, 32); break;
6640 gen_helper_neon_cnt_u8(tmp, tmp);
6642 case NEON_2RM_VQABS:
6645 gen_helper_neon_qabs_s8(tmp, cpu_env, tmp);
6648 gen_helper_neon_qabs_s16(tmp, cpu_env, tmp);
6651 gen_helper_neon_qabs_s32(tmp, cpu_env, tmp);
6656 case NEON_2RM_VQNEG:
6659 gen_helper_neon_qneg_s8(tmp, cpu_env, tmp);
6662 gen_helper_neon_qneg_s16(tmp, cpu_env, tmp);
6665 gen_helper_neon_qneg_s32(tmp, cpu_env, tmp);
6670 case NEON_2RM_VCGT0: case NEON_2RM_VCLE0:
6671 tmp2 = tcg_const_i32(0);
6673 case 0: gen_helper_neon_cgt_s8(tmp, tmp, tmp2); break;
6674 case 1: gen_helper_neon_cgt_s16(tmp, tmp, tmp2); break;
6675 case 2: gen_helper_neon_cgt_s32(tmp, tmp, tmp2); break;
6678 tcg_temp_free_i32(tmp2);
6679 if (op == NEON_2RM_VCLE0) {
6680 tcg_gen_not_i32(tmp, tmp);
6683 case NEON_2RM_VCGE0: case NEON_2RM_VCLT0:
6684 tmp2 = tcg_const_i32(0);
6686 case 0: gen_helper_neon_cge_s8(tmp, tmp, tmp2); break;
6687 case 1: gen_helper_neon_cge_s16(tmp, tmp, tmp2); break;
6688 case 2: gen_helper_neon_cge_s32(tmp, tmp, tmp2); break;
6691 tcg_temp_free_i32(tmp2);
6692 if (op == NEON_2RM_VCLT0) {
6693 tcg_gen_not_i32(tmp, tmp);
6696 case NEON_2RM_VCEQ0:
6697 tmp2 = tcg_const_i32(0);
6699 case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
6700 case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
6701 case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
6704 tcg_temp_free_i32(tmp2);
6706 case NEON_2RM_VCGT0_F:
6708 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6709 tmp2 = tcg_const_i32(0);
6710 gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
6711 tcg_temp_free_i32(tmp2);
6712 tcg_temp_free_ptr(fpstatus);
6715 case NEON_2RM_VCGE0_F:
6717 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6718 tmp2 = tcg_const_i32(0);
6719 gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
6720 tcg_temp_free_i32(tmp2);
6721 tcg_temp_free_ptr(fpstatus);
6724 case NEON_2RM_VCEQ0_F:
6726 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6727 tmp2 = tcg_const_i32(0);
6728 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
6729 tcg_temp_free_i32(tmp2);
6730 tcg_temp_free_ptr(fpstatus);
6733 case NEON_2RM_VCLE0_F:
6735 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6736 tmp2 = tcg_const_i32(0);
6737 gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus);
6738 tcg_temp_free_i32(tmp2);
6739 tcg_temp_free_ptr(fpstatus);
6742 case NEON_2RM_VCLT0_F:
6744 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6745 tmp2 = tcg_const_i32(0);
6746 gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus);
6747 tcg_temp_free_i32(tmp2);
6748 tcg_temp_free_ptr(fpstatus);
6751 case NEON_2RM_VABS_F:
6752 gen_helper_vfp_abss(tmp, tmp);
6754 case NEON_2RM_VNEG_F:
6755 gen_helper_vfp_negs(tmp, tmp);
6758 tmp2 = neon_load_reg(rd, pass);
6759 neon_store_reg(rm, pass, tmp2);
6762 tmp2 = neon_load_reg(rd, pass);
6764 case 0: gen_neon_trn_u8(tmp, tmp2); break;
6765 case 1: gen_neon_trn_u16(tmp, tmp2); break;
6768 neon_store_reg(rm, pass, tmp2);
6770 case NEON_2RM_VRINTN:
6771 case NEON_2RM_VRINTA:
6772 case NEON_2RM_VRINTM:
6773 case NEON_2RM_VRINTP:
6774 case NEON_2RM_VRINTZ:
6777 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6780 if (op == NEON_2RM_VRINTZ) {
6781 rmode = FPROUNDING_ZERO;
6783 rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1];
6786 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6787 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6789 gen_helper_rints(cpu_F0s, cpu_F0s, fpstatus);
6790 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6792 tcg_temp_free_ptr(fpstatus);
6793 tcg_temp_free_i32(tcg_rmode);
6796 case NEON_2RM_VRINTX:
6798 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6799 gen_helper_rints_exact(cpu_F0s, cpu_F0s, fpstatus);
6800 tcg_temp_free_ptr(fpstatus);
6803 case NEON_2RM_VCVTAU:
6804 case NEON_2RM_VCVTAS:
6805 case NEON_2RM_VCVTNU:
6806 case NEON_2RM_VCVTNS:
6807 case NEON_2RM_VCVTPU:
6808 case NEON_2RM_VCVTPS:
6809 case NEON_2RM_VCVTMU:
6810 case NEON_2RM_VCVTMS:
6812 bool is_signed = !extract32(insn, 7, 1);
6813 TCGv_ptr fpst = get_fpstatus_ptr(1);
6814 TCGv_i32 tcg_rmode, tcg_shift;
6815 int rmode = fp_decode_rm[extract32(insn, 8, 2)];
6817 tcg_shift = tcg_const_i32(0);
6818 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6819 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6823 gen_helper_vfp_tosls(cpu_F0s, cpu_F0s,
6826 gen_helper_vfp_touls(cpu_F0s, cpu_F0s,
6830 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6832 tcg_temp_free_i32(tcg_rmode);
6833 tcg_temp_free_i32(tcg_shift);
6834 tcg_temp_free_ptr(fpst);
6837 case NEON_2RM_VRECPE:
6839 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6840 gen_helper_recpe_u32(tmp, tmp, fpstatus);
6841 tcg_temp_free_ptr(fpstatus);
6844 case NEON_2RM_VRSQRTE:
6846 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6847 gen_helper_rsqrte_u32(tmp, tmp, fpstatus);
6848 tcg_temp_free_ptr(fpstatus);
6851 case NEON_2RM_VRECPE_F:
6853 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6854 gen_helper_recpe_f32(cpu_F0s, cpu_F0s, fpstatus);
6855 tcg_temp_free_ptr(fpstatus);
6858 case NEON_2RM_VRSQRTE_F:
6860 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6861 gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, fpstatus);
6862 tcg_temp_free_ptr(fpstatus);
6865 case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
6868 case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
6871 case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
6872 gen_vfp_tosiz(0, 1);
6874 case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
6875 gen_vfp_touiz(0, 1);
6878 /* Reserved op values were caught by the
6879 * neon_2rm_sizes[] check earlier.
6883 if (neon_2rm_is_float_op(op)) {
6884 tcg_gen_st_f32(cpu_F0s, cpu_env,
6885 neon_reg_offset(rd, pass));
6887 neon_store_reg(rd, pass, tmp);
6892 } else if ((insn & (1 << 10)) == 0) {
6894 int n = ((insn >> 8) & 3) + 1;
6895 if ((rn + n) > 32) {
6896 /* This is UNPREDICTABLE; we choose to UNDEF to avoid the
6897 * helper function running off the end of the register file.
6902 if (insn & (1 << 6)) {
6903 tmp = neon_load_reg(rd, 0);
6905 tmp = tcg_temp_new_i32();
6906 tcg_gen_movi_i32(tmp, 0);
6908 tmp2 = neon_load_reg(rm, 0);
6909 ptr1 = vfp_reg_ptr(true, rn);
6910 tmp5 = tcg_const_i32(n);
6911 gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp5);
6912 tcg_temp_free_i32(tmp);
6913 if (insn & (1 << 6)) {
6914 tmp = neon_load_reg(rd, 1);
6916 tmp = tcg_temp_new_i32();
6917 tcg_gen_movi_i32(tmp, 0);
6919 tmp3 = neon_load_reg(rm, 1);
6920 gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp5);
6921 tcg_temp_free_i32(tmp5);
6922 tcg_temp_free_ptr(ptr1);
6923 neon_store_reg(rd, 0, tmp2);
6924 neon_store_reg(rd, 1, tmp3);
6925 tcg_temp_free_i32(tmp);
6926 } else if ((insn & 0x380) == 0) {
6931 if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
6934 if (insn & (1 << 16)) {
6936 element = (insn >> 17) & 7;
6937 } else if (insn & (1 << 17)) {
6939 element = (insn >> 18) & 3;
6942 element = (insn >> 19) & 1;
6944 tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, 0),
6945 neon_element_offset(rm, element, size),
6946 q ? 16 : 8, q ? 16 : 8);
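/* VDUP (scalar): the lowest set bit of the imm4 field decoded above selects
 * the element size and the bits above it give the index, e.g. imm4 == 0b0110
 * is a 16-bit element at index 1.
 */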
6955 /* Advanced SIMD three registers of the same length extension.
6956 * 31 25 23 22 20 16 12 11 10 9 8 3 0
6957 * +---------------+-----+---+-----+----+----+---+----+---+----+---------+----+
6958 * | 1 1 1 1 1 1 0 | op1 | D | op2 | Vn | Vd | 1 | o3 | 0 | o4 | N Q M U | Vm |
6959 * +---------------+-----+---+-----+----+----+---+----+---+----+---------+----+
6961 static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn)
6963 gen_helper_gvec_3 *fn_gvec = NULL;
6964 gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL;
6965 int rd, rn, rm, opr_sz;
6968 bool is_long = false, q = extract32(insn, 6, 1);
6969 bool ptr_is_env = false;
6971 if ((insn & 0xfe200f10) == 0xfc200800) {
6972 /* VCMLA -- 1111 110R R.1S .... .... 1000 ...0 .... */
6973 int size = extract32(insn, 20, 1);
6974 data = extract32(insn, 23, 2); /* rot */
6975 if (!dc_isar_feature(aa32_vcma, s)
6976 || (!size && !dc_isar_feature(aa32_fp16_arith, s))) {
6979 fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah;
6980 } else if ((insn & 0xfea00f10) == 0xfc800800) {
6981 /* VCADD -- 1111 110R 1.0S .... .... 1000 ...0 .... */
6982 int size = extract32(insn, 20, 1);
6983 data = extract32(insn, 24, 1); /* rot */
6984 if (!dc_isar_feature(aa32_vcma, s)
6985 || (!size && !dc_isar_feature(aa32_fp16_arith, s))) {
6988 fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh;
6989 } else if ((insn & 0xfeb00f00) == 0xfc200d00) {
6990 /* V[US]DOT -- 1111 1100 0.10 .... .... 1101 .Q.U .... */
6991 bool u = extract32(insn, 4, 1);
6992 if (!dc_isar_feature(aa32_dp, s)) {
6995 fn_gvec = u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b;
6996 } else if ((insn & 0xff300f10) == 0xfc200810) {
6997 /* VFM[AS]L -- 1111 1100 S.10 .... .... 1000 .Q.1 .... */
6998 int is_s = extract32(insn, 23, 1);
6999 if (!dc_isar_feature(aa32_fhm, s)) {
7003 data = is_s; /* is_2 == 0 */
7004 fn_gvec_ptr = gen_helper_gvec_fmlal_a32;
7010 VFP_DREG_D(rd, insn);
7014 if (q || !is_long) {
7015 VFP_DREG_N(rn, insn);
7016 VFP_DREG_M(rm, insn);
7017 if ((rn | rm) & q & !is_long) {
7020 off_rn = vfp_reg_offset(1, rn);
7021 off_rm = vfp_reg_offset(1, rm);
7023 rn = VFP_SREG_N(insn);
7024 rm = VFP_SREG_M(insn);
7025 off_rn = vfp_reg_offset(0, rn);
7026 off_rm = vfp_reg_offset(0, rm);
7029 if (s->fp_excp_el) {
7030 gen_exception_insn(s, 4, EXCP_UDEF,
7031 syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
7034 if (!s->vfp_enabled) {
7038 opr_sz = (1 + q) * 8;
7044 ptr = get_fpstatus_ptr(1);
7046 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), off_rn, off_rm, ptr,
7047 opr_sz, opr_sz, data, fn_gvec_ptr);
7049 tcg_temp_free_ptr(ptr);
7052 tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd), off_rn, off_rm,
7053 opr_sz, opr_sz, data, fn_gvec);
7058 /* Advanced SIMD two registers and a scalar extension.
7059 * 31 24 23 22 20 16 12 11 10 9 8 3 0
7060 * +-----------------+----+---+----+----+----+---+----+---+----+---------+----+
7061 * | 1 1 1 1 1 1 1 0 | o1 | D | o2 | Vn | Vd | 1 | o3 | 0 | o4 | N Q M U | Vm |
7062 * +-----------------+----+---+----+----+----+---+----+---+----+---------+----+
7066 static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn)
7068 gen_helper_gvec_3 *fn_gvec = NULL;
7069 gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL;
7070 int rd, rn, rm, opr_sz, data;
7072 bool is_long = false, q = extract32(insn, 6, 1);
7073 bool ptr_is_env = false;
7075 if ((insn & 0xff000f10) == 0xfe000800) {
7076 /* VCMLA (indexed) -- 1111 1110 S.RR .... .... 1000 ...0 .... */
7077 int rot = extract32(insn, 20, 2);
7078 int size = extract32(insn, 23, 1);
7081 if (!dc_isar_feature(aa32_vcma, s)) {
7085 if (!dc_isar_feature(aa32_fp16_arith, s)) {
7088 /* For fp16, rm is just Vm, and index is M. */
7089 rm = extract32(insn, 0, 4);
7090 index = extract32(insn, 5, 1);
7092 /* For fp32, rm is the usual M:Vm, and index is 0. */
7093 VFP_DREG_M(rm, insn);
7096 data = (index << 2) | rot;
7097 fn_gvec_ptr = (size ? gen_helper_gvec_fcmlas_idx
7098 : gen_helper_gvec_fcmlah_idx);
7099 } else if ((insn & 0xffb00f00) == 0xfe200d00) {
7100 /* V[US]DOT -- 1111 1110 0.10 .... .... 1101 .Q.U .... */
7101 int u = extract32(insn, 4, 1);
7103 if (!dc_isar_feature(aa32_dp, s)) {
7106 fn_gvec = u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b;
7107 /* rm is just Vm, and index is M. */
7108 data = extract32(insn, 5, 1); /* index */
7109 rm = extract32(insn, 0, 4);
7110 } else if ((insn & 0xffa00f10) == 0xfe000810) {
7111 /* VFM[AS]L -- 1111 1110 0.0S .... .... 1000 .Q.1 .... */
7112 int is_s = extract32(insn, 20, 1);
7113 int vm20 = extract32(insn, 0, 3);
7114 int vm3 = extract32(insn, 3, 1);
7115 int m = extract32(insn, 5, 1);
7118 if (!dc_isar_feature(aa32_fhm, s)) {
7123 index = m * 2 + vm3;
7129 data = (index << 2) | is_s; /* is_2 == 0 */
7130 fn_gvec_ptr = gen_helper_gvec_fmlal_idx_a32;
7136 VFP_DREG_D(rd, insn);
7140 if (q || !is_long) {
7141 VFP_DREG_N(rn, insn);
7142 if (rn & q & !is_long) {
7145 off_rn = vfp_reg_offset(1, rn);
7146 off_rm = vfp_reg_offset(1, rm);
7148 rn = VFP_SREG_N(insn);
7149 off_rn = vfp_reg_offset(0, rn);
7150 off_rm = vfp_reg_offset(0, rm);
7152 if (s->fp_excp_el) {
7153 gen_exception_insn(s, 4, EXCP_UDEF,
7154 syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
7157 if (!s->vfp_enabled) {
7161 opr_sz = (1 + q) * 8;
7167 ptr = get_fpstatus_ptr(1);
7169 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), off_rn, off_rm, ptr,
7170 opr_sz, opr_sz, data, fn_gvec_ptr);
7172 tcg_temp_free_ptr(ptr);
7175 tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd), off_rn, off_rm,
7176 opr_sz, opr_sz, data, fn_gvec);
7181 static int disas_coproc_insn(DisasContext *s, uint32_t insn)
7183 int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
7184 const ARMCPRegInfo *ri;
7186 cpnum = (insn >> 8) & 0xf;
7188 /* First check for coprocessor space used for XScale/iwMMXt insns */
7189 if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cpnum < 2)) {
7190 if (extract32(s->c15_cpar, cpnum, 1) == 0) {
7193 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
7194 return disas_iwmmxt_insn(s, insn);
7195 } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
7196 return disas_dsp_insn(s, insn);
7201 /* Otherwise treat as a generic register access */
7202 is64 = (insn & (1 << 25)) == 0;
7203 if (!is64 && ((insn & (1 << 4)) == 0)) {
7211 opc1 = (insn >> 4) & 0xf;
7213 rt2 = (insn >> 16) & 0xf;
7215 crn = (insn >> 16) & 0xf;
7216 opc1 = (insn >> 21) & 7;
7217 opc2 = (insn >> 5) & 7;
7220 isread = (insn >> 20) & 1;
7221 rt = (insn >> 12) & 0xf;
7223 ri = get_arm_cp_reginfo(s->cp_regs,
7224 ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
7226 /* Check access permissions */
7227 if (!cp_access_ok(s->current_el, ri, isread)) {
7232 (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
7233 /* Emit code to perform further access permissions checks at
7234 * runtime; this may result in an exception.
7235 * Note that on XScale all cp0..c13 registers do an access check
7236 * call in order to handle c15_cpar.
7239 TCGv_i32 tcg_syn, tcg_isread;
7242 /* Note that since we are an implementation which takes an
7243 * exception on a trapped conditional instruction only if the
7244 * instruction passes its condition code check, we can take
7245 * advantage of the clause in the ARM ARM that allows us to set
7246 * the COND field in the instruction to 0xE in all cases.
7247 * We could fish the actual condition out of the insn (ARM)
7248 * or the condexec bits (Thumb) but it isn't necessary.
7253 syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
7256 syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
7262 syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
7265 syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
7270 /* ARMv8 defines that only coprocessors 14 and 15 exist,
7271 * so this can only happen if this is an ARMv7 or earlier CPU,
7272 * in which case the syndrome information won't actually be used as it is never needed. */
7275 assert(!arm_dc_feature(s, ARM_FEATURE_V8));
7276 syndrome = syn_uncategorized();
7280 gen_set_condexec(s);
7281 gen_set_pc_im(s, s->pc - 4);
7282 tmpptr = tcg_const_ptr(ri);
7283 tcg_syn = tcg_const_i32(syndrome);
7284 tcg_isread = tcg_const_i32(isread);
7285 gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn,
7287 tcg_temp_free_ptr(tmpptr);
7288 tcg_temp_free_i32(tcg_syn);
7289 tcg_temp_free_i32(tcg_isread);
7292 /* Handle special cases first */
7293 switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
7300 gen_set_pc_im(s, s->pc);
7301 s->base.is_jmp = DISAS_WFI;
7307 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
7316 if (ri->type & ARM_CP_CONST) {
7317 tmp64 = tcg_const_i64(ri->resetvalue);
7318 } else if (ri->readfn) {
7320 tmp64 = tcg_temp_new_i64();
7321 tmpptr = tcg_const_ptr(ri);
7322 gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
7323 tcg_temp_free_ptr(tmpptr);
7325 tmp64 = tcg_temp_new_i64();
7326 tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
7328 tmp = tcg_temp_new_i32();
7329 tcg_gen_extrl_i64_i32(tmp, tmp64);
7330 store_reg(s, rt, tmp);
7331 tcg_gen_shri_i64(tmp64, tmp64, 32);
7332 tmp = tcg_temp_new_i32();
7333 tcg_gen_extrl_i64_i32(tmp, tmp64);
7334 tcg_temp_free_i64(tmp64);
7335 store_reg(s, rt2, tmp);
7338 if (ri->type & ARM_CP_CONST) {
7339 tmp = tcg_const_i32(ri->resetvalue);
7340 } else if (ri->readfn) {
7342 tmp = tcg_temp_new_i32();
7343 tmpptr = tcg_const_ptr(ri);
7344 gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
7345 tcg_temp_free_ptr(tmpptr);
7347 tmp = load_cpu_offset(ri->fieldoffset);
7350 /* Destination register of r15 for 32 bit loads sets
7351 * the condition codes from the high 4 bits of the value
7354 tcg_temp_free_i32(tmp);
7356 store_reg(s, rt, tmp);
7361 if (ri->type & ARM_CP_CONST) {
7362 /* If not forbidden by access permissions, treat as WI */
7367 TCGv_i32 tmplo, tmphi;
7368 TCGv_i64 tmp64 = tcg_temp_new_i64();
7369 tmplo = load_reg(s, rt);
7370 tmphi = load_reg(s, rt2);
7371 tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
7372 tcg_temp_free_i32(tmplo);
7373 tcg_temp_free_i32(tmphi);
7375 TCGv_ptr tmpptr = tcg_const_ptr(ri);
7376 gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
7377 tcg_temp_free_ptr(tmpptr);
7379 tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
7381 tcg_temp_free_i64(tmp64);
7386 tmp = load_reg(s, rt);
7387 tmpptr = tcg_const_ptr(ri);
7388 gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
7389 tcg_temp_free_ptr(tmpptr);
7390 tcg_temp_free_i32(tmp);
7392 TCGv_i32 tmp = load_reg(s, rt);
7393 store_cpu_offset(tmp, ri->fieldoffset);
7398 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
7399 /* I/O operations must end the TB here (whether read or write) */
7402 } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
7403 /* We default to ending the TB on a coprocessor register write,
7404 * but allow this to be suppressed by the register definition
7405 * (usually only necessary to work around guest bugs).
7413 /* Unknown register; this might be a guest error or a QEMU
7414 * unimplemented feature.
7417 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7418 "64 bit system register cp:%d opc1: %d crm:%d "
7420 isread ? "read" : "write", cpnum, opc1, crm,
7421 s->ns ? "non-secure" : "secure");
7423 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7424 "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
7426 isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
7427 s->ns ? "non-secure" : "secure");
7434 /* Store a 64-bit value to a register pair. Clobbers val. */
7435 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
7438 tmp = tcg_temp_new_i32();
7439 tcg_gen_extrl_i64_i32(tmp, val);
7440 store_reg(s, rlow, tmp);
7441 tmp = tcg_temp_new_i32();
7442 tcg_gen_shri_i64(val, val, 32);
7443 tcg_gen_extrl_i64_i32(tmp, val);
7444 store_reg(s, rhigh, tmp);
7447 /* load a 32-bit value from a register and perform a 64-bit accumulate. */
7448 static void gen_addq_lo(DisasContext *s, TCGv_i64 val, int rlow)
7453 /* Load value and extend to 64 bits. */
7454 tmp = tcg_temp_new_i64();
7455 tmp2 = load_reg(s, rlow);
7456 tcg_gen_extu_i32_i64(tmp, tmp2);
7457 tcg_temp_free_i32(tmp2);
7458 tcg_gen_add_i64(val, val, tmp);
7459 tcg_temp_free_i64(tmp);
7462 /* load and add a 64-bit value from a register pair. */
7463 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
7469 /* Load 64-bit value rd:rn. */
7470 tmpl = load_reg(s, rlow);
7471 tmph = load_reg(s, rhigh);
7472 tmp = tcg_temp_new_i64();
7473 tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
7474 tcg_temp_free_i32(tmpl);
7475 tcg_temp_free_i32(tmph);
7476 tcg_gen_add_i64(val, val, tmp);
7477 tcg_temp_free_i64(tmp);
7480 /* Set N and Z flags from hi|lo. */
7481 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
7483 tcg_gen_mov_i32(cpu_NF, hi);
7484 tcg_gen_or_i32(cpu_ZF, lo, hi);
7487 /* Load/Store exclusive instructions are implemented by remembering
7488 the value/address loaded, and seeing if these are the same
7489 when the store is performed. This should be sufficient to implement
7490 the architecturally mandated semantics, and avoids having to monitor
7491 regular stores. The compare vs the remembered value is done during
7492 the cmpxchg operation, but we must compare the addresses manually. */
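/* In this model the local exclusive monitor is simply the pair
 * cpu_exclusive_addr/cpu_exclusive_val; storing -1 to cpu_exclusive_addr
 * (as gen_clrex() and the store-exclusive path below do) marks the
 * monitor as open, since -1 can never match a zero-extended 32-bit
 * guest address.
 */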
7493 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
7494 TCGv_i32 addr, int size)
7496 TCGv_i32 tmp = tcg_temp_new_i32();
7497 TCGMemOp opc = size | MO_ALIGN | s->be_data;
7502 TCGv_i32 tmp2 = tcg_temp_new_i32();
7503 TCGv_i64 t64 = tcg_temp_new_i64();
7505 /* For AArch32, architecturally the 32-bit word at the lowest
7506 * address is always Rt and the one at addr+4 is Rt2, even if
7507 * the CPU is big-endian. That means we don't want to do a
7508 * gen_aa32_ld_i64(), which invokes gen_aa32_frob64() as if
7509 * for an architecturally 64-bit access, but instead do a
7510 * 64-bit access using MO_BE if appropriate and then split
7512 * This only makes a difference for BE32 user-mode, where
7513 * frob64() must not flip the two halves of the 64-bit data
7514 * but this code must treat BE32 user-mode like BE32 system.
7516 TCGv taddr = gen_aa32_addr(s, addr, opc);
7518 tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
7519 tcg_temp_free(taddr);
7520 tcg_gen_mov_i64(cpu_exclusive_val, t64);
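/* tcg_gen_extr_i64_i32() writes the low half of its i64 source to the
 * first destination and the high half to the second; swapping the
 * destinations for MO_BE is what keeps Rt bound to the word at the
 * lower address, as described above.
 */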
7521 if (s->be_data == MO_BE) {
7522 tcg_gen_extr_i64_i32(tmp2, tmp, t64);
7524 tcg_gen_extr_i64_i32(tmp, tmp2, t64);
7526 tcg_temp_free_i64(t64);
7528 store_reg(s, rt2, tmp2);
7530 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
7531 tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
7534 store_reg(s, rt, tmp);
7535 tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
7538 static void gen_clrex(DisasContext *s)
7540 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
7543 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
7544 TCGv_i32 addr, int size)
7546 TCGv_i32 t0, t1, t2;
7549 TCGLabel *done_label;
7550 TCGLabel *fail_label;
7551 TCGMemOp opc = size | MO_ALIGN | s->be_data;
7553 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
7559 fail_label = gen_new_label();
7560 done_label = gen_new_label();
7561 extaddr = tcg_temp_new_i64();
7562 tcg_gen_extu_i32_i64(extaddr, addr);
7563 tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
7564 tcg_temp_free_i64(extaddr);
7566 taddr = gen_aa32_addr(s, addr, opc);
7567 t0 = tcg_temp_new_i32();
7568 t1 = load_reg(s, rt);
7570 TCGv_i64 o64 = tcg_temp_new_i64();
7571 TCGv_i64 n64 = tcg_temp_new_i64();
7573 t2 = load_reg(s, rt2);
7574 /* For AArch32, architecturally the 32-bit word at the lowest
7575 * address is always Rt and the one at addr+4 is Rt2, even if
7576 * the CPU is big-endian. Since we're going to treat this as a
7577 * single 64-bit BE store, we need to put the two halves in the
7578 * opposite order for BE to LE, so that they end up in the right
7580 * We don't want gen_aa32_frob64() because that does the wrong
7581 * thing for BE32 usermode.
7583 if (s->be_data == MO_BE) {
7584 tcg_gen_concat_i32_i64(n64, t2, t1);
7586 tcg_gen_concat_i32_i64(n64, t1, t2);
7588 tcg_temp_free_i32(t2);
7590 tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
7591 get_mem_index(s), opc);
7592 tcg_temp_free_i64(n64);
7594 tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
7595 tcg_gen_extrl_i64_i32(t0, o64);
7597 tcg_temp_free_i64(o64);
7599 t2 = tcg_temp_new_i32();
7600 tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
7601 tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
7602 tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
7603 tcg_temp_free_i32(t2);
7605 tcg_temp_free_i32(t1);
7606 tcg_temp_free(taddr);
7607 tcg_gen_mov_i32(cpu_R[rd], t0);
7608 tcg_temp_free_i32(t0);
7609 tcg_gen_br(done_label);
7611 gen_set_label(fail_label);
7612 tcg_gen_movi_i32(cpu_R[rd], 1);
7613 gen_set_label(done_label);
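/* Both the success and failure paths converge here, so the exclusive
 * monitor is invalidated whether or not the store went ahead.
 */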
7614 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
7620 * @mode: mode field from insn (which stack to store to)
7621 * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
7622 * @writeback: true if writeback bit set
7624 * Generate code for the SRS (Store Return State) insn.
7626 static void gen_srs(DisasContext *s,
7627 uint32_t mode, uint32_t amode, bool writeback)
7634 * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
7635 * and specified mode is monitor mode
7636 * - UNDEFINED in Hyp mode
7637 * - UNPREDICTABLE in User or System mode
7638 * - UNPREDICTABLE if the specified mode is:
7639 * -- not implemented
7640 * -- not a valid mode number
7641 * -- a mode that's at a higher exception level
7642 * -- Monitor, if we are Non-secure
7643 * For the UNPREDICTABLE cases we choose to UNDEF.
7645 if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
7646 gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(), 3);
7650 if (s->current_el == 0 || s->current_el == 2) {
7655 case ARM_CPU_MODE_USR:
7656 case ARM_CPU_MODE_FIQ:
7657 case ARM_CPU_MODE_IRQ:
7658 case ARM_CPU_MODE_SVC:
7659 case ARM_CPU_MODE_ABT:
7660 case ARM_CPU_MODE_UND:
7661 case ARM_CPU_MODE_SYS:
7663 case ARM_CPU_MODE_HYP:
7664 if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
7668 case ARM_CPU_MODE_MON:
7669 /* No need to check specifically for "are we non-secure" because
7670 * we've already made EL0 UNDEF and handled the trap for S-EL1;
7671 * so if this isn't EL3 then we must be non-secure.
7673 if (s->current_el != 3) {
7682 gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
7683 default_exception_el(s));
7687 addr = tcg_temp_new_i32();
7688 tmp = tcg_const_i32(mode);
7689 /* get_r13_banked() will raise an exception if called from System mode */
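/* Sync the condexec bits and the PC before calling it, so that any
 * exception the helper raises is reported with the correct return state.
 */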
7690 gen_set_condexec(s);
7691 gen_set_pc_im(s, s->pc - 4);
7692 gen_helper_get_r13_banked(addr, cpu_env, tmp);
7693 tcg_temp_free_i32(tmp);
7710 tcg_gen_addi_i32(addr, addr, offset);
7711 tmp = load_reg(s, 14);
7712 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
7713 tcg_temp_free_i32(tmp);
7714 tmp = load_cpu_field(spsr);
7715 tcg_gen_addi_i32(addr, addr, 4);
7716 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
7717 tcg_temp_free_i32(tmp);
7735 tcg_gen_addi_i32(addr, addr, offset);
7736 tmp = tcg_const_i32(mode);
7737 gen_helper_set_r13_banked(cpu_env, tmp, addr);
7738 tcg_temp_free_i32(tmp);
7740 tcg_temp_free_i32(addr);
7741 s->base.is_jmp = DISAS_UPDATE;
7744 /* Generate a label used for skipping this instruction */
7745 static void arm_gen_condlabel(DisasContext *s)
7748 s->condlabel = gen_new_label();
7753 /* Skip this instruction if the ARM condition is false */
7754 static void arm_skip_unless(DisasContext *s, uint32_t cond)
7756 arm_gen_condlabel(s);
7757 arm_gen_test_cc(cond ^ 1, s->condlabel);
7760 static void disas_arm_insn(DisasContext *s, unsigned int insn)
7762 unsigned int cond, val, op1, i, shift, rm, rs, rn, rd, sh;
7769 /* M variants do not implement ARM mode; this must raise the INVSTATE
7770 * UsageFault exception.
7772 if (arm_dc_feature(s, ARM_FEATURE_M)) {
7773 gen_exception_insn(s, 4, EXCP_INVSTATE, syn_uncategorized(),
7774 default_exception_el(s));
7779 /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
7780 * choose to UNDEF. In ARMv5 and above the space is used
7781 * for miscellaneous unconditional instructions.
7785 /* Unconditional instructions. */
7786 if (((insn >> 25) & 7) == 1) {
7787 /* NEON Data processing. */
7788 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
7792 if (disas_neon_data_insn(s, insn)) {
7797 if ((insn & 0x0f100000) == 0x04000000) {
7798 /* NEON load/store. */
7799 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
7803 if (disas_neon_ls_insn(s, insn)) {
7808 if ((insn & 0x0f000e10) == 0x0e000a00) {
7810 if (disas_vfp_insn(s, insn)) {
7815 if (((insn & 0x0f30f000) == 0x0510f000) ||
7816 ((insn & 0x0f30f010) == 0x0710f000)) {
7817 if ((insn & (1 << 22)) == 0) {
7819 if (!arm_dc_feature(s, ARM_FEATURE_V7MP)) {
7823 /* Otherwise PLD; v5TE+ */
7827 if (((insn & 0x0f70f000) == 0x0450f000) ||
7828 ((insn & 0x0f70f010) == 0x0650f000)) {
7830 return; /* PLI; V7 */
7832 if (((insn & 0x0f700000) == 0x04100000) ||
7833 ((insn & 0x0f700010) == 0x06100000)) {
7834 if (!arm_dc_feature(s, ARM_FEATURE_V7MP)) {
7837 return; /* v7MP: Unallocated memory hint: must NOP */
7840 if ((insn & 0x0ffffdff) == 0x01010000) {
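/* SETEND: only switch the data endianness if the requested state
 * differs from the current one.
 */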
7843 if (((insn >> 9) & 1) != !!(s->be_data == MO_BE)) {
7844 gen_helper_setend(cpu_env);
7845 s->base.is_jmp = DISAS_UPDATE;
7848 } else if ((insn & 0x0fffff00) == 0x057ff000) {
7849 switch ((insn >> 4) & 0xf) {
7857 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
7860 /* We need to break the TB after this insn to execute
7861 * self-modifying code correctly and also to take
7862 * any pending interrupts immediately.
7864 gen_goto_tb(s, 0, s->pc & ~1);
7867 if ((insn & 0xf) || !dc_isar_feature(aa32_sb, s)) {
7871 * TODO: There is no speculation barrier opcode
7872 * for TCG; MB and end the TB instead.
7874 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
7875 gen_goto_tb(s, 0, s->pc & ~1);
7880 } else if ((insn & 0x0e5fffe0) == 0x084d0500) {
7883 gen_srs(s, (insn & 0x1f), (insn >> 23) & 3, insn & (1 << 21));
7885 } else if ((insn & 0x0e50ffe0) == 0x08100a00) {
7891 rn = (insn >> 16) & 0xf;
7892 addr = load_reg(s, rn);
7893 i = (insn >> 23) & 3;
7895 case 0: offset = -4; break; /* DA */
7896 case 1: offset = 0; break; /* IA */
7897 case 2: offset = -8; break; /* DB */
7898 case 3: offset = 4; break; /* IB */
7902 tcg_gen_addi_i32(addr, addr, offset);
7903 /* Load PC into tmp and CPSR into tmp2. */
7904 tmp = tcg_temp_new_i32();
7905 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
7906 tcg_gen_addi_i32(addr, addr, 4);
7907 tmp2 = tcg_temp_new_i32();
7908 gen_aa32_ld32u(s, tmp2, addr, get_mem_index(s));
7909 if (insn & (1 << 21)) {
7910 /* Base writeback. */
7912 case 0: offset = -8; break;
7913 case 1: offset = 4; break;
7914 case 2: offset = -4; break;
7915 case 3: offset = 0; break;
7919 tcg_gen_addi_i32(addr, addr, offset);
7920 store_reg(s, rn, addr);
7922 tcg_temp_free_i32(addr);
7924 gen_rfe(s, tmp, tmp2);
7926 } else if ((insn & 0x0e000000) == 0x0a000000) {
7927 /* branch link and change to thumb (blx <offset>) */
7930 val = (uint32_t)s->pc;
7931 tmp = tcg_temp_new_i32();
7932 tcg_gen_movi_i32(tmp, val);
7933 store_reg(s, 14, tmp);
7934 /* Sign-extend the 24-bit offset */
7935 offset = (((int32_t)insn) << 8) >> 8;
7936 /* offset * 4 + bit24 * 2 + (thumb bit) */
7937 val += (offset << 2) | ((insn >> 23) & 2) | 1;
7938 /* pipeline offset */
7940 /* protected by ARCH(5); above, near the start of uncond block */
7943 } else if ((insn & 0x0e000f00) == 0x0c000100) {
7944 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
7945 /* iWMMXt register transfer. */
7946 if (extract32(s->c15_cpar, 1, 1)) {
7947 if (!disas_iwmmxt_insn(s, insn)) {
7952 } else if ((insn & 0x0e000a00) == 0x0c000800
7953 && arm_dc_feature(s, ARM_FEATURE_V8)) {
7954 if (disas_neon_insn_3same_ext(s, insn)) {
7958 } else if ((insn & 0x0f000a00) == 0x0e000800
7959 && arm_dc_feature(s, ARM_FEATURE_V8)) {
7960 if (disas_neon_insn_2reg_scalar_ext(s, insn)) {
7964 } else if ((insn & 0x0fe00000) == 0x0c400000) {
7965 /* Coprocessor double register transfer. */
7967 } else if ((insn & 0x0f000010) == 0x0e000010) {
7968 /* Additional coprocessor register transfer. */
7969 } else if ((insn & 0x0ff10020) == 0x01000000) {
7972 /* cps (privileged) */
7976 if (insn & (1 << 19)) {
7977 if (insn & (1 << 8))
7979 if (insn & (1 << 7))
7981 if (insn & (1 << 6))
7983 if (insn & (1 << 18))
7986 if (insn & (1 << 17)) {
7988 val |= (insn & 0x1f);
7991 gen_set_psr_im(s, mask, 0, val);
7998 /* if not always execute, we generate a conditional jump to
8000 arm_skip_unless(s, cond);
8002 if ((insn & 0x0f900000) == 0x03000000) {
8003 if ((insn & (1 << 21)) == 0) {
8005 rd = (insn >> 12) & 0xf;
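/* MOVW (bit 22 clear) or MOVT (bit 22 set): the 16-bit immediate is
 * split across insn[19:16] and insn[11:0].
 */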
8006 val = ((insn >> 4) & 0xf000) | (insn & 0xfff);
8007 if ((insn & (1 << 22)) == 0) {
8009 tmp = tcg_temp_new_i32();
8010 tcg_gen_movi_i32(tmp, val);
8013 tmp = load_reg(s, rd);
8014 tcg_gen_ext16u_i32(tmp, tmp);
8015 tcg_gen_ori_i32(tmp, tmp, val << 16);
8017 store_reg(s, rd, tmp);
8019 if (((insn >> 12) & 0xf) != 0xf)
8021 if (((insn >> 16) & 0xf) == 0) {
8022 gen_nop_hint(s, insn & 0xff);
8024 /* CPSR = immediate */
8026 shift = ((insn >> 8) & 0xf) * 2;
8028 val = (val >> shift) | (val << (32 - shift));
8029 i = ((insn & (1 << 22)) != 0);
8030 if (gen_set_psr_im(s, msr_mask(s, (insn >> 16) & 0xf, i),
8036 } else if ((insn & 0x0f900000) == 0x01000000
8037 && (insn & 0x00000090) != 0x00000090) {
8038 /* miscellaneous instructions */
8039 op1 = (insn >> 21) & 3;
8040 sh = (insn >> 4) & 0xf;
8043 case 0x0: /* MSR, MRS */
8044 if (insn & (1 << 9)) {
8045 /* MSR (banked) and MRS (banked) */
8046 int sysm = extract32(insn, 16, 4) |
8047 (extract32(insn, 8, 1) << 4);
8048 int r = extract32(insn, 22, 1);
8052 gen_msr_banked(s, r, sysm, rm);
8055 int rd = extract32(insn, 12, 4);
8057 gen_mrs_banked(s, r, sysm, rd);
8062 /* MSR, MRS (for PSRs) */
8065 tmp = load_reg(s, rm);
8066 i = ((op1 & 2) != 0);
8067 if (gen_set_psr(s, msr_mask(s, (insn >> 16) & 0xf, i), i, tmp))
8071 rd = (insn >> 12) & 0xf;
8075 tmp = load_cpu_field(spsr);
8077 tmp = tcg_temp_new_i32();
8078 gen_helper_cpsr_read(tmp, cpu_env);
8080 store_reg(s, rd, tmp);
8085 /* branch/exchange thumb (bx). */
8087 tmp = load_reg(s, rm);
8089 } else if (op1 == 3) {
8092 rd = (insn >> 12) & 0xf;
8093 tmp = load_reg(s, rm);
8094 tcg_gen_clzi_i32(tmp, tmp, 32);
8095 store_reg(s, rd, tmp);
8103 /* Trivial implementation equivalent to bx. */
8104 tmp = load_reg(s, rm);
8115 /* branch link/exchange thumb (blx) */
8116 tmp = load_reg(s, rm);
8117 tmp2 = tcg_temp_new_i32();
8118 tcg_gen_movi_i32(tmp2, s->pc);
8119 store_reg(s, 14, tmp2);
8125 uint32_t c = extract32(insn, 8, 4);
8127 /* Check this CPU supports ARMv8 CRC instructions.
8128 * op1 == 3 is UNPREDICTABLE but handle as UNDEFINED.
8129 * Bits 8, 10 and 11 should be zero.
8131 if (!dc_isar_feature(aa32_crc32, s) || op1 == 0x3 || (c & 0xd) != 0) {
8135 rn = extract32(insn, 16, 4);
8136 rd = extract32(insn, 12, 4);
8138 tmp = load_reg(s, rn);
8139 tmp2 = load_reg(s, rm);
8141 tcg_gen_andi_i32(tmp2, tmp2, 0xff);
8142 } else if (op1 == 1) {
8143 tcg_gen_andi_i32(tmp2, tmp2, 0xffff);
8145 tmp3 = tcg_const_i32(1 << op1);
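/* The final helper argument is the operand width in bytes (1, 2 or 4). */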
8147 gen_helper_crc32c(tmp, tmp, tmp2, tmp3);
8149 gen_helper_crc32(tmp, tmp, tmp2, tmp3);
8151 tcg_temp_free_i32(tmp2);
8152 tcg_temp_free_i32(tmp3);
8153 store_reg(s, rd, tmp);
8156 case 0x5: /* saturating add/subtract */
8158 rd = (insn >> 12) & 0xf;
8159 rn = (insn >> 16) & 0xf;
8160 tmp = load_reg(s, rm);
8161 tmp2 = load_reg(s, rn);
8163 gen_helper_double_saturate(tmp2, cpu_env, tmp2);
8165 gen_helper_sub_saturate(tmp, cpu_env, tmp, tmp2);
8167 gen_helper_add_saturate(tmp, cpu_env, tmp, tmp2);
8168 tcg_temp_free_i32(tmp2);
8169 store_reg(s, rd, tmp);
8171 case 0x6: /* ERET */
8175 if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
8178 if ((insn & 0x000fff0f) != 0x0000000e) {
8179 /* UNPREDICTABLE; we choose to UNDEF */
8183 if (s->current_el == 2) {
8184 tmp = load_cpu_field(elr_el[2]);
8186 tmp = load_reg(s, 14);
8188 gen_exception_return(s, tmp);
8192 int imm16 = extract32(insn, 0, 4) | (extract32(insn, 8, 12) << 4);
8201 gen_exception_bkpt_insn(s, 4, syn_aa32_bkpt(imm16, false));
8204 /* Hypervisor call (v7) */
8212 /* Secure monitor call (v6+) */
8220 g_assert_not_reached();
8224 case 0x8: /* signed multiply */
8229 rs = (insn >> 8) & 0xf;
8230 rn = (insn >> 12) & 0xf;
8231 rd = (insn >> 16) & 0xf;
8233 /* (32 * 16) >> 16 */
8234 tmp = load_reg(s, rm);
8235 tmp2 = load_reg(s, rs);
8237 tcg_gen_sari_i32(tmp2, tmp2, 16);
8240 tmp64 = gen_muls_i64_i32(tmp, tmp2);
8241 tcg_gen_shri_i64(tmp64, tmp64, 16);
8242 tmp = tcg_temp_new_i32();
8243 tcg_gen_extrl_i64_i32(tmp, tmp64);
8244 tcg_temp_free_i64(tmp64);
8245 if ((sh & 2) == 0) {
8246 tmp2 = load_reg(s, rn);
8247 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
8248 tcg_temp_free_i32(tmp2);
8250 store_reg(s, rd, tmp);
8253 tmp = load_reg(s, rm);
8254 tmp2 = load_reg(s, rs);
8255 gen_mulxy(tmp, tmp2, sh & 2, sh & 4);
8256 tcg_temp_free_i32(tmp2);
8258 tmp64 = tcg_temp_new_i64();
8259 tcg_gen_ext_i32_i64(tmp64, tmp);
8260 tcg_temp_free_i32(tmp);
8261 gen_addq(s, tmp64, rn, rd);
8262 gen_storeq_reg(s, rn, rd, tmp64);
8263 tcg_temp_free_i64(tmp64);
8266 tmp2 = load_reg(s, rn);
8267 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
8268 tcg_temp_free_i32(tmp2);
8270 store_reg(s, rd, tmp);
8277 } else if (((insn & 0x0e000000) == 0 &&
8278 (insn & 0x00000090) != 0x90) ||
8279 ((insn & 0x0e000000) == (1 << 25))) {
8280 int set_cc, logic_cc, shiftop;
8282 op1 = (insn >> 21) & 0xf;
8283 set_cc = (insn >> 20) & 1;
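/* table_logic_cc[] marks the logical opcodes (AND, EOR, TST, TEQ, ORR,
 * MOV, BIC, MVN), whose flag-setting forms take the carry from the
 * shifter rather than from an adder.
 */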
8284 logic_cc = table_logic_cc[op1] & set_cc;
8286 /* data processing instruction */
8287 if (insn & (1 << 25)) {
8288 /* immediate operand */
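/* Standard A32 modified immediate: bits [7:0] give the value and
 * bits [11:8] a rotation, applied as a rotate right by twice that
 * amount.
 */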
8290 shift = ((insn >> 8) & 0xf) * 2;
8292 val = (val >> shift) | (val << (32 - shift));
8294 tmp2 = tcg_temp_new_i32();
8295 tcg_gen_movi_i32(tmp2, val);
8296 if (logic_cc && shift) {
8297 gen_set_CF_bit31(tmp2);
8302 tmp2 = load_reg(s, rm);
8303 shiftop = (insn >> 5) & 3;
8304 if (!(insn & (1 << 4))) {
8305 shift = (insn >> 7) & 0x1f;
8306 gen_arm_shift_im(tmp2, shiftop, shift, logic_cc);
8308 rs = (insn >> 8) & 0xf;
8309 tmp = load_reg(s, rs);
8310 gen_arm_shift_reg(tmp2, shiftop, tmp, logic_cc);
8313 if (op1 != 0x0f && op1 != 0x0d) {
8314 rn = (insn >> 16) & 0xf;
8315 tmp = load_reg(s, rn);
8319 rd = (insn >> 12) & 0xf;
8322 tcg_gen_and_i32(tmp, tmp, tmp2);
8326 store_reg_bx(s, rd, tmp);
8329 tcg_gen_xor_i32(tmp, tmp, tmp2);
8333 store_reg_bx(s, rd, tmp);
8336 if (set_cc && rd == 15) {
8337 /* SUBS r15, ... is used for exception return. */
8341 gen_sub_CC(tmp, tmp, tmp2);
8342 gen_exception_return(s, tmp);
8345 gen_sub_CC(tmp, tmp, tmp2);
8347 tcg_gen_sub_i32(tmp, tmp, tmp2);
8349 store_reg_bx(s, rd, tmp);
8354 gen_sub_CC(tmp, tmp2, tmp);
8356 tcg_gen_sub_i32(tmp, tmp2, tmp);
8358 store_reg_bx(s, rd, tmp);
8362 gen_add_CC(tmp, tmp, tmp2);
8364 tcg_gen_add_i32(tmp, tmp, tmp2);
8366 store_reg_bx(s, rd, tmp);
8370 gen_adc_CC(tmp, tmp, tmp2);
8372 gen_add_carry(tmp, tmp, tmp2);
8374 store_reg_bx(s, rd, tmp);
8378 gen_sbc_CC(tmp, tmp, tmp2);
8380 gen_sub_carry(tmp, tmp, tmp2);
8382 store_reg_bx(s, rd, tmp);
8386 gen_sbc_CC(tmp, tmp2, tmp);
8388 gen_sub_carry(tmp, tmp2, tmp);
8390 store_reg_bx(s, rd, tmp);
8394 tcg_gen_and_i32(tmp, tmp, tmp2);
8397 tcg_temp_free_i32(tmp);
8401 tcg_gen_xor_i32(tmp, tmp, tmp2);
8404 tcg_temp_free_i32(tmp);
8408 gen_sub_CC(tmp, tmp, tmp2);
8410 tcg_temp_free_i32(tmp);
8414 gen_add_CC(tmp, tmp, tmp2);
8416 tcg_temp_free_i32(tmp);
8419 tcg_gen_or_i32(tmp, tmp, tmp2);
8423 store_reg_bx(s, rd, tmp);
8426 if (logic_cc && rd == 15) {
8427 /* MOVS r15, ... is used for exception return. */
8431 gen_exception_return(s, tmp2);
8436 store_reg_bx(s, rd, tmp2);
8440 tcg_gen_andc_i32(tmp, tmp, tmp2);
8444 store_reg_bx(s, rd, tmp);
8448 tcg_gen_not_i32(tmp2, tmp2);
8452 store_reg_bx(s, rd, tmp2);
8455 if (op1 != 0x0f && op1 != 0x0d) {
8456 tcg_temp_free_i32(tmp2);
8459 /* other instructions */
8460 op1 = (insn >> 24) & 0xf;
8464 /* multiplies, extra load/stores */
8465 sh = (insn >> 5) & 3;
8468 rd = (insn >> 16) & 0xf;
8469 rn = (insn >> 12) & 0xf;
8470 rs = (insn >> 8) & 0xf;
8472 op1 = (insn >> 20) & 0xf;
8474 case 0: case 1: case 2: case 3: case 6:
8476 tmp = load_reg(s, rs);
8477 tmp2 = load_reg(s, rm);
8478 tcg_gen_mul_i32(tmp, tmp, tmp2);
8479 tcg_temp_free_i32(tmp2);
8480 if (insn & (1 << 22)) {
8481 /* Subtract (mls) */
8483 tmp2 = load_reg(s, rn);
8484 tcg_gen_sub_i32(tmp, tmp2, tmp);
8485 tcg_temp_free_i32(tmp2);
8486 } else if (insn & (1 << 21)) {
8488 tmp2 = load_reg(s, rn);
8489 tcg_gen_add_i32(tmp, tmp, tmp2);
8490 tcg_temp_free_i32(tmp2);
8492 if (insn & (1 << 20))
8494 store_reg(s, rd, tmp);
8497 /* 64 bit mul double accumulate (UMAAL) */
8499 tmp = load_reg(s, rs);
8500 tmp2 = load_reg(s, rm);
8501 tmp64 = gen_mulu_i64_i32(tmp, tmp2);
8502 gen_addq_lo(s, tmp64, rn);
8503 gen_addq_lo(s, tmp64, rd);
8504 gen_storeq_reg(s, rn, rd, tmp64);
8505 tcg_temp_free_i64(tmp64);
8507 case 8: case 9: case 10: case 11:
8508 case 12: case 13: case 14: case 15:
8509 /* 64 bit mul: UMULL, UMLAL, SMULL, SMLAL. */
8510 tmp = load_reg(s, rs);
8511 tmp2 = load_reg(s, rm);
8512 if (insn & (1 << 22)) {
8513 tcg_gen_muls2_i32(tmp, tmp2, tmp, tmp2);
8515 tcg_gen_mulu2_i32(tmp, tmp2, tmp, tmp2);
8517 if (insn & (1 << 21)) { /* mult accumulate */
8518 TCGv_i32 al = load_reg(s, rn);
8519 TCGv_i32 ah = load_reg(s, rd);
8520 tcg_gen_add2_i32(tmp, tmp2, tmp, tmp2, al, ah);
8521 tcg_temp_free_i32(al);
8522 tcg_temp_free_i32(ah);
8524 if (insn & (1 << 20)) {
8525 gen_logicq_cc(tmp, tmp2);
8527 store_reg(s, rn, tmp);
8528 store_reg(s, rd, tmp2);
8534 rn = (insn >> 16) & 0xf;
8535 rd = (insn >> 12) & 0xf;
8536 if (insn & (1 << 23)) {
8537 /* load/store exclusive */
8538 bool is_ld = extract32(insn, 20, 1);
8539 bool is_lasr = !extract32(insn, 8, 1);
8540 int op2 = (insn >> 8) & 3;
8541 op1 = (insn >> 21) & 0x3;
8544 case 0: /* lda/stl */
8550 case 1: /* reserved */
8552 case 2: /* ldaex/stlex */
8555 case 3: /* ldrex/strex */
8564 addr = tcg_temp_local_new_i32();
8565 load_reg_var(s, addr, rn);
8567 if (is_lasr && !is_ld) {
8568 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
8573 tmp = tcg_temp_new_i32();
8576 gen_aa32_ld32u_iss(s, tmp, addr,
8581 gen_aa32_ld8u_iss(s, tmp, addr,
8586 gen_aa32_ld16u_iss(s, tmp, addr,
8593 store_reg(s, rd, tmp);
8596 tmp = load_reg(s, rm);
8599 gen_aa32_st32_iss(s, tmp, addr,
8604 gen_aa32_st8_iss(s, tmp, addr,
8609 gen_aa32_st16_iss(s, tmp, addr,
8616 tcg_temp_free_i32(tmp);
8621 gen_load_exclusive(s, rd, 15, addr, 2);
8623 case 1: /* ldrexd */
8624 gen_load_exclusive(s, rd, rd + 1, addr, 3);
8626 case 2: /* ldrexb */
8627 gen_load_exclusive(s, rd, 15, addr, 0);
8629 case 3: /* ldrexh */
8630 gen_load_exclusive(s, rd, 15, addr, 1);
8639 gen_store_exclusive(s, rd, rm, 15, addr, 2);
8641 case 1: /* strexd */
8642 gen_store_exclusive(s, rd, rm, rm + 1, addr, 3);
8644 case 2: /* strexb */
8645 gen_store_exclusive(s, rd, rm, 15, addr, 0);
8647 case 3: /* strexh */
8648 gen_store_exclusive(s, rd, rm, 15, addr, 1);
8654 tcg_temp_free_i32(addr);
8656 if (is_lasr && is_ld) {
8657 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
8659 } else if ((insn & 0x00300f00) == 0) {
8660 /* 0bcccc_0001_0x00_xxxx_xxxx_0000_1001_xxxx
8665 TCGMemOp opc = s->be_data;
8669 if (insn & (1 << 22)) {
8672 opc |= MO_UL | MO_ALIGN;
8675 addr = load_reg(s, rn);
8676 taddr = gen_aa32_addr(s, addr, opc);
8677 tcg_temp_free_i32(addr);
8679 tmp = load_reg(s, rm);
8680 tcg_gen_atomic_xchg_i32(tmp, taddr, tmp,
8681 get_mem_index(s), opc);
8682 tcg_temp_free(taddr);
8683 store_reg(s, rd, tmp);
8690 bool load = insn & (1 << 20);
8691 bool wbit = insn & (1 << 21);
8692 bool pbit = insn & (1 << 24);
8693 bool doubleword = false;
8696 /* Misc load/store */
8697 rn = (insn >> 16) & 0xf;
8698 rd = (insn >> 12) & 0xf;
8700 /* ISS not valid if writeback */
8701 issinfo = (pbit & !wbit) ? rd : ISSInvalid;
8703 if (!load && (sh & 2)) {
8707 /* UNPREDICTABLE; we choose to UNDEF */
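/* Doubleword forms: with bit 20 clear, sh == 2 encodes LDRD and
 * sh == 3 encodes STRD, so bit 0 of sh selects store vs load.
 */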
8710 load = (sh & 1) == 0;
8714 addr = load_reg(s, rn);
8716 gen_add_datah_offset(s, insn, 0, addr);
8723 tmp = load_reg(s, rd);
8724 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
8725 tcg_temp_free_i32(tmp);
8726 tcg_gen_addi_i32(addr, addr, 4);
8727 tmp = load_reg(s, rd + 1);
8728 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
8729 tcg_temp_free_i32(tmp);
8732 tmp = tcg_temp_new_i32();
8733 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
8734 store_reg(s, rd, tmp);
8735 tcg_gen_addi_i32(addr, addr, 4);
8736 tmp = tcg_temp_new_i32();
8737 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
8740 address_offset = -4;
8743 tmp = tcg_temp_new_i32();
8746 gen_aa32_ld16u_iss(s, tmp, addr, get_mem_index(s),
8750 gen_aa32_ld8s_iss(s, tmp, addr, get_mem_index(s),
8755 gen_aa32_ld16s_iss(s, tmp, addr, get_mem_index(s),
8761 tmp = load_reg(s, rd);
8762 gen_aa32_st16_iss(s, tmp, addr, get_mem_index(s), issinfo);
8763 tcg_temp_free_i32(tmp);
8765 /* Perform base writeback before the loaded value to
8766 ensure correct behavior with overlapping index registers.
8767 ldrd with base writeback is undefined if the
8768 destination and index registers overlap. */
8770 gen_add_datah_offset(s, insn, address_offset, addr);
8771 store_reg(s, rn, addr);
8774 tcg_gen_addi_i32(addr, addr, address_offset);
8775 store_reg(s, rn, addr);
8777 tcg_temp_free_i32(addr);
8780 /* Complete the load. */
8781 store_reg(s, rd, tmp);
8790 if (insn & (1 << 4)) {
8792 /* Armv6 Media instructions. */
8794 rn = (insn >> 16) & 0xf;
8795 rd = (insn >> 12) & 0xf;
8796 rs = (insn >> 8) & 0xf;
8797 switch ((insn >> 23) & 3) {
8798 case 0: /* Parallel add/subtract. */
8799 op1 = (insn >> 20) & 7;
8800 tmp = load_reg(s, rn);
8801 tmp2 = load_reg(s, rm);
8802 sh = (insn >> 5) & 7;
8803 if ((op1 & 3) == 0 || sh == 5 || sh == 6)
8805 gen_arm_parallel_addsub(op1, sh, tmp, tmp2);
8806 tcg_temp_free_i32(tmp2);
8807 store_reg(s, rd, tmp);
8810 if ((insn & 0x00700020) == 0) {
8811 /* Halfword pack. */
8812 tmp = load_reg(s, rn);
8813 tmp2 = load_reg(s, rm);
8814 shift = (insn >> 7) & 0x1f;
8815 if (insn & (1 << 6)) {
8819 tcg_gen_sari_i32(tmp2, tmp2, shift);
8820 tcg_gen_andi_i32(tmp, tmp, 0xffff0000);
8821 tcg_gen_ext16u_i32(tmp2, tmp2);
8825 tcg_gen_shli_i32(tmp2, tmp2, shift);
8826 tcg_gen_ext16u_i32(tmp, tmp);
8827 tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
8829 tcg_gen_or_i32(tmp, tmp, tmp2);
8830 tcg_temp_free_i32(tmp2);
8831 store_reg(s, rd, tmp);
8832 } else if ((insn & 0x00200020) == 0x00200000) {
8834 tmp = load_reg(s, rm);
8835 shift = (insn >> 7) & 0x1f;
8836 if (insn & (1 << 6)) {
8839 tcg_gen_sari_i32(tmp, tmp, shift);
8841 tcg_gen_shli_i32(tmp, tmp, shift);
8843 sh = (insn >> 16) & 0x1f;
8844 tmp2 = tcg_const_i32(sh);
8845 if (insn & (1 << 22))
8846 gen_helper_usat(tmp, cpu_env, tmp, tmp2);
8848 gen_helper_ssat(tmp, cpu_env, tmp, tmp2);
8849 tcg_temp_free_i32(tmp2);
8850 store_reg(s, rd, tmp);
8851 } else if ((insn & 0x00300fe0) == 0x00200f20) {
8853 tmp = load_reg(s, rm);
8854 sh = (insn >> 16) & 0x1f;
8855 tmp2 = tcg_const_i32(sh);
8856 if (insn & (1 << 22))
8857 gen_helper_usat16(tmp, cpu_env, tmp, tmp2);
8859 gen_helper_ssat16(tmp, cpu_env, tmp, tmp2);
8860 tcg_temp_free_i32(tmp2);
8861 store_reg(s, rd, tmp);
8862 } else if ((insn & 0x00700fe0) == 0x00000fa0) {
8864 tmp = load_reg(s, rn);
8865 tmp2 = load_reg(s, rm);
8866 tmp3 = tcg_temp_new_i32();
8867 tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUARMState, GE));
8868 gen_helper_sel_flags(tmp, tmp3, tmp, tmp2);
8869 tcg_temp_free_i32(tmp3);
8870 tcg_temp_free_i32(tmp2);
8871 store_reg(s, rd, tmp);
8872 } else if ((insn & 0x000003e0) == 0x00000060) {
8873 tmp = load_reg(s, rm);
8874 shift = (insn >> 10) & 3;
8875 /* ??? In many cases it's not necessary to do a
8876 rotate, a shift is sufficient. */
8878 tcg_gen_rotri_i32(tmp, tmp, shift * 8);
8879 op1 = (insn >> 20) & 7;
8881 case 0: gen_sxtb16(tmp); break;
8882 case 2: gen_sxtb(tmp); break;
8883 case 3: gen_sxth(tmp); break;
8884 case 4: gen_uxtb16(tmp); break;
8885 case 6: gen_uxtb(tmp); break;
8886 case 7: gen_uxth(tmp); break;
8887 default: goto illegal_op;
8890 tmp2 = load_reg(s, rn);
8891 if ((op1 & 3) == 0) {
8892 gen_add16(tmp, tmp2);
8894 tcg_gen_add_i32(tmp, tmp, tmp2);
8895 tcg_temp_free_i32(tmp2);
8898 store_reg(s, rd, tmp);
8899 } else if ((insn & 0x003f0f60) == 0x003f0f20) {
8901 tmp = load_reg(s, rm);
8902 if (insn & (1 << 22)) {
8903 if (insn & (1 << 7)) {
8907 gen_helper_rbit(tmp, tmp);
8910 if (insn & (1 << 7))
8913 tcg_gen_bswap32_i32(tmp, tmp);
8915 store_reg(s, rd, tmp);
8920 case 2: /* Multiplies (Type 3). */
8921 switch ((insn >> 20) & 0x7) {
8923 if (((insn >> 6) ^ (insn >> 7)) & 1) {
8924 /* op2 not 00x or 11x : UNDEF */
8927 /* Signed multiply most significant [accumulate].
8928 (SMMUL, SMMLA, SMMLS) */
8929 tmp = load_reg(s, rm);
8930 tmp2 = load_reg(s, rs);
8931 tmp64 = gen_muls_i64_i32(tmp, tmp2);
8934 tmp = load_reg(s, rd);
8935 if (insn & (1 << 6)) {
8936 tmp64 = gen_subq_msw(tmp64, tmp);
8938 tmp64 = gen_addq_msw(tmp64, tmp);
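/* Bit 5 is the 'R' (rounding) suffix: add 0x80000000 so that the
 * following shift returns the rounded high word.
 */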
8941 if (insn & (1 << 5)) {
8942 tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u);
8944 tcg_gen_shri_i64(tmp64, tmp64, 32);
8945 tmp = tcg_temp_new_i32();
8946 tcg_gen_extrl_i64_i32(tmp, tmp64);
8947 tcg_temp_free_i64(tmp64);
8948 store_reg(s, rn, tmp);
8952 /* SMLAD, SMUAD, SMLSD, SMUSD, SMLALD, SMLSLD */
8953 if (insn & (1 << 7)) {
8956 tmp = load_reg(s, rm);
8957 tmp2 = load_reg(s, rs);
8958 if (insn & (1 << 5))
8959 gen_swap_half(tmp2);
8960 gen_smul_dual(tmp, tmp2);
8961 if (insn & (1 << 22)) {
8962 /* smlald, smlsld */
8965 tmp64 = tcg_temp_new_i64();
8966 tmp64_2 = tcg_temp_new_i64();
8967 tcg_gen_ext_i32_i64(tmp64, tmp);
8968 tcg_gen_ext_i32_i64(tmp64_2, tmp2);
8969 tcg_temp_free_i32(tmp);
8970 tcg_temp_free_i32(tmp2);
8971 if (insn & (1 << 6)) {
8972 tcg_gen_sub_i64(tmp64, tmp64, tmp64_2);
8974 tcg_gen_add_i64(tmp64, tmp64, tmp64_2);
8976 tcg_temp_free_i64(tmp64_2);
8977 gen_addq(s, tmp64, rd, rn);
8978 gen_storeq_reg(s, rd, rn, tmp64);
8979 tcg_temp_free_i64(tmp64);
8981 /* smuad, smusd, smlad, smlsd */
8982 if (insn & (1 << 6)) {
8983 /* This subtraction cannot overflow. */
8984 tcg_gen_sub_i32(tmp, tmp, tmp2);
8986 /* This addition cannot overflow 32 bits;
8987 * however it may overflow considered as a
8988 * signed operation, in which case we must set the Q flag.
8991 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
8993 tcg_temp_free_i32(tmp2);
8996 tmp2 = load_reg(s, rd);
8997 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
8998 tcg_temp_free_i32(tmp2);
9000 store_reg(s, rn, tmp);
9006 if (!dc_isar_feature(arm_div, s)) {
9009 if (((insn >> 5) & 7) || (rd != 15)) {
9012 tmp = load_reg(s, rm);
9013 tmp2 = load_reg(s, rs);
9014 if (insn & (1 << 21)) {
9015 gen_helper_udiv(tmp, tmp, tmp2);
9017 gen_helper_sdiv(tmp, tmp, tmp2);
9019 tcg_temp_free_i32(tmp2);
9020 store_reg(s, rn, tmp);
9027 op1 = ((insn >> 17) & 0x38) | ((insn >> 5) & 7);
9029 case 0: /* Unsigned sum of absolute differences. */
9031 tmp = load_reg(s, rm);
9032 tmp2 = load_reg(s, rs);
9033 gen_helper_usad8(tmp, tmp, tmp2);
9034 tcg_temp_free_i32(tmp2);
9036 tmp2 = load_reg(s, rd);
9037 tcg_gen_add_i32(tmp, tmp, tmp2);
9038 tcg_temp_free_i32(tmp2);
9040 store_reg(s, rn, tmp);
9042 case 0x20: case 0x24: case 0x28: case 0x2c:
9043 /* Bitfield insert/clear. */
9045 shift = (insn >> 7) & 0x1f;
9046 i = (insn >> 16) & 0x1f;
9048 /* UNPREDICTABLE; we choose to UNDEF */
9053 tmp = tcg_temp_new_i32();
9054 tcg_gen_movi_i32(tmp, 0);
9056 tmp = load_reg(s, rm);
9059 tmp2 = load_reg(s, rd);
9060 tcg_gen_deposit_i32(tmp, tmp2, tmp, shift, i);
9061 tcg_temp_free_i32(tmp2);
9063 store_reg(s, rd, tmp);
9065 case 0x12: case 0x16: case 0x1a: case 0x1e: /* sbfx */
9066 case 0x32: case 0x36: case 0x3a: case 0x3e: /* ubfx */
9068 tmp = load_reg(s, rm);
9069 shift = (insn >> 7) & 0x1f;
9070 i = ((insn >> 16) & 0x1f) + 1;
9075 tcg_gen_extract_i32(tmp, tmp, shift, i);
9077 tcg_gen_sextract_i32(tmp, tmp, shift, i);
9080 store_reg(s, rd, tmp);
9090 /* Check for undefined extension instructions
9091 * per the ARM Bible IE:
9092 * xxxx 0111 1111 xxxx xxxx xxxx 1111 xxxx
9094 sh = (0xf << 20) | (0xf << 4);
9095 if (op1 == 0x7 && ((insn & sh) == sh))
9099 /* load/store byte/word */
9100 rn = (insn >> 16) & 0xf;
9101 rd = (insn >> 12) & 0xf;
9102 tmp2 = load_reg(s, rn);
9103 if ((insn & 0x01200000) == 0x00200000) {
9105 i = get_a32_user_mem_index(s);
9107 i = get_mem_index(s);
9109 if (insn & (1 << 24))
9110 gen_add_data_offset(s, insn, tmp2);
9111 if (insn & (1 << 20)) {
9113 tmp = tcg_temp_new_i32();
9114 if (insn & (1 << 22)) {
9115 gen_aa32_ld8u_iss(s, tmp, tmp2, i, rd);
9117 gen_aa32_ld32u_iss(s, tmp, tmp2, i, rd);
9121 tmp = load_reg(s, rd);
9122 if (insn & (1 << 22)) {
9123 gen_aa32_st8_iss(s, tmp, tmp2, i, rd);
9125 gen_aa32_st32_iss(s, tmp, tmp2, i, rd);
9127 tcg_temp_free_i32(tmp);
9129 if (!(insn & (1 << 24))) {
9130 gen_add_data_offset(s, insn, tmp2);
9131 store_reg(s, rn, tmp2);
9132 } else if (insn & (1 << 21)) {
9133 store_reg(s, rn, tmp2);
9135 tcg_temp_free_i32(tmp2);
9137 if (insn & (1 << 20)) {
9138 /* Complete the load. */
9139 store_reg_from_load(s, rd, tmp);
9145 int j, n, loaded_base;
9146 bool exc_return = false;
9147 bool is_load = extract32(insn, 20, 1);
9149 TCGv_i32 loaded_var;
9150 /* load/store multiple words */
9151 /* XXX: store correct base if write back */
9152 if (insn & (1 << 22)) {
9153 /* LDM (user), LDM (exception return) and STM (user) */
9155 goto illegal_op; /* only usable in supervisor mode */
9157 if (is_load && extract32(insn, 15, 1)) {
9163 rn = (insn >> 16) & 0xf;
9164 addr = load_reg(s, rn);
9166 /* compute total size */
9171 if (insn & (1 << i))
9174 /* XXX: test invalid n == 0 case ? */
9175 if (insn & (1 << 23)) {
9176 if (insn & (1 << 24)) {
9178 tcg_gen_addi_i32(addr, addr, 4);
9180 /* post increment */
9183 if (insn & (1 << 24)) {
9185 tcg_gen_addi_i32(addr, addr, -(n * 4));
9187 /* post decrement */
9189 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
9194 if (insn & (1 << i)) {
9197 tmp = tcg_temp_new_i32();
9198 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
9200 tmp2 = tcg_const_i32(i);
9201 gen_helper_set_user_reg(cpu_env, tmp2, tmp);
9202 tcg_temp_free_i32(tmp2);
9203 tcg_temp_free_i32(tmp);
9204 } else if (i == rn) {
9207 } else if (i == 15 && exc_return) {
9208 store_pc_exc_ret(s, tmp);
9210 store_reg_from_load(s, i, tmp);
9215 /* special case: r15 = PC + 8 */
9216 val = (long)s->pc + 4;
9217 tmp = tcg_temp_new_i32();
9218 tcg_gen_movi_i32(tmp, val);
9220 tmp = tcg_temp_new_i32();
9221 tmp2 = tcg_const_i32(i);
9222 gen_helper_get_user_reg(tmp, cpu_env, tmp2);
9223 tcg_temp_free_i32(tmp2);
9225 tmp = load_reg(s, i);
9227 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
9228 tcg_temp_free_i32(tmp);
9231 /* no need to add after the last transfer */
9233 tcg_gen_addi_i32(addr, addr, 4);
9236 if (insn & (1 << 21)) {
9238 if (insn & (1 << 23)) {
9239 if (insn & (1 << 24)) {
9242 /* post increment */
9243 tcg_gen_addi_i32(addr, addr, 4);
9246 if (insn & (1 << 24)) {
9249 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
9251 /* post decrement */
9252 tcg_gen_addi_i32(addr, addr, -(n * 4));
9255 store_reg(s, rn, addr);
9257 tcg_temp_free_i32(addr);
9260 store_reg(s, rn, loaded_var);
9263 /* Restore CPSR from SPSR. */
9264 tmp = load_cpu_field(spsr);
9265 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
9268 gen_helper_cpsr_write_eret(cpu_env, tmp);
9269 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
9272 tcg_temp_free_i32(tmp);
9273 /* Must exit loop to check un-masked IRQs */
9274 s->base.is_jmp = DISAS_EXIT;
9283 /* branch (and link) */
9284 val = (int32_t)s->pc;
9285 if (insn & (1 << 24)) {
9286 tmp = tcg_temp_new_i32();
9287 tcg_gen_movi_i32(tmp, val);
9288 store_reg(s, 14, tmp);
9290 offset = sextract32(insn << 2, 0, 26);
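/* The 24-bit immediate is a word offset: shifting left by 2 before
 * sign-extracting 26 bits yields the signed byte offset.
 */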
9298 if (((insn >> 8) & 0xe) == 10) {
9300 if (disas_vfp_insn(s, insn)) {
9303 } else if (disas_coproc_insn(s, insn)) {
9310 gen_set_pc_im(s, s->pc);
9311 s->svc_imm = extract32(insn, 0, 24);
9312 s->base.is_jmp = DISAS_SWI;
9316 gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
9317 default_exception_el(s));
9323 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t insn)
9325 /* Return true if this is a 16 bit instruction. We must be precise
9326 * about this (matching the decode). We assume that s->pc still
9327 * points to the first 16 bits of the insn.
9329 if ((insn >> 11) < 0x1d) {
9330 /* Definitely a 16-bit instruction */
9334 /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
9335 * first half of a 32-bit Thumb insn. Thumb-1 cores might
9336 * end up actually treating this as two 16-bit insns, though,
9337 * if it's half of a bl/blx pair that might span a page boundary.
9339 if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
9340 arm_dc_feature(s, ARM_FEATURE_M)) {
9341 /* Thumb2 cores (including all M profile ones) always treat
9342 * 32-bit insns as 32-bit.
9347 if ((insn >> 11) == 0x1e && s->pc - s->page_start < TARGET_PAGE_SIZE - 3) {
9348 /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
9349 * is not on the next page; we merge this into a 32-bit
9354 /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
9355 * 0b1111_1xxx_xxxx_xxxx : BL suffix;
9356 * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
9357 * -- handle as single 16 bit insn
9362 /* Return true if this is a Thumb-2 logical op. */
9364 thumb2_logic_op(int op)
9369 /* Generate code for a Thumb-2 data processing operation. If CONDS is nonzero
9370 then set condition code flags based on the result of the operation.
9371 If SHIFTER_OUT is nonzero then set the carry flag for logical operations
9372 to the high bit of T1.
9373 Returns zero if the opcode is valid. */
9376 gen_thumb2_data_op(DisasContext *s, int op, int conds, uint32_t shifter_out,
9377 TCGv_i32 t0, TCGv_i32 t1)
9384 tcg_gen_and_i32(t0, t0, t1);
9388 tcg_gen_andc_i32(t0, t0, t1);
9392 tcg_gen_or_i32(t0, t0, t1);
9396 tcg_gen_orc_i32(t0, t0, t1);
9400 tcg_gen_xor_i32(t0, t0, t1);
9405 gen_add_CC(t0, t0, t1);
9407 tcg_gen_add_i32(t0, t0, t1);
9411 gen_adc_CC(t0, t0, t1);
9417 gen_sbc_CC(t0, t0, t1);
9419 gen_sub_carry(t0, t0, t1);
9424 gen_sub_CC(t0, t0, t1);
9426 tcg_gen_sub_i32(t0, t0, t1);
9430 gen_sub_CC(t0, t1, t0);
9432 tcg_gen_sub_i32(t0, t1, t0);
9434 default: /* 5, 6, 7, 9, 12, 15. */
9440 gen_set_CF_bit31(t1);
9445 /* Translate a 32-bit thumb instruction. */
9446 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
9448 uint32_t imm, shift, offset;
9449 uint32_t rd, rn, rm, rs;
9461 * ARMv6-M supports a limited subset of Thumb2 instructions.
9462 * Other Thumb1 architectures allow only 32-bit
9463 * combined BL/BLX prefix and suffix.
9465 if (arm_dc_feature(s, ARM_FEATURE_M) &&
9466 !arm_dc_feature(s, ARM_FEATURE_V7)) {
9469 static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
9470 0xf3b08040 /* dsb */,
9471 0xf3b08050 /* dmb */,
9472 0xf3b08060 /* isb */,
9473 0xf3e08000 /* mrs */,
9474 0xf000d000 /* bl */};
9475 static const uint32_t armv6m_mask[] = {0xffe0d000,
9482 for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
9483 if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
9491 } else if ((insn & 0xf800e800) != 0xf000e800) {
9495 rn = (insn >> 16) & 0xf;
9496 rs = (insn >> 12) & 0xf;
9497 rd = (insn >> 8) & 0xf;
9499 switch ((insn >> 25) & 0xf) {
9500 case 0: case 1: case 2: case 3:
9501 /* 16-bit instructions. Should never happen. */
9504 if (insn & (1 << 22)) {
9505 /* 0b1110_100x_x1xx_xxxx_xxxx_xxxx_xxxx_xxxx
9506 * - load/store doubleword, load/store exclusive, ldacq/strel,
9509 if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_M) &&
9510 arm_dc_feature(s, ARM_FEATURE_V8)) {
9511 /* 0b1110_1001_0111_1111_1110_1001_0111_1111
9513 * The bulk of the behaviour for this instruction is implemented
9514 * in v7m_handle_execute_nsc(), which deals with the insn when
9515 * it is executed by a CPU in non-secure state from memory
9516 * which is Secure & NonSecure-Callable.
9517 * Here we only need to handle the remaining cases:
9518 * * in NS memory (including the "security extension not
9519 * implemented" case) : NOP
9520 * * in S memory but CPU already secure (clear IT bits)
9521 * We know that the attribute for the memory this insn is
9522 * in must match the current CPU state, because otherwise
9523 * get_phys_addr_pmsav8 would have generated an exception.
9525 if (s->v8m_secure) {
9526 /* Like the IT insn, we don't need to generate any code */
9527 s->condexec_cond = 0;
9528 s->condexec_mask = 0;
9530 } else if (insn & 0x01200000) {
9531 /* 0b1110_1000_x11x_xxxx_xxxx_xxxx_xxxx_xxxx
9532 * - load/store dual (post-indexed)
9533 * 0b1111_1001_x10x_xxxx_xxxx_xxxx_xxxx_xxxx
9534 * - load/store dual (literal and immediate)
9535 * 0b1111_1001_x11x_xxxx_xxxx_xxxx_xxxx_xxxx
9536 * - load/store dual (pre-indexed)
9538 bool wback = extract32(insn, 21, 1);
9541 if (insn & (1 << 21)) {
9545 addr = tcg_temp_new_i32();
9546 tcg_gen_movi_i32(addr, s->pc & ~3);
9548 addr = load_reg(s, rn);
9550 offset = (insn & 0xff) * 4;
9551 if ((insn & (1 << 23)) == 0) {
9555 if (s->v8m_stackcheck && rn == 13 && wback) {
9557 * Here 'addr' is the current SP; if offset is +ve we're
9558 * moving SP up, else down. It is UNKNOWN whether the limit
9559 * check triggers when SP starts below the limit and ends
9560 * up above it; check whichever of the current and final
9561 * SP is lower, so QEMU will trigger in that situation.
9563 if ((int32_t)offset < 0) {
9564 TCGv_i32 newsp = tcg_temp_new_i32();
9566 tcg_gen_addi_i32(newsp, addr, offset);
9567 gen_helper_v8m_stackcheck(cpu_env, newsp);
9568 tcg_temp_free_i32(newsp);
9570 gen_helper_v8m_stackcheck(cpu_env, addr);
9574 if (insn & (1 << 24)) {
9575 tcg_gen_addi_i32(addr, addr, offset);
9578 if (insn & (1 << 20)) {
9580 tmp = tcg_temp_new_i32();
9581 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
9582 store_reg(s, rs, tmp);
9583 tcg_gen_addi_i32(addr, addr, 4);
9584 tmp = tcg_temp_new_i32();
9585 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
9586 store_reg(s, rd, tmp);
9589 tmp = load_reg(s, rs);
9590 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
9591 tcg_temp_free_i32(tmp);
9592 tcg_gen_addi_i32(addr, addr, 4);
9593 tmp = load_reg(s, rd);
9594 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
9595 tcg_temp_free_i32(tmp);
9598 /* Base writeback. */
9599 tcg_gen_addi_i32(addr, addr, offset - 4);
9600 store_reg(s, rn, addr);
9602 tcg_temp_free_i32(addr);
9604 } else if ((insn & (1 << 23)) == 0) {
9605 /* 0b1110_1000_010x_xxxx_xxxx_xxxx_xxxx_xxxx
9606 * - load/store exclusive word
9610 if (!(insn & (1 << 20)) &&
9611 arm_dc_feature(s, ARM_FEATURE_M) &&
9612 arm_dc_feature(s, ARM_FEATURE_V8)) {
9613 /* 0b1110_1000_0100_xxxx_1111_xxxx_xxxx_xxxx
9616 bool alt = insn & (1 << 7);
9617 TCGv_i32 addr, op, ttresp;
9619 if ((insn & 0x3f) || rd == 13 || rd == 15 || rn == 15) {
9620 /* we UNDEF for these UNPREDICTABLE cases */
9624 if (alt && !s->v8m_secure) {
9628 addr = load_reg(s, rn);
9629 op = tcg_const_i32(extract32(insn, 6, 2));
9630 ttresp = tcg_temp_new_i32();
9631 gen_helper_v7m_tt(ttresp, cpu_env, addr, op);
9632 tcg_temp_free_i32(addr);
9633 tcg_temp_free_i32(op);
9634 store_reg(s, rd, ttresp);
9639 addr = tcg_temp_local_new_i32();
9640 load_reg_var(s, addr, rn);
9641 tcg_gen_addi_i32(addr, addr, (insn & 0xff) << 2);
9642 if (insn & (1 << 20)) {
9643 gen_load_exclusive(s, rs, 15, addr, 2);
9645 gen_store_exclusive(s, rd, rs, 15, addr, 2);
9647 tcg_temp_free_i32(addr);
9648 } else if ((insn & (7 << 5)) == 0) {
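/* Table Branch (TBB/TBH): load a byte or halfword entry, double it
 * and add it to the PC.
 */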
9651 addr = tcg_temp_new_i32();
9652 tcg_gen_movi_i32(addr, s->pc);
9654 addr = load_reg(s, rn);
9656 tmp = load_reg(s, rm);
9657 tcg_gen_add_i32(addr, addr, tmp);
9658 if (insn & (1 << 4)) {
9660 tcg_gen_add_i32(addr, addr, tmp);
9661 tcg_temp_free_i32(tmp);
9662 tmp = tcg_temp_new_i32();
9663 gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
9665 tcg_temp_free_i32(tmp);
9666 tmp = tcg_temp_new_i32();
9667 gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
9669 tcg_temp_free_i32(addr);
9670 tcg_gen_shli_i32(tmp, tmp, 1);
9671 tcg_gen_addi_i32(tmp, tmp, s->pc);
9672 store_reg(s, 15, tmp);
9674 bool is_lasr = false;
9675 bool is_ld = extract32(insn, 20, 1);
9676 int op2 = (insn >> 6) & 0x3;
9677 op = (insn >> 4) & 0x3;
9682 /* Load/store exclusive byte/halfword/doubleword */
9689 /* Load-acquire/store-release */
9695 /* Load-acquire/store-release exclusive */
9701 if (is_lasr && !is_ld) {
9702 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
9705 addr = tcg_temp_local_new_i32();
9706 load_reg_var(s, addr, rn);
9709 tmp = tcg_temp_new_i32();
9712 gen_aa32_ld8u_iss(s, tmp, addr, get_mem_index(s),
9716 gen_aa32_ld16u_iss(s, tmp, addr, get_mem_index(s),
9720 gen_aa32_ld32u_iss(s, tmp, addr, get_mem_index(s),
9726 store_reg(s, rs, tmp);
9728 tmp = load_reg(s, rs);
9731 gen_aa32_st8_iss(s, tmp, addr, get_mem_index(s),
9735 gen_aa32_st16_iss(s, tmp, addr, get_mem_index(s),
9739 gen_aa32_st32_iss(s, tmp, addr, get_mem_index(s),
9745 tcg_temp_free_i32(tmp);
9748 gen_load_exclusive(s, rs, rd, addr, op);
9750 gen_store_exclusive(s, rm, rs, rd, addr, op);
9752 tcg_temp_free_i32(addr);
9754 if (is_lasr && is_ld) {
9755 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
9759 /* Load/store multiple, RFE, SRS. */
9760 if (((insn >> 23) & 1) == ((insn >> 24) & 1)) {
9761 /* RFE, SRS: not available in user mode or on M profile */
9762 if (IS_USER(s) || arm_dc_feature(s, ARM_FEATURE_M)) {
9765 if (insn & (1 << 20)) {
9767 addr = load_reg(s, rn);
9768 if ((insn & (1 << 24)) == 0)
9769 tcg_gen_addi_i32(addr, addr, -8);
9770 /* Load PC into tmp and CPSR into tmp2. */
9771 tmp = tcg_temp_new_i32();
9772 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
9773 tcg_gen_addi_i32(addr, addr, 4);
9774 tmp2 = tcg_temp_new_i32();
9775 gen_aa32_ld32u(s, tmp2, addr, get_mem_index(s));
9776 if (insn & (1 << 21)) {
9777 /* Base writeback. */
9778 if (insn & (1 << 24)) {
9779 tcg_gen_addi_i32(addr, addr, 4);
9781 tcg_gen_addi_i32(addr, addr, -4);
9783 store_reg(s, rn, addr);
9785 tcg_temp_free_i32(addr);
9787 gen_rfe(s, tmp, tmp2);
9790 gen_srs(s, (insn & 0x1f), (insn & (1 << 24)) ? 1 : 2,
9794 int i, loaded_base = 0;
9795 TCGv_i32 loaded_var;
9796 bool wback = extract32(insn, 21, 1);
9797 /* Load/store multiple. */
9798 addr = load_reg(s, rn);
9800 for (i = 0; i < 16; i++) {
9801 if (insn & (1 << i))
9805 if (insn & (1 << 24)) {
9806 tcg_gen_addi_i32(addr, addr, -offset);
9809 if (s->v8m_stackcheck && rn == 13 && wback) {
9811 * If the writeback is incrementing SP rather than
9812 * decrementing it, and the initial SP is below the
9813 * stack limit but the final written-back SP would
9814 * be above, then we must not perform any memory
9815 * accesses, but it is IMPDEF whether we generate
9816 * an exception. We choose to do so in this case.
9817 * At this point 'addr' is the lowest address, so
9818 * either the original SP (if incrementing) or our
9819 * final SP (if decrementing), so that's what we check.
9821 gen_helper_v8m_stackcheck(cpu_env, addr);
9825 for (i = 0; i < 16; i++) {
9826 if ((insn & (1 << i)) == 0)
9828 if (insn & (1 << 20)) {
9830 tmp = tcg_temp_new_i32();
9831 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
9833 gen_bx_excret(s, tmp);
9834 } else if (i == rn) {
9838 store_reg(s, i, tmp);
9842 tmp = load_reg(s, i);
9843 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
9844 tcg_temp_free_i32(tmp);
9846 tcg_gen_addi_i32(addr, addr, 4);
9849 store_reg(s, rn, loaded_var);
9852 /* Base register writeback. */
9853 if (insn & (1 << 24)) {
9854 tcg_gen_addi_i32(addr, addr, -offset);
9856 /* Fault if writeback register is in register list. */
9857 if (insn & (1 << rn))
9859 store_reg(s, rn, addr);
9861 tcg_temp_free_i32(addr);
9868 op = (insn >> 21) & 0xf;
9870 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9873 /* Halfword pack. */
9874 tmp = load_reg(s, rn);
9875 tmp2 = load_reg(s, rm);
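/* The imm5 shift amount is split across imm3 (bits [14:12]) and
 * imm2 (bits [7:6]).
 */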
9876 shift = ((insn >> 10) & 0x1c) | ((insn >> 6) & 0x3);
9877 if (insn & (1 << 5)) {
9881 tcg_gen_sari_i32(tmp2, tmp2, shift);
9882 tcg_gen_andi_i32(tmp, tmp, 0xffff0000);
9883 tcg_gen_ext16u_i32(tmp2, tmp2);
9887 tcg_gen_shli_i32(tmp2, tmp2, shift);
9888 tcg_gen_ext16u_i32(tmp, tmp);
9889 tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
9891 tcg_gen_or_i32(tmp, tmp, tmp2);
9892 tcg_temp_free_i32(tmp2);
9893 store_reg(s, rd, tmp);
9895 /* Data processing register constant shift. */
9897 tmp = tcg_temp_new_i32();
9898 tcg_gen_movi_i32(tmp, 0);
9900 tmp = load_reg(s, rn);
9902 tmp2 = load_reg(s, rm);
9904 shiftop = (insn >> 4) & 3;
9905 shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
9906 conds = (insn & (1 << 20)) != 0;
9907 logic_cc = (conds && thumb2_logic_op(op));
9908 gen_arm_shift_im(tmp2, shiftop, shift, logic_cc);
9909 if (gen_thumb2_data_op(s, op, conds, 0, tmp, tmp2))
9911 tcg_temp_free_i32(tmp2);
9913 ((op == 2 && rn == 15) ||
9914 (op == 8 && rn == 13) ||
9915 (op == 13 && rn == 13))) {
9916 /* MOV SP, ... or ADD SP, SP, ... or SUB SP, SP, ... */
9917 store_sp_checked(s, tmp);
9918 } else if (rd != 15) {
9919 store_reg(s, rd, tmp);
9921 tcg_temp_free_i32(tmp);
9925 case 13: /* Misc data processing. */
9926 op = ((insn >> 22) & 6) | ((insn >> 7) & 1);
9927 if (op < 4 && (insn & 0xf000) != 0xf000)
9930 case 0: /* Register controlled shift. */
9931 tmp = load_reg(s, rn);
9932 tmp2 = load_reg(s, rm);
9933 if ((insn & 0x70) != 0)
9936 * 0b1111_1010_0xxx_xxxx_1111_xxxx_0000_xxxx:
9937 * - MOV, MOVS (register-shifted register), flagsetting
9939 op = (insn >> 21) & 3;
9940 logic_cc = (insn & (1 << 20)) != 0;
9941 gen_arm_shift_reg(tmp, op, tmp2, logic_cc);
9944 store_reg(s, rd, tmp);
9946 case 1: /* Sign/zero extend. */
9947 op = (insn >> 20) & 7;
9949 case 0: /* SXTAH, SXTH */
9950 case 1: /* UXTAH, UXTH */
9951 case 4: /* SXTAB, SXTB */
9952 case 5: /* UXTAB, UXTB */
9954 case 2: /* SXTAB16, SXTB16 */
9955 case 3: /* UXTAB16, UXTB16 */
9956 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9964 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9968 tmp = load_reg(s, rm);
9969 shift = (insn >> 4) & 3;
9970 /* ??? In many cases it's not necessary to do a
9971 rotate, a shift is sufficient. */
9973 tcg_gen_rotri_i32(tmp, tmp, shift * 8);
9974 op = (insn >> 20) & 7;
9976 case 0: gen_sxth(tmp); break;
9977 case 1: gen_uxth(tmp); break;
9978 case 2: gen_sxtb16(tmp); break;
9979 case 3: gen_uxtb16(tmp); break;
9980 case 4: gen_sxtb(tmp); break;
9981 case 5: gen_uxtb(tmp); break;
9983 g_assert_not_reached();
9986 tmp2 = load_reg(s, rn);
9987 if ((op >> 1) == 1) {
9988 gen_add16(tmp, tmp2);
9990 tcg_gen_add_i32(tmp, tmp, tmp2);
9991 tcg_temp_free_i32(tmp2);
9994 store_reg(s, rd, tmp);
9996 case 2: /* SIMD add/subtract. */
9997 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
10000 op = (insn >> 20) & 7;
10001 shift = (insn >> 4) & 7;
10002 if ((op & 3) == 3 || (shift & 3) == 3)
10004 tmp = load_reg(s, rn);
10005 tmp2 = load_reg(s, rm);
10006 gen_thumb2_parallel_addsub(op, shift, tmp, tmp2);
10007 tcg_temp_free_i32(tmp2);
10008 store_reg(s, rd, tmp);
10010 case 3: /* Other data processing. */
10011 op = ((insn >> 17) & 0x38) | ((insn >> 4) & 7);
10013 /* Saturating add/subtract. */
10014 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
10017 tmp = load_reg(s, rn);
10018 tmp2 = load_reg(s, rm);
10020 gen_helper_double_saturate(tmp, cpu_env, tmp);
10022 gen_helper_sub_saturate(tmp, cpu_env, tmp2, tmp);
10024 gen_helper_add_saturate(tmp, cpu_env, tmp, tmp2);
10025 tcg_temp_free_i32(tmp2);
10028 case 0x0a: /* rbit */
10029 case 0x08: /* rev */
10030 case 0x09: /* rev16 */
10031 case 0x0b: /* revsh */
10032 case 0x18: /* clz */
10034 case 0x10: /* sel */
10035 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
10039 case 0x20: /* crc32/crc32c */
10045 if (!dc_isar_feature(aa32_crc32, s)) {
10052 tmp = load_reg(s, rn);
10054 case 0x0a: /* rbit */
10055 gen_helper_rbit(tmp, tmp);
10057 case 0x08: /* rev */
10058 tcg_gen_bswap32_i32(tmp, tmp);
10060 case 0x09: /* rev16 */
10063 case 0x0b: /* revsh */
10066 case 0x10: /* sel */
10067 tmp2 = load_reg(s, rm);
10068 tmp3 = tcg_temp_new_i32();
10069 tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUARMState, GE));
10070 gen_helper_sel_flags(tmp, tmp3, tmp, tmp2);
10071 tcg_temp_free_i32(tmp3);
10072 tcg_temp_free_i32(tmp2);
10074 case 0x18: /* clz */
10075 tcg_gen_clzi_i32(tmp, tmp, 32);
10085 uint32_t sz = op & 0x3;
10086 uint32_t c = op & 0x8;
10088 tmp2 = load_reg(s, rm);
10090 tcg_gen_andi_i32(tmp2, tmp2, 0xff);
10091 } else if (sz == 1) {
10092 tcg_gen_andi_i32(tmp2, tmp2, 0xffff);
10094 tmp3 = tcg_const_i32(1 << sz);
10096 gen_helper_crc32c(tmp, tmp, tmp2, tmp3);
10098 gen_helper_crc32(tmp, tmp, tmp2, tmp3);
10100 tcg_temp_free_i32(tmp2);
10101 tcg_temp_free_i32(tmp3);
10105 g_assert_not_reached();
10108 store_reg(s, rd, tmp);
10110 case 4: case 5: /* 32-bit multiply. Sum of absolute differences. */
10111 switch ((insn >> 20) & 7) {
10112 case 0: /* 32 x 32 -> 32 */
10113 case 7: /* Unsigned sum of absolute differences. */
10115 case 1: /* 16 x 16 -> 32 */
10116 case 2: /* Dual multiply add. */
10117 case 3: /* 32 * 16 -> 32msb */
10118 case 4: /* Dual multiply subtract. */
10119 case 5: case 6: /* 32 * 32 -> 32msb (SMMUL, SMMLA, SMMLS) */
10120 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
10125 op = (insn >> 4) & 0xf;
10126 tmp = load_reg(s, rn);
10127 tmp2 = load_reg(s, rm);
10128 switch ((insn >> 20) & 7) {
10129 case 0: /* 32 x 32 -> 32 */
10130 tcg_gen_mul_i32(tmp, tmp, tmp2);
10131 tcg_temp_free_i32(tmp2);
10133 tmp2 = load_reg(s, rs);
10135 tcg_gen_sub_i32(tmp, tmp2, tmp);
10137 tcg_gen_add_i32(tmp, tmp, tmp2);
10138 tcg_temp_free_i32(tmp2);
10141 case 1: /* 16 x 16 -> 32 */
10142 gen_mulxy(tmp, tmp2, op & 2, op & 1);
10143 tcg_temp_free_i32(tmp2);
10145 tmp2 = load_reg(s, rs);
10146 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
10147 tcg_temp_free_i32(tmp2);
10150 case 2: /* Dual multiply add. */
10151 case 4: /* Dual multiply subtract. */
10153 gen_swap_half(tmp2);
10154 gen_smul_dual(tmp, tmp2);
10155 if (insn & (1 << 22)) {
10156 /* This subtraction cannot overflow. */
10157 tcg_gen_sub_i32(tmp, tmp, tmp2);
10159 /* This addition cannot overflow 32 bits;
10160 * however it may overflow considered as a signed
10161 * operation, in which case we must set the Q flag.
10163 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
10165 tcg_temp_free_i32(tmp2);
10168 tmp2 = load_reg(s, rs);
10169 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
10170 tcg_temp_free_i32(tmp2);
10173 case 3: /* 32 * 16 -> 32msb */
10175 tcg_gen_sari_i32(tmp2, tmp2, 16);
10178 tmp64 = gen_muls_i64_i32(tmp, tmp2);
10179 tcg_gen_shri_i64(tmp64, tmp64, 16);
10180 tmp = tcg_temp_new_i32();
10181 tcg_gen_extrl_i64_i32(tmp, tmp64);
10182 tcg_temp_free_i64(tmp64);
10185 tmp2 = load_reg(s, rs);
10186 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
10187 tcg_temp_free_i32(tmp2);
10190 case 5: case 6: /* 32 * 32 -> 32msb (SMMUL, SMMLA, SMMLS) */
10191 tmp64 = gen_muls_i64_i32(tmp, tmp2);
10193 tmp = load_reg(s, rs);
10194 if (insn & (1 << 20)) {
10195 tmp64 = gen_addq_msw(tmp64, tmp);
10197 tmp64 = gen_subq_msw(tmp64, tmp);
10200 if (insn & (1 << 4)) {
10201 tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u);
10203 tcg_gen_shri_i64(tmp64, tmp64, 32);
10204 tmp = tcg_temp_new_i32();
10205 tcg_gen_extrl_i64_i32(tmp, tmp64);
10206 tcg_temp_free_i64(tmp64);
10208 case 7: /* Unsigned sum of absolute differences. */
10209 gen_helper_usad8(tmp, tmp, tmp2);
10210 tcg_temp_free_i32(tmp2);
10212 tmp2 = load_reg(s, rs);
10213 tcg_gen_add_i32(tmp, tmp, tmp2);
10214 tcg_temp_free_i32(tmp2);
10218 store_reg(s, rd, tmp);
10220 case 6: case 7: /* 64-bit multiply, Divide. */
10221 op = ((insn >> 4) & 0xf) | ((insn >> 16) & 0x70);
10222 tmp = load_reg(s, rn);
10223 tmp2 = load_reg(s, rm);
10224 if ((op & 0x50) == 0x10) {
10226 if (!dc_isar_feature(thumb_div, s)) {
10230 gen_helper_udiv(tmp, tmp, tmp2);
10232 gen_helper_sdiv(tmp, tmp, tmp2);
10233 tcg_temp_free_i32(tmp2);
10234 store_reg(s, rd, tmp);
10235 } else if ((op & 0xe) == 0xc) {
10236 /* Dual multiply accumulate long. */
10237 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
10238 tcg_temp_free_i32(tmp);
10239 tcg_temp_free_i32(tmp2);
10243 gen_swap_half(tmp2);
10244 gen_smul_dual(tmp, tmp2);
10246 tcg_gen_sub_i32(tmp, tmp, tmp2);
10248 tcg_gen_add_i32(tmp, tmp, tmp2);
10250 tcg_temp_free_i32(tmp2);
10252 tmp64 = tcg_temp_new_i64();
10253 tcg_gen_ext_i32_i64(tmp64, tmp);
10254 tcg_temp_free_i32(tmp);
10255 gen_addq(s, tmp64, rs, rd);
10256 gen_storeq_reg(s, rs, rd, tmp64);
10257 tcg_temp_free_i64(tmp64);
10260 /* Unsigned 64-bit multiply */
10261 tmp64 = gen_mulu_i64_i32(tmp, tmp2);
10265 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
10266 tcg_temp_free_i32(tmp2);
10267 tcg_temp_free_i32(tmp);
10270 gen_mulxy(tmp, tmp2, op & 2, op & 1);
10271 tcg_temp_free_i32(tmp2);
10272 tmp64 = tcg_temp_new_i64();
10273 tcg_gen_ext_i32_i64(tmp64, tmp);
10274 tcg_temp_free_i32(tmp);
10276 /* Signed 64-bit multiply */
10277 tmp64 = gen_muls_i64_i32(tmp, tmp2);
10282 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
10283 tcg_temp_free_i64(tmp64);
10286 gen_addq_lo(s, tmp64, rs);
10287 gen_addq_lo(s, tmp64, rd);
10288 } else if (op & 0x40) {
10289 /* 64-bit accumulate. */
10290 gen_addq(s, tmp64, rs, rd);
10292 gen_storeq_reg(s, rs, rd, tmp64);
10293 tcg_temp_free_i64(tmp64);
10298 case 6: case 7: case 14: case 15:
10300 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10301 /* 0b111x_11xx_xxxx_xxxx_xxxx_xxxx_xxxx_xxxx */
10302 if (extract32(insn, 24, 2) == 3) {
10303 goto illegal_op; /* op0 = 0b11 : unallocated */
10307 * Decode VLLDM and VLSTM first: these are nonstandard because:
10308 * * if there is no FPU then these insns must NOP in
10309 * Secure state and UNDEF in Nonsecure state
10310 * * if there is an FPU then these insns do not have
10311 * the usual behaviour that disas_vfp_insn() provides of
10312 * being controlled by CPACR/NSACR enable bits or the
10313 * lazy-stacking logic.
10315 if (arm_dc_feature(s, ARM_FEATURE_V8) &&
10316 (insn & 0xffa00f00) == 0xec200a00) {
10317 /* 0b1110_1100_0x1x_xxxx_xxxx_1010_xxxx_xxxx
10319 * We choose to UNDEF if the RAZ bits are non-zero.
10321 if (!s->v8m_secure || (insn & 0x0040f0ff)) {
10325 if (arm_dc_feature(s, ARM_FEATURE_VFP)) {
10326 TCGv_i32 fptr = load_reg(s, rn);
10328 if (extract32(insn, 20, 1)) {
10329 gen_helper_v7m_vlldm(cpu_env, fptr);
10331 gen_helper_v7m_vlstm(cpu_env, fptr);
10333 tcg_temp_free_i32(fptr);
10335 /* End the TB, because we have updated FP control bits */
10336 s->base.is_jmp = DISAS_UPDATE;
10340 if (arm_dc_feature(s, ARM_FEATURE_VFP) &&
10341 ((insn >> 8) & 0xe) == 10) {
10342 /* FP, and the CPU supports it */
10343 if (disas_vfp_insn(s, insn)) {
10349 /* All other insns: NOCP */
10350 gen_exception_insn(s, 4, EXCP_NOCP, syn_uncategorized(),
10351 default_exception_el(s));
10354 if ((insn & 0xfe000a00) == 0xfc000800
10355 && arm_dc_feature(s, ARM_FEATURE_V8)) {
10356 /* The Thumb2 and ARM encodings are identical. */
10357 if (disas_neon_insn_3same_ext(s, insn)) {
10360 } else if ((insn & 0xff000a00) == 0xfe000800
10361 && arm_dc_feature(s, ARM_FEATURE_V8)) {
10362 /* The Thumb2 and ARM encodings are identical. */
10363 if (disas_neon_insn_2reg_scalar_ext(s, insn)) {
10366 } else if (((insn >> 24) & 3) == 3) {
10367 /* Translate into the equivalent ARM encoding. */
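/*
 * The two encodings differ only in the top bits: move the bit that
 * distinguishes the 0xef/0xff Thumb prefixes (insn[28]) down into
 * insn[24] and force bits [31:26] to 0b111100, which yields the
 * 0xf2/0xf3 prefix used by the ARM Neon data-processing encoding.
 */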
10368 insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
10369 if (disas_neon_data_insn(s, insn)) {
10372 } else if (((insn >> 8) & 0xe) == 10) {
10373 if (disas_vfp_insn(s, insn)) {
10377 if (insn & (1 << 28))
10379 if (disas_coproc_insn(s, insn)) {
10384 case 8: case 9: case 10: case 11:
10385 if (insn & (1 << 15)) {
10386 /* Branches, misc control. */
10387 if (insn & 0x5000) {
10388 /* Unconditional branch. */
10389 /* signextend(hw1[10:0]) -> offset[:12]. */
10390 offset = ((int32_t)insn << 5) >> 9 & ~(int32_t)0xfff;
10391 /* hw1[10:0] -> offset[11:1]. */
10392 offset |= (insn & 0x7ff) << 1;
10393 /* (~hw2[13, 11] ^ offset[24]) -> offset[23,22]
10394 offset[24:22] already have the same value because of the
10395 sign extension above. */
10396 offset ^= ((~insn) & (1 << 13)) << 10;
10397 offset ^= ((~insn) & (1 << 11)) << 11;
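/*
 * The reassembled value is the T32 branch immediate
 * SignExtend(S:I1:I2:imm10:imm11:'0'), with I1 = NOT(J1 EOR S)
 * and I2 = NOT(J2 EOR S) produced by the two XORs above.
 */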
10399 if (insn & (1 << 14)) {
10400 /* Branch and link. */
10401 tcg_gen_movi_i32(cpu_R[14], s->pc | 1);
10405 if (insn & (1 << 12)) {
10407 gen_jmp(s, offset);
10410 offset &= ~(uint32_t)2;
10411 /* thumb2 bx, no need to check */
10412 gen_bx_im(s, offset);
10414 } else if (((insn >> 23) & 7) == 7) {
10416 if (insn & (1 << 13))
10419 if (insn & (1 << 26)) {
10420 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10423 if (!(insn & (1 << 20))) {
10424 /* Hypervisor call (v7) */
10425 int imm16 = extract32(insn, 16, 4) << 12
10426 | extract32(insn, 0, 12);
10433 /* Secure monitor call (v6+) */
10441 op = (insn >> 20) & 7;
10443 case 0: /* msr cpsr. */
10444 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10445 tmp = load_reg(s, rn);
10446 /* the constant is the mask and SYSm fields */
10447 addr = tcg_const_i32(insn & 0xfff);
10448 gen_helper_v7m_msr(cpu_env, addr, tmp);
10449 tcg_temp_free_i32(addr);
10450 tcg_temp_free_i32(tmp);
10455 case 1: /* msr spsr. */
10456 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10460 if (extract32(insn, 5, 1)) {
10462 int sysm = extract32(insn, 8, 4) |
10463 (extract32(insn, 4, 1) << 4);
10466 gen_msr_banked(s, r, sysm, rm);
10470 /* MSR (for PSRs) */
10471 tmp = load_reg(s, rn);
10473 msr_mask(s, (insn >> 8) & 0xf, op == 1),
10477 case 2: /* cps, nop-hint. */
10478 if (((insn >> 8) & 7) == 0) {
10479 gen_nop_hint(s, insn & 0xff);
10481 /* Implemented as NOP in user mode. */
10486 if (insn & (1 << 10)) {
10487 if (insn & (1 << 7))
10489 if (insn & (1 << 6))
10491 if (insn & (1 << 5))
10493 if (insn & (1 << 9))
10494 imm = CPSR_A | CPSR_I | CPSR_F;
10496 if (insn & (1 << 8)) {
10498 imm |= (insn & 0x1f);
10501 gen_set_psr_im(s, offset, 0, imm);
10504 case 3: /* Special control operations. */
10505 if (!arm_dc_feature(s, ARM_FEATURE_V7) &&
10506 !arm_dc_feature(s, ARM_FEATURE_M)) {
10509 op = (insn >> 4) & 0xf;
10511 case 2: /* clrex */
10516 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
10519 /* We need to break the TB after this insn
10520 * to execute self-modifying code correctly
10521 * and also to take any pending interrupts
10524 gen_goto_tb(s, 0, s->pc & ~1);
10527 if ((insn & 0xf) || !dc_isar_feature(aa32_sb, s)) {
10531 * TODO: There is no speculation barrier opcode
10532 * for TCG; MB and end the TB instead.
10534 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
10535 gen_goto_tb(s, 0, s->pc & ~1);
10542 /* Trivial implementation equivalent to bx.
10543 * This instruction doesn't exist at all for M-profile.
10545 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10548 tmp = load_reg(s, rn);
10551 case 5: /* Exception return. */
10555 if (rn != 14 || rd != 15) {
10558 if (s->current_el == 2) {
10559 /* ERET from Hyp uses ELR_Hyp, not LR */
10563 tmp = load_cpu_field(elr_el[2]);
10565 tmp = load_reg(s, rn);
10566 tcg_gen_subi_i32(tmp, tmp, insn & 0xff);
10568 gen_exception_return(s, tmp);
10571 if (extract32(insn, 5, 1) &&
10572 !arm_dc_feature(s, ARM_FEATURE_M)) {
10574 int sysm = extract32(insn, 16, 4) |
10575 (extract32(insn, 4, 1) << 4);
10577 gen_mrs_banked(s, 0, sysm, rd);
10581 if (extract32(insn, 16, 4) != 0xf) {
10584 if (!arm_dc_feature(s, ARM_FEATURE_M) &&
10585 extract32(insn, 0, 8) != 0) {
10590 tmp = tcg_temp_new_i32();
10591 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10592 addr = tcg_const_i32(insn & 0xff);
10593 gen_helper_v7m_mrs(tmp, cpu_env, addr);
10594 tcg_temp_free_i32(addr);
10596 gen_helper_cpsr_read(tmp, cpu_env);
10598 store_reg(s, rd, tmp);
10601 if (extract32(insn, 5, 1) &&
10602 !arm_dc_feature(s, ARM_FEATURE_M)) {
10604 int sysm = extract32(insn, 16, 4) |
10605 (extract32(insn, 4, 1) << 4);
10607 gen_mrs_banked(s, 1, sysm, rd);
10612 /* Not accessible in user mode. */
10613 if (IS_USER(s) || arm_dc_feature(s, ARM_FEATURE_M)) {
10617 if (extract32(insn, 16, 4) != 0xf ||
10618 extract32(insn, 0, 8) != 0) {
10622 tmp = load_cpu_field(spsr);
10623 store_reg(s, rd, tmp);
10628 /* Conditional branch. */
10629 op = (insn >> 22) & 0xf;
10630 /* Generate a conditional jump to next instruction. */
10631 arm_skip_unless(s, op);
10633 /* offset[11:1] = insn[10:0] */
10634 offset = (insn & 0x7ff) << 1;
10635 /* offset[17:12] = insn[21:16]. */
10636 offset |= (insn & 0x003f0000) >> 4;
10637 /* offset[31:20] = insn[26]. */
10638 offset |= ((int32_t)((insn << 5) & 0x80000000)) >> 11;
10639 /* offset[18] = insn[13]. */
10640 offset |= (insn & (1 << 13)) << 5;
10641 /* offset[19] = insn[11]. */
10642 offset |= (insn & (1 << 11)) << 8;
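/*
 * i.e. the T32 conditional-branch immediate
 * SignExtend(S:J2:J1:imm6:imm11:'0').
 */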
10644 /* jump to the offset */
10645 gen_jmp(s, s->pc + offset);
10649 * 0b1111_0xxx_xxxx_0xxx_xxxx_xxxx
10650 * - Data-processing (modified immediate, plain binary immediate)
10652 if (insn & (1 << 25)) {
10654 * 0b1111_0x1x_xxxx_0xxx_xxxx_xxxx
10655 * - Data-processing (plain binary immediate)
10657 if (insn & (1 << 24)) {
10658 if (insn & (1 << 20))
10660 /* Bitfield/Saturate. */
10661 op = (insn >> 21) & 7;
10663 shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
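/*
 * shift is the reassembled imm3:imm2 lsb field: imm3 from
 * insn[14:12], imm2 from insn[7:6].
 */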
10665 tmp = tcg_temp_new_i32();
10666 tcg_gen_movi_i32(tmp, 0);
10668 tmp = load_reg(s, rn);
10671 case 2: /* Signed bitfield extract. */
10673 if (shift + imm > 32)
10676 tcg_gen_sextract_i32(tmp, tmp, shift, imm);
10679 case 6: /* Unsigned bitfield extract. */
10681 if (shift + imm > 32)
10684 tcg_gen_extract_i32(tmp, tmp, shift, imm);
10687 case 3: /* Bitfield insert/clear. */
10690 imm = imm + 1 - shift;
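/*
 * imm arrived as the msb field; it now holds the insert width,
 * msb + 1 - lsb.
 */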
10692 tmp2 = load_reg(s, rd);
10693 tcg_gen_deposit_i32(tmp, tmp2, tmp, shift, imm);
10694 tcg_temp_free_i32(tmp2);
10699 default: /* Saturate. */
10702 tcg_gen_sari_i32(tmp, tmp, shift);
10704 tcg_gen_shli_i32(tmp, tmp, shift);
10706 tmp2 = tcg_const_i32(imm);
10709 if ((op & 1) && shift == 0) {
10710 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
10711 tcg_temp_free_i32(tmp);
10712 tcg_temp_free_i32(tmp2);
10715 gen_helper_usat16(tmp, cpu_env, tmp, tmp2);
10717 gen_helper_usat(tmp, cpu_env, tmp, tmp2);
10721 if ((op & 1) && shift == 0) {
10722 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
10723 tcg_temp_free_i32(tmp);
10724 tcg_temp_free_i32(tmp2);
10727 gen_helper_ssat16(tmp, cpu_env, tmp, tmp2);
10729 gen_helper_ssat(tmp, cpu_env, tmp, tmp2);
10732 tcg_temp_free_i32(tmp2);
10735 store_reg(s, rd, tmp);
10737 imm = ((insn & 0x04000000) >> 15)
10738 | ((insn & 0x7000) >> 4) | (insn & 0xff);
10739 if (insn & (1 << 22)) {
10740 /* 16-bit immediate. */
10741 imm |= (insn >> 4) & 0xf000;
10742 if (insn & (1 << 23)) {
10744 tmp = load_reg(s, rd);
10745 tcg_gen_ext16u_i32(tmp, tmp);
10746 tcg_gen_ori_i32(tmp, tmp, imm << 16);
10749 tmp = tcg_temp_new_i32();
10750 tcg_gen_movi_i32(tmp, imm);
10752 store_reg(s, rd, tmp);
10754 /* Add/sub 12-bit immediate. */
10756 offset = s->pc & ~(uint32_t)3;
10757 if (insn & (1 << 23))
10761 tmp = tcg_temp_new_i32();
10762 tcg_gen_movi_i32(tmp, offset);
10763 store_reg(s, rd, tmp);
10765 tmp = load_reg(s, rn);
10766 if (insn & (1 << 23))
10767 tcg_gen_subi_i32(tmp, tmp, imm);
10769 tcg_gen_addi_i32(tmp, tmp, imm);
10770 if (rn == 13 && rd == 13) {
10771 /* ADD SP, SP, imm or SUB SP, SP, imm */
10772 store_sp_checked(s, tmp);
10774 store_reg(s, rd, tmp);
10781 * 0b1111_0x0x_xxxx_0xxx_xxxx_xxxx
10782 * - Data-processing (modified immediate)
10784 int shifter_out = 0;
10785 /* modified 12-bit immediate. */
10786 shift = ((insn & 0x04000000) >> 23) | ((insn & 0x7000) >> 12);
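/*
 * shift is the 4-bit i:imm3 rotation field of the T32 modified
 * immediate: i from insn[26], imm3 from insn[14:12].
 */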
10787 imm = (insn & 0xff);
10790 /* Nothing to do. */
10792 case 1: /* 00XY00XY */
10795 case 2: /* XY00XY00 */
10799 case 3: /* XYXYXYXY */
10803 default: /* Rotated constant. */
10804 shift = (shift << 1) | (imm >> 7);
10806 imm = imm << (32 - shift);
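/*
 * The rotation count i:imm3:imm8[7] is always >= 8 here (the default
 * case means i:imm3 >= 4) and the value rotated is the 8-bit
 * immediate with its top bit forced to 1, so a rotate right by the
 * count reduces to a left shift by (32 - count). For example
 * i:imm3 = 0b0100 with imm8 = 0x65 gives count 8 and the constant
 * 0xe5000000.
 */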
10810 tmp2 = tcg_temp_new_i32();
10811 tcg_gen_movi_i32(tmp2, imm);
10812 rn = (insn >> 16) & 0xf;
10814 tmp = tcg_temp_new_i32();
10815 tcg_gen_movi_i32(tmp, 0);
10817 tmp = load_reg(s, rn);
10819 op = (insn >> 21) & 0xf;
10820 if (gen_thumb2_data_op(s, op, (insn & (1 << 20)) != 0,
10821 shifter_out, tmp, tmp2))
10823 tcg_temp_free_i32(tmp2);
10824 rd = (insn >> 8) & 0xf;
10825 if (rd == 13 && rn == 13
10826 && (op == 8 || op == 13)) {
10827 /* ADD(S) SP, SP, imm or SUB(S) SP, SP, imm */
10828 store_sp_checked(s, tmp);
10829 } else if (rd != 15) {
10830 store_reg(s, rd, tmp);
10832 tcg_temp_free_i32(tmp);
10837 case 12: /* Load/store single data item. */
10844 if ((insn & 0x01100000) == 0x01000000) {
10845 if (disas_neon_ls_insn(s, insn)) {
10850 op = ((insn >> 21) & 3) | ((insn >> 22) & 4);
10852 if (!(insn & (1 << 20))) {
10856 /* Byte or halfword load space with dest == r15 : memory hints.
10857 * Catch them early so we don't emit pointless addressing code.
10858 * This space is a mix of:
10859 * PLD/PLDW/PLI, which we implement as NOPs (note that unlike
10860 * the ARM encodings, PLDW space doesn't UNDEF for non-v7MP
10862 * unallocated hints, which must be treated as NOPs
10863 * UNPREDICTABLE space, which we NOP or UNDEF depending on
10864 * which is easiest for the decoding logic
10865 * Some space which must UNDEF
10867 int op1 = (insn >> 23) & 3;
10868 int op2 = (insn >> 6) & 0x3f;
10873 /* UNPREDICTABLE, unallocated hint or
10874 * PLD/PLDW/PLI (literal)
10879 return; /* PLD/PLDW/PLI or unallocated hint */
10881 if ((op2 == 0) || ((op2 & 0x3c) == 0x30)) {
10882 return; /* PLD/PLDW/PLI or unallocated hint */
10884 /* UNDEF space, or an UNPREDICTABLE */
10888 memidx = get_mem_index(s);
10890 addr = tcg_temp_new_i32();
10892 /* s->pc has already been incremented by 4. */
10893 imm = s->pc & 0xfffffffc;
10894 if (insn & (1 << 23))
10895 imm += insn & 0xfff;
10897 imm -= insn & 0xfff;
10898 tcg_gen_movi_i32(addr, imm);
10900 addr = load_reg(s, rn);
10901 if (insn & (1 << 23)) {
10902 /* Positive offset. */
10903 imm = insn & 0xfff;
10904 tcg_gen_addi_i32(addr, addr, imm);
10907 switch ((insn >> 8) & 0xf) {
10908 case 0x0: /* Shifted Register. */
10909 shift = (insn >> 4) & 0xf;
10911 tcg_temp_free_i32(addr);
10914 tmp = load_reg(s, rm);
10916 tcg_gen_shli_i32(tmp, tmp, shift);
10917 tcg_gen_add_i32(addr, addr, tmp);
10918 tcg_temp_free_i32(tmp);
10920 case 0xc: /* Negative offset. */
10921 tcg_gen_addi_i32(addr, addr, -imm);
10923 case 0xe: /* User privilege. */
10924 tcg_gen_addi_i32(addr, addr, imm);
10925 memidx = get_a32_user_mem_index(s);
10927 case 0x9: /* Post-decrement. */
10929 /* Fall through. */
10930 case 0xb: /* Post-increment. */
10934 case 0xd: /* Pre-decrement. */
10936 /* Fall through. */
10937 case 0xf: /* Pre-increment. */
10941 tcg_temp_free_i32(addr);
10947 issinfo = writeback ? ISSInvalid : rs;
10949 if (s->v8m_stackcheck && rn == 13 && writeback) {
10951 * Stackcheck. Here we know 'addr' is the current SP;
10952 * if imm is +ve we're moving SP up, else down. It is
10953 * UNKNOWN whether the limit check triggers when SP starts
10954 * below the limit and ends up above it; we choose to trigger.
10956 if ((int32_t)imm < 0) {
10957 TCGv_i32 newsp = tcg_temp_new_i32();
10959 tcg_gen_addi_i32(newsp, addr, imm);
10960 gen_helper_v8m_stackcheck(cpu_env, newsp);
10961 tcg_temp_free_i32(newsp);
10963 gen_helper_v8m_stackcheck(cpu_env, addr);
10967 if (writeback && !postinc) {
10968 tcg_gen_addi_i32(addr, addr, imm);
10971 if (insn & (1 << 20)) {
10973 tmp = tcg_temp_new_i32();
10976 gen_aa32_ld8u_iss(s, tmp, addr, memidx, issinfo);
10979 gen_aa32_ld8s_iss(s, tmp, addr, memidx, issinfo);
10982 gen_aa32_ld16u_iss(s, tmp, addr, memidx, issinfo);
10985 gen_aa32_ld16s_iss(s, tmp, addr, memidx, issinfo);
10988 gen_aa32_ld32u_iss(s, tmp, addr, memidx, issinfo);
10991 tcg_temp_free_i32(tmp);
10992 tcg_temp_free_i32(addr);
10996 gen_bx_excret(s, tmp);
10998 store_reg(s, rs, tmp);
11002 tmp = load_reg(s, rs);
11005 gen_aa32_st8_iss(s, tmp, addr, memidx, issinfo);
11008 gen_aa32_st16_iss(s, tmp, addr, memidx, issinfo);
11011 gen_aa32_st32_iss(s, tmp, addr, memidx, issinfo);
11014 tcg_temp_free_i32(tmp);
11015 tcg_temp_free_i32(addr);
11018 tcg_temp_free_i32(tmp);
11021 tcg_gen_addi_i32(addr, addr, imm);
11023 store_reg(s, rn, addr);
11025 tcg_temp_free_i32(addr);
11034 gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
11035 default_exception_el(s));
11038 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
11040 uint32_t val, op, rm, rn, rd, shift, cond;
11047 switch (insn >> 12) {
11051 op = (insn >> 11) & 3;
11054 * 0b0001_1xxx_xxxx_xxxx
11055 * - Add, subtract (three low registers)
11056 * - Add, subtract (two low registers and immediate)
11058 rn = (insn >> 3) & 7;
11059 tmp = load_reg(s, rn);
11060 if (insn & (1 << 10)) {
11062 tmp2 = tcg_temp_new_i32();
11063 tcg_gen_movi_i32(tmp2, (insn >> 6) & 7);
11066 rm = (insn >> 6) & 7;
11067 tmp2 = load_reg(s, rm);
11069 if (insn & (1 << 9)) {
11070 if (s->condexec_mask)
11071 tcg_gen_sub_i32(tmp, tmp, tmp2);
11073 gen_sub_CC(tmp, tmp, tmp2);
11075 if (s->condexec_mask)
11076 tcg_gen_add_i32(tmp, tmp, tmp2);
11078 gen_add_CC(tmp, tmp, tmp2);
11080 tcg_temp_free_i32(tmp2);
11081 store_reg(s, rd, tmp);
11083 /* shift immediate */
11084 rm = (insn >> 3) & 7;
11085 shift = (insn >> 6) & 0x1f;
11086 tmp = load_reg(s, rm);
11087 gen_arm_shift_im(tmp, op, shift, s->condexec_mask == 0);
11088 if (!s->condexec_mask)
11090 store_reg(s, rd, tmp);
11095 * 0b001x_xxxx_xxxx_xxxx
11096 * - Add, subtract, compare, move (one low register and immediate)
11098 op = (insn >> 11) & 3;
11099 rd = (insn >> 8) & 0x7;
11100 if (op == 0) { /* mov */
11101 tmp = tcg_temp_new_i32();
11102 tcg_gen_movi_i32(tmp, insn & 0xff);
11103 if (!s->condexec_mask)
11105 store_reg(s, rd, tmp);
11107 tmp = load_reg(s, rd);
11108 tmp2 = tcg_temp_new_i32();
11109 tcg_gen_movi_i32(tmp2, insn & 0xff);
11112 gen_sub_CC(tmp, tmp, tmp2);
11113 tcg_temp_free_i32(tmp);
11114 tcg_temp_free_i32(tmp2);
11117 if (s->condexec_mask)
11118 tcg_gen_add_i32(tmp, tmp, tmp2);
11120 gen_add_CC(tmp, tmp, tmp2);
11121 tcg_temp_free_i32(tmp2);
11122 store_reg(s, rd, tmp);
11125 if (s->condexec_mask)
11126 tcg_gen_sub_i32(tmp, tmp, tmp2);
11128 gen_sub_CC(tmp, tmp, tmp2);
11129 tcg_temp_free_i32(tmp2);
11130 store_reg(s, rd, tmp);
11136 if (insn & (1 << 11)) {
11137 rd = (insn >> 8) & 7;
11138 /* load pc-relative. Bit 1 of PC is ignored. */
11139 val = s->pc + 2 + ((insn & 0xff) * 4);
11140 val &= ~(uint32_t)2;
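/*
 * i.e. the literal address is Align(PC, 4) + imm8 * 4, where PC reads
 * as the insn address + 4 (s->pc is already the insn address + 2).
 */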
11141 addr = tcg_temp_new_i32();
11142 tcg_gen_movi_i32(addr, val);
11143 tmp = tcg_temp_new_i32();
11144 gen_aa32_ld32u_iss(s, tmp, addr, get_mem_index(s),
11146 tcg_temp_free_i32(addr);
11147 store_reg(s, rd, tmp);
11150 if (insn & (1 << 10)) {
11151 /* 0b0100_01xx_xxxx_xxxx
11152 * - data processing extended, branch and exchange
11154 rd = (insn & 7) | ((insn >> 4) & 8);
11155 rm = (insn >> 3) & 0xf;
11156 op = (insn >> 8) & 3;
11159 tmp = load_reg(s, rd);
11160 tmp2 = load_reg(s, rm);
11161 tcg_gen_add_i32(tmp, tmp, tmp2);
11162 tcg_temp_free_i32(tmp2);
11164 /* ADD SP, SP, reg */
11165 store_sp_checked(s, tmp);
11167 store_reg(s, rd, tmp);
11171 tmp = load_reg(s, rd);
11172 tmp2 = load_reg(s, rm);
11173 gen_sub_CC(tmp, tmp, tmp2);
11174 tcg_temp_free_i32(tmp2);
11175 tcg_temp_free_i32(tmp);
11177 case 2: /* mov/cpy */
11178 tmp = load_reg(s, rm);
11181 store_sp_checked(s, tmp);
11183 store_reg(s, rd, tmp);
11188 /* 0b0100_0111_xxxx_xxxx
11189 * - branch [and link] exchange thumb register
11191 bool link = insn & (1 << 7);
11200 /* BXNS/BLXNS: only exist for v8M with the
11201 * security extensions, and always UNDEF if NonSecure.
11202 * We don't implement these in the user-only mode
11203 * either (in theory you can use them from Secure User
11204 * mode but they are too tied in to system emulation.)
11206 if (!s->v8m_secure || IS_USER_ONLY) {
11217 tmp = load_reg(s, rm);
11219 val = (uint32_t)s->pc | 1;
11220 tmp2 = tcg_temp_new_i32();
11221 tcg_gen_movi_i32(tmp2, val);
11222 store_reg(s, 14, tmp2);
11225 /* Only BX works as exception-return, not BLX */
11226 gen_bx_excret(s, tmp);
11235 * 0b0100_00xx_xxxx_xxxx
11236 * - Data-processing (two low registers)
11239 rm = (insn >> 3) & 7;
11240 op = (insn >> 6) & 0xf;
11241 if (op == 2 || op == 3 || op == 4 || op == 7) {
11242 /* the shift/rotate ops want the operands backwards */
11251 if (op == 9) { /* neg */
11252 tmp = tcg_temp_new_i32();
11253 tcg_gen_movi_i32(tmp, 0);
11254 } else if (op != 0xf) { /* mvn doesn't read its first operand */
11255 tmp = load_reg(s, rd);
11260 tmp2 = load_reg(s, rm);
11262 case 0x0: /* and */
11263 tcg_gen_and_i32(tmp, tmp, tmp2);
11264 if (!s->condexec_mask)
11267 case 0x1: /* eor */
11268 tcg_gen_xor_i32(tmp, tmp, tmp2);
11269 if (!s->condexec_mask)
11272 case 0x2: /* lsl */
11273 if (s->condexec_mask) {
11274 gen_shl(tmp2, tmp2, tmp);
11276 gen_helper_shl_cc(tmp2, cpu_env, tmp2, tmp);
11277 gen_logic_CC(tmp2);
11280 case 0x3: /* lsr */
11281 if (s->condexec_mask) {
11282 gen_shr(tmp2, tmp2, tmp);
11284 gen_helper_shr_cc(tmp2, cpu_env, tmp2, tmp);
11285 gen_logic_CC(tmp2);
11288 case 0x4: /* asr */
11289 if (s->condexec_mask) {
11290 gen_sar(tmp2, tmp2, tmp);
11292 gen_helper_sar_cc(tmp2, cpu_env, tmp2, tmp);
11293 gen_logic_CC(tmp2);
11296 case 0x5: /* adc */
11297 if (s->condexec_mask) {
11298 gen_adc(tmp, tmp2);
11300 gen_adc_CC(tmp, tmp, tmp2);
11303 case 0x6: /* sbc */
11304 if (s->condexec_mask) {
11305 gen_sub_carry(tmp, tmp, tmp2);
11307 gen_sbc_CC(tmp, tmp, tmp2);
11310 case 0x7: /* ror */
11311 if (s->condexec_mask) {
11312 tcg_gen_andi_i32(tmp, tmp, 0x1f);
11313 tcg_gen_rotr_i32(tmp2, tmp2, tmp);
11315 gen_helper_ror_cc(tmp2, cpu_env, tmp2, tmp);
11316 gen_logic_CC(tmp2);
11319 case 0x8: /* tst */
11320 tcg_gen_and_i32(tmp, tmp, tmp2);
11324 case 0x9: /* neg */
11325 if (s->condexec_mask)
11326 tcg_gen_neg_i32(tmp, tmp2);
11328 gen_sub_CC(tmp, tmp, tmp2);
11330 case 0xa: /* cmp */
11331 gen_sub_CC(tmp, tmp, tmp2);
11334 case 0xb: /* cmn */
11335 gen_add_CC(tmp, tmp, tmp2);
11338 case 0xc: /* orr */
11339 tcg_gen_or_i32(tmp, tmp, tmp2);
11340 if (!s->condexec_mask)
11343 case 0xd: /* mul */
11344 tcg_gen_mul_i32(tmp, tmp, tmp2);
11345 if (!s->condexec_mask)
11348 case 0xe: /* bic */
11349 tcg_gen_andc_i32(tmp, tmp, tmp2);
11350 if (!s->condexec_mask)
11353 case 0xf: /* mvn */
11354 tcg_gen_not_i32(tmp2, tmp2);
11355 if (!s->condexec_mask)
11356 gen_logic_CC(tmp2);
11363 store_reg(s, rm, tmp2);
11365 tcg_temp_free_i32(tmp);
11367 store_reg(s, rd, tmp);
11368 tcg_temp_free_i32(tmp2);
11371 tcg_temp_free_i32(tmp);
11372 tcg_temp_free_i32(tmp2);
11377 /* load/store register offset. */
11379 rn = (insn >> 3) & 7;
11380 rm = (insn >> 6) & 7;
11381 op = (insn >> 9) & 7;
11382 addr = load_reg(s, rn);
11383 tmp = load_reg(s, rm);
11384 tcg_gen_add_i32(addr, addr, tmp);
11385 tcg_temp_free_i32(tmp);
11387 if (op < 3) { /* store */
11388 tmp = load_reg(s, rd);
11390 tmp = tcg_temp_new_i32();
11395 gen_aa32_st32_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
11398 gen_aa32_st16_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
11401 gen_aa32_st8_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
11403 case 3: /* ldrsb */
11404 gen_aa32_ld8s_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
11407 gen_aa32_ld32u_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
11410 gen_aa32_ld16u_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
11413 gen_aa32_ld8u_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
11415 case 7: /* ldrsh */
11416 gen_aa32_ld16s_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
11419 if (op >= 3) { /* load */
11420 store_reg(s, rd, tmp);
11422 tcg_temp_free_i32(tmp);
11424 tcg_temp_free_i32(addr);
11428 /* load/store word immediate offset */
11430 rn = (insn >> 3) & 7;
11431 addr = load_reg(s, rn);
11432 val = (insn >> 4) & 0x7c;
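/*
 * imm5 lives in insn[10:6]; shifting by 4 and masking with 0x7c
 * extracts it already scaled by the word size (imm5 * 4).
 */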
11433 tcg_gen_addi_i32(addr, addr, val);
11435 if (insn & (1 << 11)) {
11437 tmp = tcg_temp_new_i32();
11438 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
11439 store_reg(s, rd, tmp);
11442 tmp = load_reg(s, rd);
11443 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
11444 tcg_temp_free_i32(tmp);
11446 tcg_temp_free_i32(addr);
11450 /* load/store byte immediate offset */
11452 rn = (insn >> 3) & 7;
11453 addr = load_reg(s, rn);
11454 val = (insn >> 6) & 0x1f;
11455 tcg_gen_addi_i32(addr, addr, val);
11457 if (insn & (1 << 11)) {
11459 tmp = tcg_temp_new_i32();
11460 gen_aa32_ld8u_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
11461 store_reg(s, rd, tmp);
11464 tmp = load_reg(s, rd);
11465 gen_aa32_st8_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
11466 tcg_temp_free_i32(tmp);
11468 tcg_temp_free_i32(addr);
11472 /* load/store halfword immediate offset */
11474 rn = (insn >> 3) & 7;
11475 addr = load_reg(s, rn);
11476 val = (insn >> 5) & 0x3e;
11477 tcg_gen_addi_i32(addr, addr, val);
11479 if (insn & (1 << 11)) {
11481 tmp = tcg_temp_new_i32();
11482 gen_aa32_ld16u_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
11483 store_reg(s, rd, tmp);
11486 tmp = load_reg(s, rd);
11487 gen_aa32_st16_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
11488 tcg_temp_free_i32(tmp);
11490 tcg_temp_free_i32(addr);
11494 /* load/store from stack */
11495 rd = (insn >> 8) & 7;
11496 addr = load_reg(s, 13);
11497 val = (insn & 0xff) * 4;
11498 tcg_gen_addi_i32(addr, addr, val);
11500 if (insn & (1 << 11)) {
11502 tmp = tcg_temp_new_i32();
11503 gen_aa32_ld32u_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
11504 store_reg(s, rd, tmp);
11507 tmp = load_reg(s, rd);
11508 gen_aa32_st32_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
11509 tcg_temp_free_i32(tmp);
11511 tcg_temp_free_i32(addr);
11516 * 0b1010_xxxx_xxxx_xxxx
11517 * - Add PC/SP (immediate)
11519 rd = (insn >> 8) & 7;
11520 if (insn & (1 << 11)) {
11522 tmp = load_reg(s, 13);
11524 /* PC. bit 1 is ignored. */
11525 tmp = tcg_temp_new_i32();
11526 tcg_gen_movi_i32(tmp, (s->pc + 2) & ~(uint32_t)2);
11528 val = (insn & 0xff) * 4;
11529 tcg_gen_addi_i32(tmp, tmp, val);
11530 store_reg(s, rd, tmp);
11535 op = (insn >> 8) & 0xf;
11539 * 0b1011_0000_xxxx_xxxx
11540 * - ADD (SP plus immediate)
11541 * - SUB (SP minus immediate)
11543 tmp = load_reg(s, 13);
11544 val = (insn & 0x7f) * 4;
11545 if (insn & (1 << 7))
11546 val = -(int32_t)val;
11547 tcg_gen_addi_i32(tmp, tmp, val);
11548 store_sp_checked(s, tmp);
11551 case 2: /* sign/zero extend. */
11554 rm = (insn >> 3) & 7;
11555 tmp = load_reg(s, rm);
11556 switch ((insn >> 6) & 3) {
11557 case 0: gen_sxth(tmp); break;
11558 case 1: gen_sxtb(tmp); break;
11559 case 2: gen_uxth(tmp); break;
11560 case 3: gen_uxtb(tmp); break;
11562 store_reg(s, rd, tmp);
11564 case 4: case 5: case 0xc: case 0xd:
11566 * 0b1011_x10x_xxxx_xxxx
11569 addr = load_reg(s, 13);
11570 if (insn & (1 << 8))
11574 for (i = 0; i < 8; i++) {
11575 if (insn & (1 << i))
11578 if ((insn & (1 << 11)) == 0) {
11579 tcg_gen_addi_i32(addr, addr, -offset);
11582 if (s->v8m_stackcheck) {
11584 * Here 'addr' is the lower of "old SP" and "new SP";
11585 * if this is a pop that starts below the limit and ends
11586 * above it, it is UNKNOWN whether the limit check triggers;
11587 * we choose to trigger.
11589 gen_helper_v8m_stackcheck(cpu_env, addr);
11592 for (i = 0; i < 8; i++) {
11593 if (insn & (1 << i)) {
11594 if (insn & (1 << 11)) {
11596 tmp = tcg_temp_new_i32();
11597 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
11598 store_reg(s, i, tmp);
11601 tmp = load_reg(s, i);
11602 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
11603 tcg_temp_free_i32(tmp);
11605 /* advance to the next address. */
11606 tcg_gen_addi_i32(addr, addr, 4);
11610 if (insn & (1 << 8)) {
11611 if (insn & (1 << 11)) {
11613 tmp = tcg_temp_new_i32();
11614 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
11615 /* don't set the pc until the rest of the instruction
11619 tmp = load_reg(s, 14);
11620 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
11621 tcg_temp_free_i32(tmp);
11623 tcg_gen_addi_i32(addr, addr, 4);
11625 if ((insn & (1 << 11)) == 0) {
11626 tcg_gen_addi_i32(addr, addr, -offset);
11628 /* write back the new stack pointer */
11629 store_reg(s, 13, addr);
11630 /* set the new PC value */
11631 if ((insn & 0x0900) == 0x0900) {
11632 store_reg_from_load(s, 15, tmp);
11636 case 1: case 3: case 9: case 11: /* czb */
11638 tmp = load_reg(s, rm);
11639 arm_gen_condlabel(s);
11640 if (insn & (1 << 11))
11641 tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, s->condlabel);
11643 tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, s->condlabel);
11644 tcg_temp_free_i32(tmp);
11645 offset = ((insn & 0xf8) >> 2) | (insn & 0x200) >> 3;
11646 val = (uint32_t)s->pc + 2;
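/*
 * CB{N}Z target = PC + (i:imm5:'0'): the offset above comes from
 * insn[9] and insn[7:3], and val is the architectural PC
 * (insn address + 4).
 */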
11651 case 15: /* IT, nop-hint. */
11652 if ((insn & 0xf) == 0) {
11653 gen_nop_hint(s, (insn >> 4) & 0xf);
11657 s->condexec_cond = (insn >> 4) & 0xe;
11658 s->condexec_mask = insn & 0x1f;
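/*
 * condexec_cond keeps only the top three bits of firstcond; its low
 * bit sits at the top of condexec_mask together with the 4-bit IT
 * mask. The per-insn advance in thumb_tr_translate_insn (which runs
 * once for the IT insn itself) shifts that bit back into
 * condexec_cond before the first insn of the block is translated.
 */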
11659 /* No actual code generated for this insn, just setup state. */
11662 case 0xe: /* bkpt */
11664 int imm8 = extract32(insn, 0, 8);
11666 gen_exception_bkpt_insn(s, 2, syn_aa32_bkpt(imm8, true));
11670 case 0xa: /* rev, and hlt */
11672 int op1 = extract32(insn, 6, 2);
11676 int imm6 = extract32(insn, 0, 6);
11682 /* Otherwise this is rev */
11684 rn = (insn >> 3) & 0x7;
11686 tmp = load_reg(s, rn);
11688 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
11689 case 1: gen_rev16(tmp); break;
11690 case 3: gen_revsh(tmp); break;
11692 g_assert_not_reached();
11694 store_reg(s, rd, tmp);
11699 switch ((insn >> 5) & 7) {
11703 if (((insn >> 3) & 1) != !!(s->be_data == MO_BE)) {
11704 gen_helper_setend(cpu_env);
11705 s->base.is_jmp = DISAS_UPDATE;
11714 if (arm_dc_feature(s, ARM_FEATURE_M)) {
11715 tmp = tcg_const_i32((insn & (1 << 4)) != 0);
11718 addr = tcg_const_i32(19);
11719 gen_helper_v7m_msr(cpu_env, addr, tmp);
11720 tcg_temp_free_i32(addr);
11724 addr = tcg_const_i32(16);
11725 gen_helper_v7m_msr(cpu_env, addr, tmp);
11726 tcg_temp_free_i32(addr);
11728 tcg_temp_free_i32(tmp);
11731 if (insn & (1 << 4)) {
11732 shift = CPSR_A | CPSR_I | CPSR_F;
11736 gen_set_psr_im(s, ((insn & 7) << 6), 0, shift);
11751 /* load/store multiple */
11752 TCGv_i32 loaded_var = NULL;
11753 rn = (insn >> 8) & 0x7;
11754 addr = load_reg(s, rn);
11755 for (i = 0; i < 8; i++) {
11756 if (insn & (1 << i)) {
11757 if (insn & (1 << 11)) {
11759 tmp = tcg_temp_new_i32();
11760 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
11764 store_reg(s, i, tmp);
11768 tmp = load_reg(s, i);
11769 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
11770 tcg_temp_free_i32(tmp);
11772 /* advance to the next address */
11773 tcg_gen_addi_i32(addr, addr, 4);
11776 if ((insn & (1 << rn)) == 0) {
11777 /* base reg not in list: base register writeback */
11778 store_reg(s, rn, addr);
11780 /* base reg in list: if load, complete it now */
11781 if (insn & (1 << 11)) {
11782 store_reg(s, rn, loaded_var);
11784 tcg_temp_free_i32(addr);
11789 /* conditional branch or swi */
11790 cond = (insn >> 8) & 0xf;
11796 gen_set_pc_im(s, s->pc);
11797 s->svc_imm = extract32(insn, 0, 8);
11798 s->base.is_jmp = DISAS_SWI;
11801 /* generate a conditional jump to next instruction */
11802 arm_skip_unless(s, cond);
11804 /* jump to the offset */
11805 val = (uint32_t)s->pc + 2;
11806 offset = ((int32_t)insn << 24) >> 24;
11807 val += offset << 1;
11812 if (insn & (1 << 11)) {
11813 /* thumb_insn_is_16bit() ensures we can't get here for
11814 * a Thumb2 CPU, so this must be a thumb1 split BL/BLX:
11815 * 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF)
11817 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
11819 offset = ((insn & 0x7ff) << 1);
11820 tmp = load_reg(s, 14);
11821 tcg_gen_addi_i32(tmp, tmp, offset);
11822 tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
11824 tmp2 = tcg_temp_new_i32();
11825 tcg_gen_movi_i32(tmp2, s->pc | 1);
11826 store_reg(s, 14, tmp2);
11830 /* unconditional branch */
11831 val = (uint32_t)s->pc;
11832 offset = ((int32_t)insn << 21) >> 21;
11833 val += (offset << 1) + 2;
11838 /* thumb_insn_is_16bit() ensures we can't get here for
11839 * a Thumb2 CPU, so this must be a thumb1 split BL/BLX.
11841 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
11843 if (insn & (1 << 11)) {
11844 /* 0b1111_1xxx_xxxx_xxxx : BL suffix */
11845 offset = ((insn & 0x7ff) << 1) | 1;
11846 tmp = load_reg(s, 14);
11847 tcg_gen_addi_i32(tmp, tmp, offset);
11849 tmp2 = tcg_temp_new_i32();
11850 tcg_gen_movi_i32(tmp2, s->pc | 1);
11851 store_reg(s, 14, tmp2);
11854 /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix */
11855 uint32_t uoffset = ((int32_t)insn << 21) >> 9;
11857 tcg_gen_movi_i32(cpu_R[14], s->pc + 2 + uoffset);
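/*
 * uoffset is SignExtend(imm11) << 12, the high half of the BL/BLX
 * offset; LR temporarily holds PC + that value until the suffix
 * halfword (decoded above) adds imm11 << 1 and stores the real
 * return address.
 */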
11864 gen_exception_insn(s, 2, EXCP_UDEF, syn_uncategorized(),
11865 default_exception_el(s));
11868 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
11870 /* Return true if the insn at dc->pc might cross a page boundary.
11871 * (False positives are OK, false negatives are not.)
11872 * We know this is a Thumb insn, and our caller ensures we are
11873 * only called if dc->pc is less than 4 bytes from the page
11874 * boundary, so we cross the page if the first 16 bits indicate
11875 * that this is a 32 bit insn.
11877 uint16_t insn = arm_lduw_code(env, s->pc, s->sctlr_b);
11879 return !thumb_insn_is_16bit(s, insn);
11882 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
11884 DisasContext *dc = container_of(dcbase, DisasContext, base);
11885 CPUARMState *env = cs->env_ptr;
11886 ARMCPU *cpu = env_archcpu(env);
11887 uint32_t tb_flags = dc->base.tb->flags;
11888 uint32_t condexec, core_mmu_idx;
11890 dc->isar = &cpu->isar;
11891 dc->pc = dc->base.pc_first;
11895 /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
11896 * there is no secure EL1, so we route exceptions to EL3.
11898 dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
11899 !arm_el_is_aa64(env, 3);
11900 dc->thumb = FIELD_EX32(tb_flags, TBFLAG_A32, THUMB);
11901 dc->sctlr_b = FIELD_EX32(tb_flags, TBFLAG_A32, SCTLR_B);
11902 dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
11903 condexec = FIELD_EX32(tb_flags, TBFLAG_A32, CONDEXEC);
11904 dc->condexec_mask = (condexec & 0xf) << 1;
11905 dc->condexec_cond = condexec >> 4;
11906 core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX);
11907 dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
11908 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
11909 #if !defined(CONFIG_USER_ONLY)
11910 dc->user = (dc->current_el == 0);
11912 dc->ns = FIELD_EX32(tb_flags, TBFLAG_A32, NS);
11913 dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL);
11914 dc->vfp_enabled = FIELD_EX32(tb_flags, TBFLAG_A32, VFPEN);
11915 dc->vec_len = FIELD_EX32(tb_flags, TBFLAG_A32, VECLEN);
11916 if (arm_feature(env, ARM_FEATURE_XSCALE)) {
11917 dc->c15_cpar = FIELD_EX32(tb_flags, TBFLAG_A32, XSCALE_CPAR);
11918 dc->vec_stride = 0;
11920 dc->vec_stride = FIELD_EX32(tb_flags, TBFLAG_A32, VECSTRIDE);
11923 dc->v7m_handler_mode = FIELD_EX32(tb_flags, TBFLAG_A32, HANDLER);
11924 dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
11925 regime_is_secure(env, dc->mmu_idx);
11926 dc->v8m_stackcheck = FIELD_EX32(tb_flags, TBFLAG_A32, STACKCHECK);
11927 dc->v8m_fpccr_s_wrong = FIELD_EX32(tb_flags, TBFLAG_A32, FPCCR_S_WRONG);
11928 dc->v7m_new_fp_ctxt_needed =
11929 FIELD_EX32(tb_flags, TBFLAG_A32, NEW_FP_CTXT_NEEDED);
11930 dc->v7m_lspact = FIELD_EX32(tb_flags, TBFLAG_A32, LSPACT);
11931 dc->cp_regs = cpu->cp_regs;
11932 dc->features = env->features;
11934 /* Single step state. The code-generation logic here is:
11936 * generate code with no special handling for single-stepping (except
11937 * that anything that can make us go to SS_ACTIVE == 1 must end the TB;
11938 * this happens anyway because those changes are all system register or
11940 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
11941 * emit code for one insn
11942 * emit code to clear PSTATE.SS
11943 * emit code to generate software step exception for completed step
11944 * end TB (as usual for having generated an exception)
11945 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
11946 * emit code to generate a software step exception
11949 dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE);
11950 dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS);
11951 dc->is_ldex = false;
11952 dc->ss_same_el = false; /* Can't be true since EL_d must be AArch64 */
11954 dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
11956 /* If architectural single step active, limit to 1. */
11957 if (is_singlestepping(dc)) {
11958 dc->base.max_insns = 1;
11961 /* ARM is a fixed-length ISA. Bound the number of insns to execute
11962 to those left on the page. */
11964 int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
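/*
 * TARGET_PAGE_MASK is -TARGET_PAGE_SIZE, so -(pc | TARGET_PAGE_MASK)
 * is the number of bytes left on this page; dividing by 4 turns that
 * into a count of fixed-size A32 insns.
 */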
11965 dc->base.max_insns = MIN(dc->base.max_insns, bound);
11968 cpu_F0s = tcg_temp_new_i32();
11969 cpu_F1s = tcg_temp_new_i32();
11970 cpu_F0d = tcg_temp_new_i64();
11971 cpu_F1d = tcg_temp_new_i64();
11974 /* FIXME: cpu_M0 can probably be the same as cpu_V0. */
11975 cpu_M0 = tcg_temp_new_i64();
11978 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
11980 DisasContext *dc = container_of(dcbase, DisasContext, base);
11982 /* A note on handling of the condexec (IT) bits:
11984 * We want to avoid the overhead of having to write the updated condexec
11985 * bits back to the CPUARMState for every instruction in an IT block. So:
11986 * (1) if the condexec bits are not already zero then we write
11987 * zero back into the CPUARMState now. This avoids complications trying
11988 * to do it at the end of the block. (For example if we don't do this
11989 * it's hard to identify whether we can safely skip writing condexec
11990 * at the end of the TB, which we definitely want to do for the case
11991 * where a TB doesn't do anything with the IT state at all.)
11992 * (2) if we are going to leave the TB then we call gen_set_condexec()
11993 * which will write the correct value into CPUARMState if zero is wrong.
11994 * This is done both for leaving the TB at the end, and for leaving
11995 * it because of an exception we know will happen, which is done in
11996 * gen_exception_insn(). The latter is necessary because we need to
11997 * leave the TB with the PC/IT state just prior to execution of the
11998 * instruction which caused the exception.
11999 * (3) if we leave the TB unexpectedly (eg a data abort on a load)
12000 * then the CPUARMState will be wrong and we need to reset it.
12001 * This is handled in the same way as restoration of the
12002 * PC in these situations; we save the value of the condexec bits
12003 * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
12004 * then uses this to restore them after an exception.
12006 * Note that there are no instructions which can read the condexec
12007 * bits, and none which can write non-static values to them, so
12008 * we don't need to care about whether CPUARMState is correct in the
12012 /* Reset the conditional execution bits immediately. This avoids
12013 complications trying to do it at the end of the block. */
12014 if (dc->condexec_mask || dc->condexec_cond) {
12015 TCGv_i32 tmp = tcg_temp_new_i32();
12016 tcg_gen_movi_i32(tmp, 0);
12017 store_cpu_field(tmp, condexec_bits);
12021 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
12023 DisasContext *dc = container_of(dcbase, DisasContext, base);
12025 tcg_gen_insn_start(dc->pc,
12026 (dc->condexec_cond << 4) | (dc->condexec_mask >> 1),
12028 dc->insn_start = tcg_last_op();
12031 static bool arm_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
12032 const CPUBreakpoint *bp)
12034 DisasContext *dc = container_of(dcbase, DisasContext, base);
12036 if (bp->flags & BP_CPU) {
12037 gen_set_condexec(dc);
12038 gen_set_pc_im(dc, dc->pc);
12039 gen_helper_check_breakpoints(cpu_env);
12040 /* End the TB early; it's likely not going to be executed */
12041 dc->base.is_jmp = DISAS_TOO_MANY;
12043 gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
12044 /* The address covered by the breakpoint must be
12045 included in [tb->pc, tb->pc + tb->size) in order
12046 for it to be properly cleared -- thus we
12047 increment the PC here so that the logic setting
12048 tb->size below does the right thing. */
12049 /* TODO: Advance PC by correct instruction length to
12050 * avoid disassembler error messages */
12052 dc->base.is_jmp = DISAS_NORETURN;
12058 static bool arm_pre_translate_insn(DisasContext *dc)
12060 #ifdef CONFIG_USER_ONLY
12061 /* Intercept jump to the magic kernel page. */
12062 if (dc->pc >= 0xffff0000) {
12063 /* We always get here via a jump, so we know we are not in a
12064 conditional execution block. */
12065 gen_exception_internal(EXCP_KERNEL_TRAP);
12066 dc->base.is_jmp = DISAS_NORETURN;
12071 if (dc->ss_active && !dc->pstate_ss) {
12072 /* Singlestep state is Active-pending.
12073 * If we're in this state at the start of a TB then either
12074 * a) we just took an exception to an EL which is being debugged
12075 * and this is the first insn in the exception handler
12076 * b) debug exceptions were masked and we just unmasked them
12077 * without changing EL (eg by clearing PSTATE.D)
12078 * In either case we're going to take a swstep exception in the
12079 * "did not step an insn" case, and so the syndrome ISV and EX
12080 * bits should be zero.
12082 assert(dc->base.num_insns == 1);
12083 gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
12084 default_exception_el(dc));
12085 dc->base.is_jmp = DISAS_NORETURN;
12092 static void arm_post_translate_insn(DisasContext *dc)
12094 if (dc->condjmp && !dc->base.is_jmp) {
12095 gen_set_label(dc->condlabel);
12098 dc->base.pc_next = dc->pc;
12099 translator_loop_temp_check(&dc->base);
12102 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
12104 DisasContext *dc = container_of(dcbase, DisasContext, base);
12105 CPUARMState *env = cpu->env_ptr;
12108 if (arm_pre_translate_insn(dc)) {
12112 insn = arm_ldl_code(env, dc->pc, dc->sctlr_b);
12115 disas_arm_insn(dc, insn);
12117 arm_post_translate_insn(dc);
12119 /* ARM is a fixed-length ISA. We performed the cross-page check
12120 in init_disas_context by adjusting max_insns. */
12123 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
12125 /* Return true if this Thumb insn is always unconditional,
12126 * even inside an IT block. This is true of only a very few
12127 * instructions: BKPT, HLT, and SG.
12129 * A larger class of instructions are UNPREDICTABLE if used
12130 * inside an IT block; we do not need to detect those here, because
12131 * what we do by default (perform the cc check and update the IT
12132 * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
12133 * choice for those situations.
12135 * insn is either a 16-bit or a 32-bit instruction; the two are
12136 * distinguishable because for the 16-bit case the top 16 bits
12137 * are zeroes, and that isn't a valid 32-bit encoding.
12139 if ((insn & 0xffffff00) == 0xbe00) {
12144 if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
12145 !arm_dc_feature(s, ARM_FEATURE_M)) {
12146 /* HLT: v8A only. This is unconditional even when it is going to
12147 * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
12148 * For v7 cores this was a plain old undefined encoding and so
12149 * honours its cc check. (We might be using the encoding as
12150 * a semihosting trap, but we don't change the cc check behaviour
12151 * on that account, because a debugger connected to a real v7A
12152 * core and emulating semihosting traps by catching the UNDEF
12153 * exception would also only see cases where the cc check passed.
12154 * No guest code should be trying to do a HLT semihosting trap
12155 * in an IT block anyway.
12160 if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
12161 arm_dc_feature(s, ARM_FEATURE_M)) {
12169 static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
12171 DisasContext *dc = container_of(dcbase, DisasContext, base);
12172 CPUARMState *env = cpu->env_ptr;
12176 if (arm_pre_translate_insn(dc)) {
12180 insn = arm_lduw_code(env, dc->pc, dc->sctlr_b);
12181 is_16bit = thumb_insn_is_16bit(dc, insn);
12184 uint32_t insn2 = arm_lduw_code(env, dc->pc, dc->sctlr_b);
12186 insn = insn << 16 | insn2;
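/*
 * 32-bit insns are held with the first halfword in bits [31:16], so
 * a 16-bit insn always has its top halfword clear (relied on by
 * thumb_insn_is_unconditional()).
 */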
12191 if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
12192 uint32_t cond = dc->condexec_cond;
12194 if (cond != 0x0e) { /* Skip conditional when condition is AL. */
12195 arm_skip_unless(dc, cond);
12200 disas_thumb_insn(dc, insn);
12202 disas_thumb2_insn(dc, insn);
12205 /* Advance the Thumb condexec condition. */
12206 if (dc->condexec_mask) {
12207 dc->condexec_cond = ((dc->condexec_cond & 0xe) |
12208 ((dc->condexec_mask >> 4) & 1));
12209 dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
12210 if (dc->condexec_mask == 0) {
12211 dc->condexec_cond = 0;
12215 arm_post_translate_insn(dc);
12217 /* Thumb is a variable-length ISA. Stop translation when the next insn
12218 * will touch a new page. This ensures that prefetch aborts occur at
12221 * We want to stop the TB if the next insn starts in a new page,
12222 * or if it spans between this page and the next. This means that
12223 * if we're looking at the last halfword in the page we need to
12224 * see if it's a 16-bit Thumb insn (which will fit in this TB)
12225 * or a 32-bit Thumb insn (which won't).
12226 * This is to avoid generating a silly TB with a single 16-bit insn
12227 * in it at the end of this page (which would execute correctly
12228 * but isn't very efficient).
12230 if (dc->base.is_jmp == DISAS_NEXT
12231 && (dc->pc - dc->page_start >= TARGET_PAGE_SIZE
12232 || (dc->pc - dc->page_start >= TARGET_PAGE_SIZE - 3
12233 && insn_crosses_page(env, dc)))) {
12234 dc->base.is_jmp = DISAS_TOO_MANY;
12238 static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
12240 DisasContext *dc = container_of(dcbase, DisasContext, base);
12242 if (tb_cflags(dc->base.tb) & CF_LAST_IO && dc->condjmp) {
12243 /* FIXME: This can theoretically happen with self-modifying code. */
12244 cpu_abort(cpu, "IO on conditional branch instruction");
12247 /* At this stage dc->condjmp will only be set when the skipped
12248 instruction was a conditional branch or trap, and the PC has
12249 already been written. */
12250 gen_set_condexec(dc);
12251 if (dc->base.is_jmp == DISAS_BX_EXCRET) {
12252 /* Exception return branches need some special case code at the
12253 * end of the TB, which is complex enough that it has to
12254 * handle the single-step vs not and the condition-failed
12255 * insn codepath itself.
12257 gen_bx_excret_final_code(dc);
12258 } else if (unlikely(is_singlestepping(dc))) {
12259 /* Unconditional and "condition passed" instruction codepath. */
12260 switch (dc->base.is_jmp) {
12262 gen_ss_advance(dc);
12263 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
12264 default_exception_el(dc));
12267 gen_ss_advance(dc);
12268 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
12271 gen_ss_advance(dc);
12272 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
12275 case DISAS_TOO_MANY:
12277 gen_set_pc_im(dc, dc->pc);
12280 /* FIXME: Single stepping a WFI insn will not halt the CPU. */
12281 gen_singlestep_exception(dc);
12283 case DISAS_NORETURN:
12287 /* While branches must always occur at the end of an IT block,
12288 there are a few other things that can cause us to terminate
12289 the TB in the middle of an IT block:
12290 - Exception generating instructions (bkpt, swi, undefined).
12292 - Hardware watchpoints.
12293 Hardware breakpoints have already been handled and skip this code.
12295 switch (dc->base.is_jmp) {
12297 case DISAS_TOO_MANY:
12298 gen_goto_tb(dc, 1, dc->pc);
12304 gen_set_pc_im(dc, dc->pc);
12307 /* indicate that the hash table must be used to find the next TB */
12308 tcg_gen_exit_tb(NULL, 0);
12310 case DISAS_NORETURN:
12311 /* nothing more to generate */
12315 TCGv_i32 tmp = tcg_const_i32((dc->thumb &&
12316 !(dc->insn & (1U << 31))) ? 2 : 4);
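/*
 * tmp is the byte length of this WFI insn: 2 for a 16-bit Thumb
 * encoding (top halfword of dc->insn is zero), otherwise 4; it is
 * passed to the wfi helper below.
 */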
12318 gen_helper_wfi(cpu_env, tmp);
12319 tcg_temp_free_i32(tmp);
12320 /* The helper doesn't necessarily throw an exception, but we
12321 * must go back to the main loop to check for interrupts anyway.
12323 tcg_gen_exit_tb(NULL, 0);
12327 gen_helper_wfe(cpu_env);
12330 gen_helper_yield(cpu_env);
12333 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
12334 default_exception_el(dc));
12337 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
12340 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
12346 /* "Condition failed" instruction codepath for the branch/trap insn */
12347 gen_set_label(dc->condlabel);
12348 gen_set_condexec(dc);
12349 if (unlikely(is_singlestepping(dc))) {
12350 gen_set_pc_im(dc, dc->pc);
12351 gen_singlestep_exception(dc);
12353 gen_goto_tb(dc, 1, dc->pc);
12357 /* Functions above can change dc->pc, so re-align dc->base.pc_next */
12358 dc->base.pc_next = dc->pc;
12361 static void arm_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu)
12363 DisasContext *dc = container_of(dcbase, DisasContext, base);
12365 qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
12366 log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
12369 static const TranslatorOps arm_translator_ops = {
12370 .init_disas_context = arm_tr_init_disas_context,
12371 .tb_start = arm_tr_tb_start,
12372 .insn_start = arm_tr_insn_start,
12373 .breakpoint_check = arm_tr_breakpoint_check,
12374 .translate_insn = arm_tr_translate_insn,
12375 .tb_stop = arm_tr_tb_stop,
12376 .disas_log = arm_tr_disas_log,
12379 static const TranslatorOps thumb_translator_ops = {
12380 .init_disas_context = arm_tr_init_disas_context,
12381 .tb_start = arm_tr_tb_start,
12382 .insn_start = arm_tr_insn_start,
12383 .breakpoint_check = arm_tr_breakpoint_check,
12384 .translate_insn = thumb_tr_translate_insn,
12385 .tb_stop = arm_tr_tb_stop,
12386 .disas_log = arm_tr_disas_log,
12389 /* generate intermediate code for basic block 'tb'. */
12390 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
12393 const TranslatorOps *ops = &arm_translator_ops;
12395 if (FIELD_EX32(tb->flags, TBFLAG_A32, THUMB)) {
12396 ops = &thumb_translator_ops;
12398 #ifdef TARGET_AARCH64
12399 if (FIELD_EX32(tb->flags, TBFLAG_ANY, AARCH64_STATE)) {
12400 ops = &aarch64_translator_ops;
12404 translator_loop(ops, &dc.base, cpu, tb, max_insns);
12407 void arm_cpu_dump_state(CPUState *cs, FILE *f, int flags)
12409 ARMCPU *cpu = ARM_CPU(cs);
12410 CPUARMState *env = &cpu->env;
12414 aarch64_cpu_dump_state(cs, f, flags);
12418 for (i = 0; i < 16; i++) {
12419 qemu_fprintf(f, "R%02d=%08x", i, env->regs[i]);
12421 qemu_fprintf(f, "\n");
12423 qemu_fprintf(f, " ");
12426 if (arm_feature(env, ARM_FEATURE_M)) {
12427 uint32_t xpsr = xpsr_read(env);
12429 const char *ns_status = "";
12431 if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
12432 ns_status = env->v7m.secure ? "S " : "NS ";
12435 if (xpsr & XPSR_EXCP) {
12438 if (env->v7m.control[env->v7m.secure] & R_V7M_CONTROL_NPRIV_MASK) {
12439 mode = "unpriv-thread";
12441 mode = "priv-thread";
12445 qemu_fprintf(f, "XPSR=%08x %c%c%c%c %c %s%s\n",
12447 xpsr & XPSR_N ? 'N' : '-',
12448 xpsr & XPSR_Z ? 'Z' : '-',
12449 xpsr & XPSR_C ? 'C' : '-',
12450 xpsr & XPSR_V ? 'V' : '-',
12451 xpsr & XPSR_T ? 'T' : 'A',
12455 uint32_t psr = cpsr_read(env);
12456 const char *ns_status = "";
12458 if (arm_feature(env, ARM_FEATURE_EL3) &&
12459 (psr & CPSR_M) != ARM_CPU_MODE_MON) {
12460 ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
12463 qemu_fprintf(f, "PSR=%08x %c%c%c%c %c %s%s%d\n",
12465 psr & CPSR_N ? 'N' : '-',
12466 psr & CPSR_Z ? 'Z' : '-',
12467 psr & CPSR_C ? 'C' : '-',
12468 psr & CPSR_V ? 'V' : '-',
12469 psr & CPSR_T ? 'T' : 'A',
12471 aarch32_mode_name(psr), (psr & 0x10) ? 32 : 26);
12474 if (flags & CPU_DUMP_FPU) {
12475 int numvfpregs = 0;
12476 if (arm_feature(env, ARM_FEATURE_VFP)) {
12479 if (arm_feature(env, ARM_FEATURE_VFP3)) {
12482 for (i = 0; i < numvfpregs; i++) {
12483 uint64_t v = *aa32_vfp_dreg(env, i);
12484 qemu_fprintf(f, "s%02d=%08x s%02d=%08x d%02d=%016" PRIx64 "\n",
12485 i * 2, (uint32_t)v,
12486 i * 2 + 1, (uint32_t)(v >> 32),
12489 qemu_fprintf(f, "FPSCR: %08x\n", vfp_get_fpscr(env));
12493 void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
12494 target_ulong *data)
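/*
 * data[] holds the per-insn words recorded by tcg_gen_insn_start()
 * in arm_tr_insn_start(): data[0] is the PC, data[1] the packed
 * condexec (IT) bits, and data[2] exception-syndrome bits which are
 * shifted back into place below.
 */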
12498 env->condexec_bits = 0;
12499 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
12501 env->regs[15] = data[0];
12502 env->condexec_bits = data[1];
12503 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;