4 * Copyright (c) 2003 Fabrice Bellard
5 * Copyright (c) 2005-2007 CodeSourcery
6 * Copyright (c) 2007 OpenedHand, Ltd.
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21 #include "qemu/osdep.h"
24 #include "internals.h"
25 #include "disas/disas.h"
26 #include "exec/exec-all.h"
28 #include "tcg-op-gvec.h"
30 #include "qemu/bitops.h"
32 #include "exec/semihost.h"
34 #include "exec/helper-proto.h"
35 #include "exec/helper-gen.h"
37 #include "trace-tcg.h"
41 #define ENABLE_ARCH_4T arm_dc_feature(s, ARM_FEATURE_V4T)
42 #define ENABLE_ARCH_5 arm_dc_feature(s, ARM_FEATURE_V5)
43 /* currently all emulated v5 cores are also v5TE, so don't bother */
44 #define ENABLE_ARCH_5TE arm_dc_feature(s, ARM_FEATURE_V5)
45 #define ENABLE_ARCH_5J dc_isar_feature(jazelle, s)
46 #define ENABLE_ARCH_6 arm_dc_feature(s, ARM_FEATURE_V6)
47 #define ENABLE_ARCH_6K arm_dc_feature(s, ARM_FEATURE_V6K)
48 #define ENABLE_ARCH_6T2 arm_dc_feature(s, ARM_FEATURE_THUMB2)
49 #define ENABLE_ARCH_7 arm_dc_feature(s, ARM_FEATURE_V7)
50 #define ENABLE_ARCH_8 arm_dc_feature(s, ARM_FEATURE_V8)
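/* Give up decoding as an illegal opcode (goto illegal_op) when the guest CPU lacks the named architecture feature. */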
52 #define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
54 #include "translate.h"
56 #if defined(CONFIG_USER_ONLY)
59 #define IS_USER(s) (s->user)
62 /* We reuse the same 64-bit temporaries for efficiency. */
63 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
64 static TCGv_i32 cpu_R[16];
65 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
66 TCGv_i64 cpu_exclusive_addr;
67 TCGv_i64 cpu_exclusive_val;
69 /* FIXME: These should be removed. */
70 static TCGv_i32 cpu_F0s, cpu_F1s;
71 static TCGv_i64 cpu_F0d, cpu_F1d;
73 #include "exec/gen-icount.h"
75 static const char * const regnames[] =
76 { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
77 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
79 /* Function prototypes for gen_ functions calling Neon helpers. */
80 typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
83 /* initialize TCG globals. */
84 void arm_translate_init(void)
88 for (i = 0; i < 16; i++) {
89 cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
90 offsetof(CPUARMState, regs[i]),
93 cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
94 cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
95 cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
96 cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
98 cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
99 offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
100 cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
101 offsetof(CPUARMState, exclusive_val), "exclusive_val");
103 a64_translate_init();
106 /* Flags for the disas_set_da_iss info argument:
107 * lower bits hold the Rt register number, higher bits are flags.
109 typedef enum ISSInfo {
112 ISSInvalid = (1 << 5),
113 ISSIsAcqRel = (1 << 6),
114 ISSIsWrite = (1 << 7),
115 ISSIs16Bit = (1 << 8),
118 /* Save the syndrome information for a Data Abort */
119 static void disas_set_da_iss(DisasContext *s, TCGMemOp memop, ISSInfo issinfo)
122 int sas = memop & MO_SIZE;
123 bool sse = memop & MO_SIGN;
124 bool is_acqrel = issinfo & ISSIsAcqRel;
125 bool is_write = issinfo & ISSIsWrite;
126 bool is_16bit = issinfo & ISSIs16Bit;
127 int srt = issinfo & ISSRegMask;
129 if (issinfo & ISSInvalid) {
/* Some callsites want to conditionally provide ISS info,
* e.g. "only if this was not a writeback".
*/
return;
}
137 /* For AArch32, insns where the src/dest is R15 never generate
* ISS information. Catching that here saves checking at all
* the call sites.
*/
if (srt == 15) {
return;
}
144 syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
145 0, 0, 0, is_write, 0, is_16bit);
146 disas_set_insn_syndrome(s, syn);
149 static inline int get_a32_user_mem_index(DisasContext *s)
/* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
* insns:
153 * if PL2, UNPREDICTABLE (we choose to implement as if PL0)
154 * otherwise, access as if at PL0.
156 switch (s->mmu_idx) {
157 case ARMMMUIdx_S1E2: /* this one is UNPREDICTABLE */
158 case ARMMMUIdx_S12NSE0:
159 case ARMMMUIdx_S12NSE1:
160 return arm_to_core_mmu_idx(ARMMMUIdx_S12NSE0);
162 case ARMMMUIdx_S1SE0:
163 case ARMMMUIdx_S1SE1:
164 return arm_to_core_mmu_idx(ARMMMUIdx_S1SE0);
165 case ARMMMUIdx_MUser:
166 case ARMMMUIdx_MPriv:
167 return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
168 case ARMMMUIdx_MUserNegPri:
169 case ARMMMUIdx_MPrivNegPri:
170 return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
171 case ARMMMUIdx_MSUser:
172 case ARMMMUIdx_MSPriv:
173 return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
174 case ARMMMUIdx_MSUserNegPri:
175 case ARMMMUIdx_MSPrivNegPri:
176 return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
179 g_assert_not_reached();
183 static inline TCGv_i32 load_cpu_offset(int offset)
185 TCGv_i32 tmp = tcg_temp_new_i32();
186 tcg_gen_ld_i32(tmp, cpu_env, offset);
190 #define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))
192 static inline void store_cpu_offset(TCGv_i32 var, int offset)
194 tcg_gen_st_i32(var, cpu_env, offset);
195 tcg_temp_free_i32(var);
198 #define store_cpu_field(var, name) \
199 store_cpu_offset(var, offsetof(CPUARMState, name))
201 /* Set a variable to the value of a CPU register. */
202 static void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
/* Normally, since we have already updated the PC, we only need to add one more insn length. */
208 addr = (long)s->pc + 2;
210 addr = (long)s->pc + 4;
211 tcg_gen_movi_i32(var, addr);
213 tcg_gen_mov_i32(var, cpu_R[reg]);
217 /* Create a new temporary and set it to the value of a CPU register. */
218 static inline TCGv_i32 load_reg(DisasContext *s, int reg)
220 TCGv_i32 tmp = tcg_temp_new_i32();
221 load_reg_var(s, tmp, reg);
/* Set a CPU register. The source must be a temporary and will be
marked as dead. */
227 static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
230 /* In Thumb mode, we must ignore bit 0.
231 * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
232 * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
233 * We choose to ignore [1:0] in ARM mode for all architecture versions.
235 tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
236 s->base.is_jmp = DISAS_JUMP;
238 tcg_gen_mov_i32(cpu_R[reg], var);
239 tcg_temp_free_i32(var);
243 * Variant of store_reg which applies v8M stack-limit checks before updating
244 * SP. If the check fails this will result in an exception being taken.
245 * We disable the stack checks for CONFIG_USER_ONLY because we have
246 * no idea what the stack limits should be in that case.
247 * If stack checking is not being done this just acts like store_reg().
249 static void store_sp_checked(DisasContext *s, TCGv_i32 var)
251 #ifndef CONFIG_USER_ONLY
252 if (s->v8m_stackcheck) {
253 gen_helper_v8m_stackcheck(cpu_env, var);
256 store_reg(s, 13, var);
259 /* Value extensions. */
260 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
261 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
262 #define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
263 #define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
265 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
266 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
269 static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
271 TCGv_i32 tmp_mask = tcg_const_i32(mask);
272 gen_helper_cpsr_write(cpu_env, var, tmp_mask);
273 tcg_temp_free_i32(tmp_mask);
275 /* Set NZCV flags from the high 4 bits of var. */
276 #define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
278 static void gen_exception_internal(int excp)
280 TCGv_i32 tcg_excp = tcg_const_i32(excp);
282 assert(excp_is_internal(excp));
283 gen_helper_exception_internal(cpu_env, tcg_excp);
284 tcg_temp_free_i32(tcg_excp);
287 static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
289 TCGv_i32 tcg_excp = tcg_const_i32(excp);
290 TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
291 TCGv_i32 tcg_el = tcg_const_i32(target_el);
293 gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
296 tcg_temp_free_i32(tcg_el);
297 tcg_temp_free_i32(tcg_syn);
298 tcg_temp_free_i32(tcg_excp);
301 static void gen_ss_advance(DisasContext *s)
/* If the singlestep state is Active-not-pending, advance to
* Active-pending.
*/
308 gen_helper_clear_pstate_ss(cpu_env);
312 static void gen_step_complete_exception(DisasContext *s)
314 /* We just completed step of an insn. Move from Active-not-pending
315 * to Active-pending, and then also take the swstep exception.
316 * This corresponds to making the (IMPDEF) choice to prioritize
317 * swstep exceptions over asynchronous exceptions taken to an exception
318 * level where debug is disabled. This choice has the advantage that
319 * we do not need to maintain internal state corresponding to the
320 * ISV/EX syndrome bits between completion of the step and generation
321 * of the exception, and our syndrome information is always correct.
324 gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
325 default_exception_el(s));
326 s->base.is_jmp = DISAS_NORETURN;
329 static void gen_singlestep_exception(DisasContext *s)
331 /* Generate the right kind of exception for singlestep, which is
332 * either the architectural singlestep or EXCP_DEBUG for QEMU's
333 * gdb singlestepping.
336 gen_step_complete_exception(s);
338 gen_exception_internal(EXCP_DEBUG);
342 static inline bool is_singlestepping(DisasContext *s)
344 /* Return true if we are singlestepping either because of
345 * architectural singlestep or QEMU gdbstub singlestep. This does
346 * not include the command line '-singlestep' mode which is rather
347 * misnamed as it only means "one instruction per TB" and doesn't
348 * affect the code we generate.
350 return s->base.singlestep_enabled || s->ss_active;
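/* Dual signed 16x16->32 multiply: on return, a holds lo(a) * lo(b) and b holds hi(a) * hi(b), each as a full 32-bit signed product. */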
353 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
355 TCGv_i32 tmp1 = tcg_temp_new_i32();
356 TCGv_i32 tmp2 = tcg_temp_new_i32();
357 tcg_gen_ext16s_i32(tmp1, a);
358 tcg_gen_ext16s_i32(tmp2, b);
359 tcg_gen_mul_i32(tmp1, tmp1, tmp2);
360 tcg_temp_free_i32(tmp2);
361 tcg_gen_sari_i32(a, a, 16);
362 tcg_gen_sari_i32(b, b, 16);
363 tcg_gen_mul_i32(b, b, a);
364 tcg_gen_mov_i32(a, tmp1);
365 tcg_temp_free_i32(tmp1);
368 /* Byteswap each halfword. */
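/* For example, 0x11223344 becomes 0x22114433. */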
369 static void gen_rev16(TCGv_i32 var)
371 TCGv_i32 tmp = tcg_temp_new_i32();
372 TCGv_i32 mask = tcg_const_i32(0x00ff00ff);
373 tcg_gen_shri_i32(tmp, var, 8);
374 tcg_gen_and_i32(tmp, tmp, mask);
375 tcg_gen_and_i32(var, var, mask);
376 tcg_gen_shli_i32(var, var, 8);
377 tcg_gen_or_i32(var, var, tmp);
378 tcg_temp_free_i32(mask);
379 tcg_temp_free_i32(tmp);
382 /* Byteswap low halfword and sign extend. */
383 static void gen_revsh(TCGv_i32 var)
385 tcg_gen_ext16u_i32(var, var);
386 tcg_gen_bswap16_i32(var, var);
387 tcg_gen_ext16s_i32(var, var);
390 /* Return (b << 32) + a. Mark inputs as dead */
391 static TCGv_i64 gen_addq_msw(TCGv_i64 a, TCGv_i32 b)
393 TCGv_i64 tmp64 = tcg_temp_new_i64();
395 tcg_gen_extu_i32_i64(tmp64, b);
396 tcg_temp_free_i32(b);
397 tcg_gen_shli_i64(tmp64, tmp64, 32);
398 tcg_gen_add_i64(a, tmp64, a);
400 tcg_temp_free_i64(tmp64);
404 /* Return (b << 32) - a. Mark inputs as dead. */
405 static TCGv_i64 gen_subq_msw(TCGv_i64 a, TCGv_i32 b)
407 TCGv_i64 tmp64 = tcg_temp_new_i64();
409 tcg_gen_extu_i32_i64(tmp64, b);
410 tcg_temp_free_i32(b);
411 tcg_gen_shli_i64(tmp64, tmp64, 32);
412 tcg_gen_sub_i64(a, tmp64, a);
414 tcg_temp_free_i64(tmp64);
418 /* 32x32->64 multiply. Marks inputs as dead. */
419 static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
421 TCGv_i32 lo = tcg_temp_new_i32();
422 TCGv_i32 hi = tcg_temp_new_i32();
425 tcg_gen_mulu2_i32(lo, hi, a, b);
426 tcg_temp_free_i32(a);
427 tcg_temp_free_i32(b);
429 ret = tcg_temp_new_i64();
430 tcg_gen_concat_i32_i64(ret, lo, hi);
431 tcg_temp_free_i32(lo);
432 tcg_temp_free_i32(hi);
437 static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
439 TCGv_i32 lo = tcg_temp_new_i32();
440 TCGv_i32 hi = tcg_temp_new_i32();
443 tcg_gen_muls2_i32(lo, hi, a, b);
444 tcg_temp_free_i32(a);
445 tcg_temp_free_i32(b);
447 ret = tcg_temp_new_i64();
448 tcg_gen_concat_i32_i64(ret, lo, hi);
449 tcg_temp_free_i32(lo);
450 tcg_temp_free_i32(hi);
455 /* Swap low and high halfwords. */
456 static void gen_swap_half(TCGv_i32 var)
458 TCGv_i32 tmp = tcg_temp_new_i32();
459 tcg_gen_shri_i32(tmp, var, 16);
460 tcg_gen_shli_i32(var, var, 16);
461 tcg_gen_or_i32(var, var, tmp);
462 tcg_temp_free_i32(tmp);
/* Dual 16-bit add. The result is placed in t0; t1 is marked as dead.
   tmp = (t0 ^ t1) & 0x8000;
   t0 &= ~0x8000;
   t1 &= ~0x8000;
   t0 = (t0 + t1) ^ tmp;
*/
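/* e.g. t0 = 0x00018000, t1 = 0x00028000: a plain 32-bit add would give 0x00040000, but the per-halfword result is 0x00030000 because the carry out of bit 15 is discarded. */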
472 static void gen_add16(TCGv_i32 t0, TCGv_i32 t1)
474 TCGv_i32 tmp = tcg_temp_new_i32();
475 tcg_gen_xor_i32(tmp, t0, t1);
476 tcg_gen_andi_i32(tmp, tmp, 0x8000);
477 tcg_gen_andi_i32(t0, t0, ~0x8000);
478 tcg_gen_andi_i32(t1, t1, ~0x8000);
479 tcg_gen_add_i32(t0, t0, t1);
480 tcg_gen_xor_i32(t0, t0, tmp);
481 tcg_temp_free_i32(tmp);
482 tcg_temp_free_i32(t1);
485 /* Set CF to the top bit of var. */
486 static void gen_set_CF_bit31(TCGv_i32 var)
488 tcg_gen_shri_i32(cpu_CF, var, 31);
491 /* Set N and Z flags from var. */
492 static inline void gen_logic_CC(TCGv_i32 var)
494 tcg_gen_mov_i32(cpu_NF, var);
495 tcg_gen_mov_i32(cpu_ZF, var);
499 static void gen_adc(TCGv_i32 t0, TCGv_i32 t1)
501 tcg_gen_add_i32(t0, t0, t1);
502 tcg_gen_add_i32(t0, t0, cpu_CF);
505 /* dest = T0 + T1 + CF. */
506 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
508 tcg_gen_add_i32(dest, t0, t1);
509 tcg_gen_add_i32(dest, dest, cpu_CF);
512 /* dest = T0 - T1 + CF - 1. */
513 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
515 tcg_gen_sub_i32(dest, t0, t1);
516 tcg_gen_add_i32(dest, dest, cpu_CF);
517 tcg_gen_subi_i32(dest, dest, 1);
520 /* dest = T0 + T1. Compute C, N, V and Z flags */
521 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
523 TCGv_i32 tmp = tcg_temp_new_i32();
524 tcg_gen_movi_i32(tmp, 0);
525 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
526 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
527 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
528 tcg_gen_xor_i32(tmp, t0, t1);
529 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
530 tcg_temp_free_i32(tmp);
531 tcg_gen_mov_i32(dest, cpu_NF);
534 /* dest = T0 + T1 + CF. Compute C, N, V and Z flags */
535 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
537 TCGv_i32 tmp = tcg_temp_new_i32();
538 if (TCG_TARGET_HAS_add2_i32) {
539 tcg_gen_movi_i32(tmp, 0);
540 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
541 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
543 TCGv_i64 q0 = tcg_temp_new_i64();
544 TCGv_i64 q1 = tcg_temp_new_i64();
545 tcg_gen_extu_i32_i64(q0, t0);
546 tcg_gen_extu_i32_i64(q1, t1);
547 tcg_gen_add_i64(q0, q0, q1);
548 tcg_gen_extu_i32_i64(q1, cpu_CF);
549 tcg_gen_add_i64(q0, q0, q1);
550 tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
551 tcg_temp_free_i64(q0);
552 tcg_temp_free_i64(q1);
554 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
555 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
556 tcg_gen_xor_i32(tmp, t0, t1);
557 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
558 tcg_temp_free_i32(tmp);
559 tcg_gen_mov_i32(dest, cpu_NF);
562 /* dest = T0 - T1. Compute C, N, V and Z flags */
563 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
566 tcg_gen_sub_i32(cpu_NF, t0, t1);
567 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
568 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
569 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
570 tmp = tcg_temp_new_i32();
571 tcg_gen_xor_i32(tmp, t0, t1);
572 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
573 tcg_temp_free_i32(tmp);
574 tcg_gen_mov_i32(dest, cpu_NF);
577 /* dest = T0 + ~T1 + CF. Compute C, N, V and Z flags */
578 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
580 TCGv_i32 tmp = tcg_temp_new_i32();
581 tcg_gen_not_i32(tmp, t1);
582 gen_adc_CC(dest, t0, tmp);
583 tcg_temp_free_i32(tmp);
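/* Shift-by-register helpers: only the bottom byte of t1 is used as the shift count, and counts of 32 or more produce 0, matching the A32 LSL/LSR register-shift semantics. */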
586 #define GEN_SHIFT(name) \
587 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) \
589 TCGv_i32 tmp1, tmp2, tmp3; \
590 tmp1 = tcg_temp_new_i32(); \
591 tcg_gen_andi_i32(tmp1, t1, 0xff); \
592 tmp2 = tcg_const_i32(0); \
593 tmp3 = tcg_const_i32(0x1f); \
594 tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0); \
595 tcg_temp_free_i32(tmp3); \
596 tcg_gen_andi_i32(tmp1, tmp1, 0x1f); \
597 tcg_gen_##name##_i32(dest, tmp2, tmp1); \
598 tcg_temp_free_i32(tmp2); \
599 tcg_temp_free_i32(tmp1); \
605 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
608 tmp1 = tcg_temp_new_i32();
609 tcg_gen_andi_i32(tmp1, t1, 0xff);
610 tmp2 = tcg_const_i32(0x1f);
611 tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
612 tcg_temp_free_i32(tmp2);
613 tcg_gen_sar_i32(dest, t0, tmp1);
614 tcg_temp_free_i32(tmp1);
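/* dest = |src|, computed as (src > 0 ? src : -src); INT32_MIN is left unchanged. */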
617 static void tcg_gen_abs_i32(TCGv_i32 dest, TCGv_i32 src)
619 TCGv_i32 c0 = tcg_const_i32(0);
620 TCGv_i32 tmp = tcg_temp_new_i32();
621 tcg_gen_neg_i32(tmp, src);
622 tcg_gen_movcond_i32(TCG_COND_GT, dest, src, c0, src, tmp);
623 tcg_temp_free_i32(c0);
624 tcg_temp_free_i32(tmp);
627 static void shifter_out_im(TCGv_i32 var, int shift)
630 tcg_gen_andi_i32(cpu_CF, var, 1);
632 tcg_gen_shri_i32(cpu_CF, var, shift);
634 tcg_gen_andi_i32(cpu_CF, cpu_CF, 1);
639 /* Shift by immediate. Includes special handling for shift == 0. */
640 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
641 int shift, int flags)
647 shifter_out_im(var, 32 - shift);
648 tcg_gen_shli_i32(var, var, shift);
654 tcg_gen_shri_i32(cpu_CF, var, 31);
656 tcg_gen_movi_i32(var, 0);
659 shifter_out_im(var, shift - 1);
660 tcg_gen_shri_i32(var, var, shift);
667 shifter_out_im(var, shift - 1);
670 tcg_gen_sari_i32(var, var, shift);
672 case 3: /* ROR/RRX */
675 shifter_out_im(var, shift - 1);
676 tcg_gen_rotri_i32(var, var, shift); break;
678 TCGv_i32 tmp = tcg_temp_new_i32();
679 tcg_gen_shli_i32(tmp, cpu_CF, 31);
681 shifter_out_im(var, 0);
682 tcg_gen_shri_i32(var, var, 1);
683 tcg_gen_or_i32(var, var, tmp);
684 tcg_temp_free_i32(tmp);
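/* Shift by a register-specified amount. When flags is set, use the flag-setting helpers so CF gets the shifter carry-out; the shift temporary is freed in both cases. */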
689 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
690 TCGv_i32 shift, int flags)
694 case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
695 case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
696 case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
697 case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
702 gen_shl(var, var, shift);
705 gen_shr(var, var, shift);
708 gen_sar(var, var, shift);
710 case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
711 tcg_gen_rotr_i32(var, var, shift); break;
714 tcg_temp_free_i32(shift);
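/* Parallel add/subtract on packed 8-bit or 16-bit lanes: op2 selects the lane pattern (add16, addsubx, subaddx, sub16, add8, sub8) and op1 selects the signed/unsigned/saturating/halving variant; the plain signed and unsigned forms also update the GE flags, hence the pointer into CPUARMState. */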
717 #define PAS_OP(pfx) \
719 case 0: gen_pas_helper(glue(pfx,add16)); break; \
720 case 1: gen_pas_helper(glue(pfx,addsubx)); break; \
721 case 2: gen_pas_helper(glue(pfx,subaddx)); break; \
722 case 3: gen_pas_helper(glue(pfx,sub16)); break; \
723 case 4: gen_pas_helper(glue(pfx,add8)); break; \
724 case 7: gen_pas_helper(glue(pfx,sub8)); break; \
726 static void gen_arm_parallel_addsub(int op1, int op2, TCGv_i32 a, TCGv_i32 b)
731 #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
733 tmp = tcg_temp_new_ptr();
734 tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
736 tcg_temp_free_ptr(tmp);
739 tmp = tcg_temp_new_ptr();
740 tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
742 tcg_temp_free_ptr(tmp);
744 #undef gen_pas_helper
745 #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
758 #undef gen_pas_helper
763 /* For unknown reasons Arm and Thumb-2 use arbitrarily different encodings. */
764 #define PAS_OP(pfx) \
766 case 0: gen_pas_helper(glue(pfx,add8)); break; \
767 case 1: gen_pas_helper(glue(pfx,add16)); break; \
768 case 2: gen_pas_helper(glue(pfx,addsubx)); break; \
769 case 4: gen_pas_helper(glue(pfx,sub8)); break; \
770 case 5: gen_pas_helper(glue(pfx,sub16)); break; \
771 case 6: gen_pas_helper(glue(pfx,subaddx)); break; \
773 static void gen_thumb2_parallel_addsub(int op1, int op2, TCGv_i32 a, TCGv_i32 b)
778 #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
780 tmp = tcg_temp_new_ptr();
781 tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
783 tcg_temp_free_ptr(tmp);
786 tmp = tcg_temp_new_ptr();
787 tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
789 tcg_temp_free_ptr(tmp);
791 #undef gen_pas_helper
792 #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
805 #undef gen_pas_helper
811 * Generate a conditional based on ARM condition code cc.
* This is common between ARM and AArch64 targets.
814 void arm_test_cc(DisasCompare *cmp, int cc)
845 case 8: /* hi: C && !Z */
846 case 9: /* ls: !C || Z -> !(C && !Z) */
848 value = tcg_temp_new_i32();
850 /* CF is 1 for C, so -CF is an all-bits-set mask for C;
851 ZF is non-zero for !Z; so AND the two subexpressions. */
852 tcg_gen_neg_i32(value, cpu_CF);
853 tcg_gen_and_i32(value, value, cpu_ZF);
856 case 10: /* ge: N == V -> N ^ V == 0 */
857 case 11: /* lt: N != V -> N ^ V != 0 */
858 /* Since we're only interested in the sign bit, == 0 is >= 0. */
860 value = tcg_temp_new_i32();
862 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
865 case 12: /* gt: !Z && N == V */
866 case 13: /* le: Z || N != V */
868 value = tcg_temp_new_i32();
870 /* (N == V) is equal to the sign bit of ~(NF ^ VF). Propagate
871 * the sign bit then AND with ZF to yield the result. */
872 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
873 tcg_gen_sari_i32(value, value, 31);
874 tcg_gen_andc_i32(value, cpu_ZF, value);
877 case 14: /* always */
878 case 15: /* always */
879 /* Use the ALWAYS condition, which will fold early.
880 * It doesn't matter what we use for the value. */
881 cond = TCG_COND_ALWAYS;
886 fprintf(stderr, "Bad condition code 0x%x\n", cc);
891 cond = tcg_invert_cond(cond);
897 cmp->value_global = global;
900 void arm_free_cc(DisasCompare *cmp)
902 if (!cmp->value_global) {
903 tcg_temp_free_i32(cmp->value);
907 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
909 tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
912 void arm_gen_test_cc(int cc, TCGLabel *label)
915 arm_test_cc(&cmp, cc);
916 arm_jump_cc(&cmp, label);
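/* For each data-processing opcode, 1 if it is a logical op whose S-bit form sets only N and Z from the result (C comes from the shifter), rather than full arithmetic flags. */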
920 static const uint8_t table_logic_cc[16] = {
939 static inline void gen_set_condexec(DisasContext *s)
941 if (s->condexec_mask) {
942 uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
943 TCGv_i32 tmp = tcg_temp_new_i32();
944 tcg_gen_movi_i32(tmp, val);
945 store_cpu_field(tmp, condexec_bits);
949 static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
951 tcg_gen_movi_i32(cpu_R[15], val);
954 /* Set PC and Thumb state from an immediate address. */
955 static inline void gen_bx_im(DisasContext *s, uint32_t addr)
959 s->base.is_jmp = DISAS_JUMP;
960 if (s->thumb != (addr & 1)) {
961 tmp = tcg_temp_new_i32();
962 tcg_gen_movi_i32(tmp, addr & 1);
963 tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUARMState, thumb));
964 tcg_temp_free_i32(tmp);
966 tcg_gen_movi_i32(cpu_R[15], addr & ~1);
969 /* Set PC and Thumb state from var. var is marked as dead. */
970 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
972 s->base.is_jmp = DISAS_JUMP;
973 tcg_gen_andi_i32(cpu_R[15], var, ~1);
974 tcg_gen_andi_i32(var, var, 1);
975 store_cpu_field(var, thumb);
978 /* Set PC and Thumb state from var. var is marked as dead.
979 * For M-profile CPUs, include logic to detect exception-return
980 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
981 * and BX reg, and no others, and happens only for code in Handler mode.
983 static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
985 /* Generate the same code here as for a simple bx, but flag via
986 * s->base.is_jmp that we need to do the rest of the work later.
989 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
990 (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
991 s->base.is_jmp = DISAS_BX_EXCRET;
995 static inline void gen_bx_excret_final_code(DisasContext *s)
997 /* Generate the code to finish possible exception return and end the TB */
998 TCGLabel *excret_label = gen_new_label();
1001 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
1002 /* Covers FNC_RETURN and EXC_RETURN magic */
1003 min_magic = FNC_RETURN_MIN_MAGIC;
1005 /* EXC_RETURN magic only */
1006 min_magic = EXC_RETURN_MIN_MAGIC;
1009 /* Is the new PC value in the magic range indicating exception return? */
1010 tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label);
1011 /* No: end the TB as we would for a DISAS_JMP */
1012 if (is_singlestepping(s)) {
1013 gen_singlestep_exception(s);
1015 tcg_gen_exit_tb(NULL, 0);
1017 gen_set_label(excret_label);
1018 /* Yes: this is an exception return.
1019 * At this point in runtime env->regs[15] and env->thumb will hold
1020 * the exception-return magic number, which do_v7m_exception_exit()
1021 * will read. Nothing else will be able to see those values because
1022 * the cpu-exec main loop guarantees that we will always go straight
1023 * from raising the exception to the exception-handling code.
1025 * gen_ss_advance(s) does nothing on M profile currently but
1026 * calling it is conceptually the right thing as we have executed
1027 * this instruction (compare SWI, HVC, SMC handling).
1030 gen_exception_internal(EXCP_EXCEPTION_EXIT);
1033 static inline void gen_bxns(DisasContext *s, int rm)
1035 TCGv_i32 var = load_reg(s, rm);
1037 /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
1038 * we need to sync state before calling it, but:
1039 * - we don't need to do gen_set_pc_im() because the bxns helper will
1040 * always set the PC itself
1041 * - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
1042 * unless it's outside an IT block or the last insn in an IT block,
1043 * so we know that condexec == 0 (already set at the top of the TB)
1044 * is correct in the non-UNPREDICTABLE cases, and we can choose
1045 * "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
1047 gen_helper_v7m_bxns(cpu_env, var);
1048 tcg_temp_free_i32(var);
1049 s->base.is_jmp = DISAS_EXIT;
1052 static inline void gen_blxns(DisasContext *s, int rm)
1054 TCGv_i32 var = load_reg(s, rm);
1056 /* We don't need to sync condexec state, for the same reason as bxns.
1057 * We do however need to set the PC, because the blxns helper reads it.
1058 * The blxns helper may throw an exception.
1060 gen_set_pc_im(s, s->pc);
1061 gen_helper_v7m_blxns(cpu_env, var);
1062 tcg_temp_free_i32(var);
1063 s->base.is_jmp = DISAS_EXIT;
1066 /* Variant of store_reg which uses branch&exchange logic when storing
1067 to r15 in ARM architecture v7 and above. The source must be a temporary
1068 and will be marked as dead. */
1069 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
1071 if (reg == 15 && ENABLE_ARCH_7) {
1074 store_reg(s, reg, var);
1078 /* Variant of store_reg which uses branch&exchange logic when storing
1079 * to r15 in ARM architecture v5T and above. This is used for storing
1080 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
1081 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
1082 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
1084 if (reg == 15 && ENABLE_ARCH_5) {
1085 gen_bx_excret(s, var);
1087 store_reg(s, reg, var);
1091 #ifdef CONFIG_USER_ONLY
1092 #define IS_USER_ONLY 1
1094 #define IS_USER_ONLY 0
1097 /* Abstractions of "generate code to do a guest load/store for
1098 * AArch32", where a vaddr is always 32 bits (and is zero
1099 * extended if we're a 64 bit core) and data is also
1100 * 32 bits unless specifically doing a 64 bit access.
1101 * These functions work like tcg_gen_qemu_{ld,st}* except
1102 * that the address argument is TCGv_i32 rather than TCGv.
1105 static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, TCGMemOp op)
1107 TCGv addr = tcg_temp_new();
1108 tcg_gen_extu_i32_tl(addr, a32);
1110 /* Not needed for user-mode BE32, where we use MO_BE instead. */
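/* In BE32 system mode, sub-word accesses are byte-lane reversed within an aligned word: e.g. a byte access to address 1 is done at address 2 (1 ^ 3) and a halfword access to address 0 at address 2 (0 ^ 2). */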
1111 if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
1112 tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
1117 static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
1118 int index, TCGMemOp opc)
1122 if (arm_dc_feature(s, ARM_FEATURE_M) &&
1123 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
1127 addr = gen_aa32_addr(s, a32, opc);
1128 tcg_gen_qemu_ld_i32(val, addr, index, opc);
1129 tcg_temp_free(addr);
1132 static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
1133 int index, TCGMemOp opc)
1137 if (arm_dc_feature(s, ARM_FEATURE_M) &&
1138 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
1142 addr = gen_aa32_addr(s, a32, opc);
1143 tcg_gen_qemu_st_i32(val, addr, index, opc);
1144 tcg_temp_free(addr);
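/* Build the sized load/store wrappers plus _iss variants that also record ISS syndrome information. For instance, DO_GEN_LD(8u, MO_UB) defines gen_aa32_ld8u(), which calls gen_aa32_ld_i32() with MO_UB | s->be_data. */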
1147 #define DO_GEN_LD(SUFF, OPC) \
1148 static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
1149 TCGv_i32 a32, int index) \
1151 gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data); \
1153 static inline void gen_aa32_ld##SUFF##_iss(DisasContext *s, \
1155 TCGv_i32 a32, int index, \
1158 gen_aa32_ld##SUFF(s, val, a32, index); \
1159 disas_set_da_iss(s, OPC, issinfo); \
1162 #define DO_GEN_ST(SUFF, OPC) \
1163 static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
1164 TCGv_i32 a32, int index) \
1166 gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data); \
1168 static inline void gen_aa32_st##SUFF##_iss(DisasContext *s, \
1170 TCGv_i32 a32, int index, \
1173 gen_aa32_st##SUFF(s, val, a32, index); \
1174 disas_set_da_iss(s, OPC, issinfo | ISSIsWrite); \
1177 static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val)
1179 /* Not needed for user-mode BE32, where we use MO_BE instead. */
1180 if (!IS_USER_ONLY && s->sctlr_b) {
1181 tcg_gen_rotri_i64(val, val, 32);
1185 static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1186 int index, TCGMemOp opc)
1188 TCGv addr = gen_aa32_addr(s, a32, opc);
1189 tcg_gen_qemu_ld_i64(val, addr, index, opc);
1190 gen_aa32_frob64(s, val);
1191 tcg_temp_free(addr);
1194 static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
1195 TCGv_i32 a32, int index)
1197 gen_aa32_ld_i64(s, val, a32, index, MO_Q | s->be_data);
1200 static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1201 int index, TCGMemOp opc)
1203 TCGv addr = gen_aa32_addr(s, a32, opc);
1205 /* Not needed for user-mode BE32, where we use MO_BE instead. */
1206 if (!IS_USER_ONLY && s->sctlr_b) {
1207 TCGv_i64 tmp = tcg_temp_new_i64();
1208 tcg_gen_rotri_i64(tmp, val, 32);
1209 tcg_gen_qemu_st_i64(tmp, addr, index, opc);
1210 tcg_temp_free_i64(tmp);
1212 tcg_gen_qemu_st_i64(val, addr, index, opc);
1214 tcg_temp_free(addr);
1217 static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
1218 TCGv_i32 a32, int index)
1220 gen_aa32_st_i64(s, val, a32, index, MO_Q | s->be_data);
1223 DO_GEN_LD(8s, MO_SB)
1224 DO_GEN_LD(8u, MO_UB)
1225 DO_GEN_LD(16s, MO_SW)
1226 DO_GEN_LD(16u, MO_UW)
1227 DO_GEN_LD(32u, MO_UL)
1229 DO_GEN_ST(16, MO_UW)
1230 DO_GEN_ST(32, MO_UL)
1232 static inline void gen_hvc(DisasContext *s, int imm16)
1234 /* The pre HVC helper handles cases when HVC gets trapped
1235 * as an undefined insn by runtime configuration (ie before
1236 * the insn really executes).
1238 gen_set_pc_im(s, s->pc - 4);
1239 gen_helper_pre_hvc(cpu_env);
1240 /* Otherwise we will treat this as a real exception which
1241 * happens after execution of the insn. (The distinction matters
1242 * for the PC value reported to the exception handler and also
1243 * for single stepping.)
1246 gen_set_pc_im(s, s->pc);
1247 s->base.is_jmp = DISAS_HVC;
1250 static inline void gen_smc(DisasContext *s)
1252 /* As with HVC, we may take an exception either before or after
1253 * the insn executes.
1257 gen_set_pc_im(s, s->pc - 4);
1258 tmp = tcg_const_i32(syn_aa32_smc());
1259 gen_helper_pre_smc(cpu_env, tmp);
1260 tcg_temp_free_i32(tmp);
1261 gen_set_pc_im(s, s->pc);
1262 s->base.is_jmp = DISAS_SMC;
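/* Raise an exception for the insn that starts at PC - offset: sync the condexec bits and the PC back into the CPU state first, then generate the exception and end the TB. */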
1265 static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
1267 gen_set_condexec(s);
1268 gen_set_pc_im(s, s->pc - offset);
1269 gen_exception_internal(excp);
1270 s->base.is_jmp = DISAS_NORETURN;
1273 static void gen_exception_insn(DisasContext *s, int offset, int excp,
1274 int syn, uint32_t target_el)
1276 gen_set_condexec(s);
1277 gen_set_pc_im(s, s->pc - offset);
1278 gen_exception(excp, syn, target_el);
1279 s->base.is_jmp = DISAS_NORETURN;
1282 static void gen_exception_bkpt_insn(DisasContext *s, int offset, uint32_t syn)
1286 gen_set_condexec(s);
1287 gen_set_pc_im(s, s->pc - offset);
1288 tcg_syn = tcg_const_i32(syn);
1289 gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
1290 tcg_temp_free_i32(tcg_syn);
1291 s->base.is_jmp = DISAS_NORETURN;
1294 /* Force a TB lookup after an instruction that changes the CPU state. */
1295 static inline void gen_lookup_tb(DisasContext *s)
1297 tcg_gen_movi_i32(cpu_R[15], s->pc & ~1);
1298 s->base.is_jmp = DISAS_EXIT;
1301 static inline void gen_hlt(DisasContext *s, int imm)
1303 /* HLT. This has two purposes.
1304 * Architecturally, it is an external halting debug instruction.
* Since QEMU doesn't implement external debug, we treat this as
* the architecture requires when halting debug is disabled: it will UNDEF.
1307 * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1308 * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1309 * must trigger semihosting even for ARMv7 and earlier, where
1310 * HLT was an undefined encoding.
1311 * In system mode, we don't allow userspace access to
1312 * semihosting, to provide some semblance of security
* (and for consistency with our AArch64 semihosting).
1315 if (semihosting_enabled() &&
1316 #ifndef CONFIG_USER_ONLY
1317 s->current_el != 0 &&
1319 (imm == (s->thumb ? 0x3c : 0xf000))) {
1320 gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
1324 gen_exception_insn(s, s->thumb ? 2 : 4, EXCP_UDEF, syn_uncategorized(),
1325 default_exception_el(s));
1328 static inline void gen_add_data_offset(DisasContext *s, unsigned int insn,
1331 int val, rm, shift, shiftop;
1334 if (!(insn & (1 << 25))) {
1337 if (!(insn & (1 << 23)))
1340 tcg_gen_addi_i32(var, var, val);
1342 /* shift/register */
1344 shift = (insn >> 7) & 0x1f;
1345 shiftop = (insn >> 5) & 3;
1346 offset = load_reg(s, rm);
1347 gen_arm_shift_im(offset, shiftop, shift, 0);
1348 if (!(insn & (1 << 23)))
1349 tcg_gen_sub_i32(var, var, offset);
1351 tcg_gen_add_i32(var, var, offset);
1352 tcg_temp_free_i32(offset);
1356 static inline void gen_add_datah_offset(DisasContext *s, unsigned int insn,
1357 int extra, TCGv_i32 var)
1362 if (insn & (1 << 22)) {
1364 val = (insn & 0xf) | ((insn >> 4) & 0xf0);
1365 if (!(insn & (1 << 23)))
1369 tcg_gen_addi_i32(var, var, val);
1373 tcg_gen_addi_i32(var, var, extra);
1375 offset = load_reg(s, rm);
1376 if (!(insn & (1 << 23)))
1377 tcg_gen_sub_i32(var, var, offset);
1379 tcg_gen_add_i32(var, var, offset);
1380 tcg_temp_free_i32(offset);
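/* Return a pointer to the float_status to use: the Neon "standard FPSCR" status when neon is nonzero, otherwise the normal VFP fp_status. */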
1384 static TCGv_ptr get_fpstatus_ptr(int neon)
1386 TCGv_ptr statusptr = tcg_temp_new_ptr();
1389 offset = offsetof(CPUARMState, vfp.standard_fp_status);
1391 offset = offsetof(CPUARMState, vfp.fp_status);
1393 tcg_gen_addi_ptr(statusptr, cpu_env, offset);
1397 #define VFP_OP2(name) \
1398 static inline void gen_vfp_##name(int dp) \
1400 TCGv_ptr fpst = get_fpstatus_ptr(0); \
1402 gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, cpu_F1d, fpst); \
1404 gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, cpu_F1s, fpst); \
1406 tcg_temp_free_ptr(fpst); \
1416 static inline void gen_vfp_F1_mul(int dp)
1418 /* Like gen_vfp_mul() but put result in F1 */
1419 TCGv_ptr fpst = get_fpstatus_ptr(0);
1421 gen_helper_vfp_muld(cpu_F1d, cpu_F0d, cpu_F1d, fpst);
1423 gen_helper_vfp_muls(cpu_F1s, cpu_F0s, cpu_F1s, fpst);
1425 tcg_temp_free_ptr(fpst);
1428 static inline void gen_vfp_F1_neg(int dp)
1430 /* Like gen_vfp_neg() but put result in F1 */
1432 gen_helper_vfp_negd(cpu_F1d, cpu_F0d);
1434 gen_helper_vfp_negs(cpu_F1s, cpu_F0s);
1438 static inline void gen_vfp_abs(int dp)
1441 gen_helper_vfp_absd(cpu_F0d, cpu_F0d);
1443 gen_helper_vfp_abss(cpu_F0s, cpu_F0s);
1446 static inline void gen_vfp_neg(int dp)
1449 gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
1451 gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
1454 static inline void gen_vfp_sqrt(int dp)
1457 gen_helper_vfp_sqrtd(cpu_F0d, cpu_F0d, cpu_env);
1459 gen_helper_vfp_sqrts(cpu_F0s, cpu_F0s, cpu_env);
1462 static inline void gen_vfp_cmp(int dp)
1465 gen_helper_vfp_cmpd(cpu_F0d, cpu_F1d, cpu_env);
1467 gen_helper_vfp_cmps(cpu_F0s, cpu_F1s, cpu_env);
1470 static inline void gen_vfp_cmpe(int dp)
1473 gen_helper_vfp_cmped(cpu_F0d, cpu_F1d, cpu_env);
1475 gen_helper_vfp_cmpes(cpu_F0s, cpu_F1s, cpu_env);
1478 static inline void gen_vfp_F1_ld0(int dp)
1481 tcg_gen_movi_i64(cpu_F1d, 0);
1483 tcg_gen_movi_i32(cpu_F1s, 0);
1486 #define VFP_GEN_ITOF(name) \
1487 static inline void gen_vfp_##name(int dp, int neon) \
1489 TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
1491 gen_helper_vfp_##name##d(cpu_F0d, cpu_F0s, statusptr); \
1493 gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
1495 tcg_temp_free_ptr(statusptr); \
1502 #define VFP_GEN_FTOI(name) \
1503 static inline void gen_vfp_##name(int dp, int neon) \
1505 TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
1507 gen_helper_vfp_##name##d(cpu_F0s, cpu_F0d, statusptr); \
1509 gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
1511 tcg_temp_free_ptr(statusptr); \
1520 #define VFP_GEN_FIX(name, round) \
1521 static inline void gen_vfp_##name(int dp, int shift, int neon) \
1523 TCGv_i32 tmp_shift = tcg_const_i32(shift); \
1524 TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
1526 gen_helper_vfp_##name##d##round(cpu_F0d, cpu_F0d, tmp_shift, \
1529 gen_helper_vfp_##name##s##round(cpu_F0s, cpu_F0s, tmp_shift, \
1532 tcg_temp_free_i32(tmp_shift); \
1533 tcg_temp_free_ptr(statusptr); \
1535 VFP_GEN_FIX(tosh, _round_to_zero)
1536 VFP_GEN_FIX(tosl, _round_to_zero)
1537 VFP_GEN_FIX(touh, _round_to_zero)
1538 VFP_GEN_FIX(toul, _round_to_zero)
1545 static inline void gen_vfp_ld(DisasContext *s, int dp, TCGv_i32 addr)
1548 gen_aa32_ld64(s, cpu_F0d, addr, get_mem_index(s));
1550 gen_aa32_ld32u(s, cpu_F0s, addr, get_mem_index(s));
1554 static inline void gen_vfp_st(DisasContext *s, int dp, TCGv_i32 addr)
1557 gen_aa32_st64(s, cpu_F0d, addr, get_mem_index(s));
1559 gen_aa32_st32(s, cpu_F0s, addr, get_mem_index(s));
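/* Byte offset within CPUARMState of VFP register 'reg': dp selects the D registers (stored in the zregs array); single-precision S registers map onto the low/high 32-bit halves of those D registers. */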
1563 static inline long vfp_reg_offset(bool dp, unsigned reg)
1566 return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1568 long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
1570 ofs += offsetof(CPU_DoubleU, l.upper);
1572 ofs += offsetof(CPU_DoubleU, l.lower);
1578 /* Return the offset of a 32-bit piece of a NEON register.
1579 zero is the least significant end of the register. */
1581 neon_reg_offset (int reg, int n)
1585 return vfp_reg_offset(0, sreg);
1588 /* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1589 * where 0 is the least significant end of the register.
1592 neon_element_offset(int reg, int element, TCGMemOp size)
1594 int element_size = 1 << size;
1595 int ofs = element * element_size;
1596 #ifdef HOST_WORDS_BIGENDIAN
1597 /* Calculate the offset assuming fully little-endian,
1598 * then XOR to account for the order of the 8-byte units.
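* For example, a 16-bit element 0 lives at byte offset 6 on a big-endian host (0 ^ (8 - 2)).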
1600 if (element_size < 8) {
1601 ofs ^= 8 - element_size;
1604 return neon_reg_offset(reg, 0) + ofs;
1607 static TCGv_i32 neon_load_reg(int reg, int pass)
1609 TCGv_i32 tmp = tcg_temp_new_i32();
1610 tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
1614 static void neon_load_element64(TCGv_i64 var, int reg, int ele, TCGMemOp mop)
1616 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1620 tcg_gen_ld8u_i64(var, cpu_env, offset);
1623 tcg_gen_ld16u_i64(var, cpu_env, offset);
1626 tcg_gen_ld32u_i64(var, cpu_env, offset);
1629 tcg_gen_ld_i64(var, cpu_env, offset);
1632 g_assert_not_reached();
1636 static void neon_store_reg(int reg, int pass, TCGv_i32 var)
1638 tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
1639 tcg_temp_free_i32(var);
1642 static void neon_store_element64(int reg, int ele, TCGMemOp size, TCGv_i64 var)
1644 long offset = neon_element_offset(reg, ele, size);
1648 tcg_gen_st8_i64(var, cpu_env, offset);
1651 tcg_gen_st16_i64(var, cpu_env, offset);
1654 tcg_gen_st32_i64(var, cpu_env, offset);
1657 tcg_gen_st_i64(var, cpu_env, offset);
1660 g_assert_not_reached();
1664 static inline void neon_load_reg64(TCGv_i64 var, int reg)
1666 tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
1669 static inline void neon_store_reg64(TCGv_i64 var, int reg)
1671 tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
1674 static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
1676 TCGv_ptr ret = tcg_temp_new_ptr();
1677 tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
1681 #define tcg_gen_ld_f32 tcg_gen_ld_i32
1682 #define tcg_gen_ld_f64 tcg_gen_ld_i64
1683 #define tcg_gen_st_f32 tcg_gen_st_i32
1684 #define tcg_gen_st_f64 tcg_gen_st_i64
1686 static inline void gen_mov_F0_vreg(int dp, int reg)
1689 tcg_gen_ld_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
1691 tcg_gen_ld_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
1694 static inline void gen_mov_F1_vreg(int dp, int reg)
1697 tcg_gen_ld_f64(cpu_F1d, cpu_env, vfp_reg_offset(dp, reg));
1699 tcg_gen_ld_f32(cpu_F1s, cpu_env, vfp_reg_offset(dp, reg));
1702 static inline void gen_mov_vreg_F0(int dp, int reg)
1705 tcg_gen_st_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
1707 tcg_gen_st_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
1710 #define ARM_CP_RW_BIT (1 << 20)
1712 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1714 tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1717 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1719 tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1722 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1724 TCGv_i32 var = tcg_temp_new_i32();
1725 tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1729 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1731 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1732 tcg_temp_free_i32(var);
1735 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1737 iwmmxt_store_reg(cpu_M0, rn);
1740 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1742 iwmmxt_load_reg(cpu_M0, rn);
1745 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1747 iwmmxt_load_reg(cpu_V1, rn);
1748 tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1751 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1753 iwmmxt_load_reg(cpu_V1, rn);
1754 tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1757 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1759 iwmmxt_load_reg(cpu_V1, rn);
1760 tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1763 #define IWMMXT_OP(name) \
1764 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1766 iwmmxt_load_reg(cpu_V1, rn); \
1767 gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1770 #define IWMMXT_OP_ENV(name) \
1771 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1773 iwmmxt_load_reg(cpu_V1, rn); \
1774 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1777 #define IWMMXT_OP_ENV_SIZE(name) \
1778 IWMMXT_OP_ENV(name##b) \
1779 IWMMXT_OP_ENV(name##w) \
1780 IWMMXT_OP_ENV(name##l)
1782 #define IWMMXT_OP_ENV1(name) \
1783 static inline void gen_op_iwmmxt_##name##_M0(void) \
1785 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1799 IWMMXT_OP_ENV_SIZE(unpackl)
1800 IWMMXT_OP_ENV_SIZE(unpackh)
1802 IWMMXT_OP_ENV1(unpacklub)
1803 IWMMXT_OP_ENV1(unpackluw)
1804 IWMMXT_OP_ENV1(unpacklul)
1805 IWMMXT_OP_ENV1(unpackhub)
1806 IWMMXT_OP_ENV1(unpackhuw)
1807 IWMMXT_OP_ENV1(unpackhul)
1808 IWMMXT_OP_ENV1(unpacklsb)
1809 IWMMXT_OP_ENV1(unpacklsw)
1810 IWMMXT_OP_ENV1(unpacklsl)
1811 IWMMXT_OP_ENV1(unpackhsb)
1812 IWMMXT_OP_ENV1(unpackhsw)
1813 IWMMXT_OP_ENV1(unpackhsl)
1815 IWMMXT_OP_ENV_SIZE(cmpeq)
1816 IWMMXT_OP_ENV_SIZE(cmpgtu)
1817 IWMMXT_OP_ENV_SIZE(cmpgts)
1819 IWMMXT_OP_ENV_SIZE(mins)
1820 IWMMXT_OP_ENV_SIZE(minu)
1821 IWMMXT_OP_ENV_SIZE(maxs)
1822 IWMMXT_OP_ENV_SIZE(maxu)
1824 IWMMXT_OP_ENV_SIZE(subn)
1825 IWMMXT_OP_ENV_SIZE(addn)
1826 IWMMXT_OP_ENV_SIZE(subu)
1827 IWMMXT_OP_ENV_SIZE(addu)
1828 IWMMXT_OP_ENV_SIZE(subs)
1829 IWMMXT_OP_ENV_SIZE(adds)
1831 IWMMXT_OP_ENV(avgb0)
1832 IWMMXT_OP_ENV(avgb1)
1833 IWMMXT_OP_ENV(avgw0)
1834 IWMMXT_OP_ENV(avgw1)
1836 IWMMXT_OP_ENV(packuw)
1837 IWMMXT_OP_ENV(packul)
1838 IWMMXT_OP_ENV(packuq)
1839 IWMMXT_OP_ENV(packsw)
1840 IWMMXT_OP_ENV(packsl)
1841 IWMMXT_OP_ENV(packsq)
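/* Track register updates in wCon: bit 1 (MUP) records a change to the wR data registers, bit 0 (CUP) a change to the control registers. */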
1843 static void gen_op_iwmmxt_set_mup(void)
1846 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1847 tcg_gen_ori_i32(tmp, tmp, 2);
1848 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1851 static void gen_op_iwmmxt_set_cup(void)
1854 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1855 tcg_gen_ori_i32(tmp, tmp, 1);
1856 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1859 static void gen_op_iwmmxt_setpsr_nz(void)
1861 TCGv_i32 tmp = tcg_temp_new_i32();
1862 gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1863 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1866 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1868 iwmmxt_load_reg(cpu_V1, rn);
1869 tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1870 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
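/* Compute the effective address for an iwMMXt load/store into dest, handling pre- and post-indexed forms with optional base writeback; returns nonzero for a disallowed addressing mode. */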
1873 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1880 rd = (insn >> 16) & 0xf;
1881 tmp = load_reg(s, rd);
1883 offset = (insn & 0xff) << ((insn >> 7) & 2);
1884 if (insn & (1 << 24)) {
1886 if (insn & (1 << 23))
1887 tcg_gen_addi_i32(tmp, tmp, offset);
1889 tcg_gen_addi_i32(tmp, tmp, -offset);
1890 tcg_gen_mov_i32(dest, tmp);
1891 if (insn & (1 << 21))
1892 store_reg(s, rd, tmp);
1894 tcg_temp_free_i32(tmp);
1895 } else if (insn & (1 << 21)) {
1897 tcg_gen_mov_i32(dest, tmp);
1898 if (insn & (1 << 23))
1899 tcg_gen_addi_i32(tmp, tmp, offset);
1901 tcg_gen_addi_i32(tmp, tmp, -offset);
1902 store_reg(s, rd, tmp);
1903 } else if (!(insn & (1 << 23)))
1908 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1910 int rd = (insn >> 0) & 0xf;
1913 if (insn & (1 << 8)) {
1914 if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1917 tmp = iwmmxt_load_creg(rd);
1920 tmp = tcg_temp_new_i32();
1921 iwmmxt_load_reg(cpu_V0, rd);
1922 tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1924 tcg_gen_andi_i32(tmp, tmp, mask);
1925 tcg_gen_mov_i32(dest, tmp);
1926 tcg_temp_free_i32(tmp);
1930 /* Disassemble an iwMMXt instruction. Returns nonzero if an error occurred
(i.e. an undefined instruction). */
1932 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1935 int rdhi, rdlo, rd0, rd1, i;
1937 TCGv_i32 tmp, tmp2, tmp3;
1939 if ((insn & 0x0e000e00) == 0x0c000000) {
1940 if ((insn & 0x0fe00ff0) == 0x0c400000) {
1942 rdlo = (insn >> 12) & 0xf;
1943 rdhi = (insn >> 16) & 0xf;
1944 if (insn & ARM_CP_RW_BIT) { /* TMRRC */
1945 iwmmxt_load_reg(cpu_V0, wrd);
1946 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1947 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
1948 tcg_gen_extrl_i64_i32(cpu_R[rdhi], cpu_V0);
1949 } else { /* TMCRR */
1950 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1951 iwmmxt_store_reg(cpu_V0, wrd);
1952 gen_op_iwmmxt_set_mup();
1957 wrd = (insn >> 12) & 0xf;
1958 addr = tcg_temp_new_i32();
1959 if (gen_iwmmxt_address(s, insn, addr)) {
1960 tcg_temp_free_i32(addr);
1963 if (insn & ARM_CP_RW_BIT) {
1964 if ((insn >> 28) == 0xf) { /* WLDRW wCx */
1965 tmp = tcg_temp_new_i32();
1966 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1967 iwmmxt_store_creg(wrd, tmp);
1970 if (insn & (1 << 8)) {
1971 if (insn & (1 << 22)) { /* WLDRD */
1972 gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1974 } else { /* WLDRW wRd */
1975 tmp = tcg_temp_new_i32();
1976 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1979 tmp = tcg_temp_new_i32();
1980 if (insn & (1 << 22)) { /* WLDRH */
1981 gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1982 } else { /* WLDRB */
1983 gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1987 tcg_gen_extu_i32_i64(cpu_M0, tmp);
1988 tcg_temp_free_i32(tmp);
1990 gen_op_iwmmxt_movq_wRn_M0(wrd);
1993 if ((insn >> 28) == 0xf) { /* WSTRW wCx */
1994 tmp = iwmmxt_load_creg(wrd);
1995 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1997 gen_op_iwmmxt_movq_M0_wRn(wrd);
1998 tmp = tcg_temp_new_i32();
1999 if (insn & (1 << 8)) {
2000 if (insn & (1 << 22)) { /* WSTRD */
2001 gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
2002 } else { /* WSTRW wRd */
2003 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
2004 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
2007 if (insn & (1 << 22)) { /* WSTRH */
2008 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
2009 gen_aa32_st16(s, tmp, addr, get_mem_index(s));
2010 } else { /* WSTRB */
2011 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
2012 gen_aa32_st8(s, tmp, addr, get_mem_index(s));
2016 tcg_temp_free_i32(tmp);
2018 tcg_temp_free_i32(addr);
2022 if ((insn & 0x0f000000) != 0x0e000000)
2025 switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
2026 case 0x000: /* WOR */
2027 wrd = (insn >> 12) & 0xf;
2028 rd0 = (insn >> 0) & 0xf;
2029 rd1 = (insn >> 16) & 0xf;
2030 gen_op_iwmmxt_movq_M0_wRn(rd0);
2031 gen_op_iwmmxt_orq_M0_wRn(rd1);
2032 gen_op_iwmmxt_setpsr_nz();
2033 gen_op_iwmmxt_movq_wRn_M0(wrd);
2034 gen_op_iwmmxt_set_mup();
2035 gen_op_iwmmxt_set_cup();
2037 case 0x011: /* TMCR */
2040 rd = (insn >> 12) & 0xf;
2041 wrd = (insn >> 16) & 0xf;
2043 case ARM_IWMMXT_wCID:
2044 case ARM_IWMMXT_wCASF:
2046 case ARM_IWMMXT_wCon:
2047 gen_op_iwmmxt_set_cup();
2049 case ARM_IWMMXT_wCSSF:
2050 tmp = iwmmxt_load_creg(wrd);
2051 tmp2 = load_reg(s, rd);
2052 tcg_gen_andc_i32(tmp, tmp, tmp2);
2053 tcg_temp_free_i32(tmp2);
2054 iwmmxt_store_creg(wrd, tmp);
2056 case ARM_IWMMXT_wCGR0:
2057 case ARM_IWMMXT_wCGR1:
2058 case ARM_IWMMXT_wCGR2:
2059 case ARM_IWMMXT_wCGR3:
2060 gen_op_iwmmxt_set_cup();
2061 tmp = load_reg(s, rd);
2062 iwmmxt_store_creg(wrd, tmp);
2068 case 0x100: /* WXOR */
2069 wrd = (insn >> 12) & 0xf;
2070 rd0 = (insn >> 0) & 0xf;
2071 rd1 = (insn >> 16) & 0xf;
2072 gen_op_iwmmxt_movq_M0_wRn(rd0);
2073 gen_op_iwmmxt_xorq_M0_wRn(rd1);
2074 gen_op_iwmmxt_setpsr_nz();
2075 gen_op_iwmmxt_movq_wRn_M0(wrd);
2076 gen_op_iwmmxt_set_mup();
2077 gen_op_iwmmxt_set_cup();
2079 case 0x111: /* TMRC */
2082 rd = (insn >> 12) & 0xf;
2083 wrd = (insn >> 16) & 0xf;
2084 tmp = iwmmxt_load_creg(wrd);
2085 store_reg(s, rd, tmp);
2087 case 0x300: /* WANDN */
2088 wrd = (insn >> 12) & 0xf;
2089 rd0 = (insn >> 0) & 0xf;
2090 rd1 = (insn >> 16) & 0xf;
2091 gen_op_iwmmxt_movq_M0_wRn(rd0);
2092 tcg_gen_neg_i64(cpu_M0, cpu_M0);
2093 gen_op_iwmmxt_andq_M0_wRn(rd1);
2094 gen_op_iwmmxt_setpsr_nz();
2095 gen_op_iwmmxt_movq_wRn_M0(wrd);
2096 gen_op_iwmmxt_set_mup();
2097 gen_op_iwmmxt_set_cup();
2099 case 0x200: /* WAND */
2100 wrd = (insn >> 12) & 0xf;
2101 rd0 = (insn >> 0) & 0xf;
2102 rd1 = (insn >> 16) & 0xf;
2103 gen_op_iwmmxt_movq_M0_wRn(rd0);
2104 gen_op_iwmmxt_andq_M0_wRn(rd1);
2105 gen_op_iwmmxt_setpsr_nz();
2106 gen_op_iwmmxt_movq_wRn_M0(wrd);
2107 gen_op_iwmmxt_set_mup();
2108 gen_op_iwmmxt_set_cup();
2110 case 0x810: case 0xa10: /* WMADD */
2111 wrd = (insn >> 12) & 0xf;
2112 rd0 = (insn >> 0) & 0xf;
2113 rd1 = (insn >> 16) & 0xf;
2114 gen_op_iwmmxt_movq_M0_wRn(rd0);
2115 if (insn & (1 << 21))
2116 gen_op_iwmmxt_maddsq_M0_wRn(rd1);
2118 gen_op_iwmmxt_madduq_M0_wRn(rd1);
2119 gen_op_iwmmxt_movq_wRn_M0(wrd);
2120 gen_op_iwmmxt_set_mup();
2122 case 0x10e: case 0x50e: case 0x90e: case 0xd0e: /* WUNPCKIL */
2123 wrd = (insn >> 12) & 0xf;
2124 rd0 = (insn >> 16) & 0xf;
2125 rd1 = (insn >> 0) & 0xf;
2126 gen_op_iwmmxt_movq_M0_wRn(rd0);
2127 switch ((insn >> 22) & 3) {
2129 gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
2132 gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
2135 gen_op_iwmmxt_unpackll_M0_wRn(rd1);
2140 gen_op_iwmmxt_movq_wRn_M0(wrd);
2141 gen_op_iwmmxt_set_mup();
2142 gen_op_iwmmxt_set_cup();
2144 case 0x10c: case 0x50c: case 0x90c: case 0xd0c: /* WUNPCKIH */
2145 wrd = (insn >> 12) & 0xf;
2146 rd0 = (insn >> 16) & 0xf;
2147 rd1 = (insn >> 0) & 0xf;
2148 gen_op_iwmmxt_movq_M0_wRn(rd0);
2149 switch ((insn >> 22) & 3) {
2151 gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
2154 gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
2157 gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
2162 gen_op_iwmmxt_movq_wRn_M0(wrd);
2163 gen_op_iwmmxt_set_mup();
2164 gen_op_iwmmxt_set_cup();
2166 case 0x012: case 0x112: case 0x412: case 0x512: /* WSAD */
2167 wrd = (insn >> 12) & 0xf;
2168 rd0 = (insn >> 16) & 0xf;
2169 rd1 = (insn >> 0) & 0xf;
2170 gen_op_iwmmxt_movq_M0_wRn(rd0);
2171 if (insn & (1 << 22))
2172 gen_op_iwmmxt_sadw_M0_wRn(rd1);
2174 gen_op_iwmmxt_sadb_M0_wRn(rd1);
2175 if (!(insn & (1 << 20)))
2176 gen_op_iwmmxt_addl_M0_wRn(wrd);
2177 gen_op_iwmmxt_movq_wRn_M0(wrd);
2178 gen_op_iwmmxt_set_mup();
2180 case 0x010: case 0x110: case 0x210: case 0x310: /* WMUL */
2181 wrd = (insn >> 12) & 0xf;
2182 rd0 = (insn >> 16) & 0xf;
2183 rd1 = (insn >> 0) & 0xf;
2184 gen_op_iwmmxt_movq_M0_wRn(rd0);
2185 if (insn & (1 << 21)) {
2186 if (insn & (1 << 20))
2187 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
2189 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
2191 if (insn & (1 << 20))
2192 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
2194 gen_op_iwmmxt_mululw_M0_wRn(rd1);
2196 gen_op_iwmmxt_movq_wRn_M0(wrd);
2197 gen_op_iwmmxt_set_mup();
2199 case 0x410: case 0x510: case 0x610: case 0x710: /* WMAC */
2200 wrd = (insn >> 12) & 0xf;
2201 rd0 = (insn >> 16) & 0xf;
2202 rd1 = (insn >> 0) & 0xf;
2203 gen_op_iwmmxt_movq_M0_wRn(rd0);
2204 if (insn & (1 << 21))
2205 gen_op_iwmmxt_macsw_M0_wRn(rd1);
2207 gen_op_iwmmxt_macuw_M0_wRn(rd1);
2208 if (!(insn & (1 << 20))) {
2209 iwmmxt_load_reg(cpu_V1, wrd);
2210 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
2212 gen_op_iwmmxt_movq_wRn_M0(wrd);
2213 gen_op_iwmmxt_set_mup();
2215 case 0x006: case 0x406: case 0x806: case 0xc06: /* WCMPEQ */
2216 wrd = (insn >> 12) & 0xf;
2217 rd0 = (insn >> 16) & 0xf;
2218 rd1 = (insn >> 0) & 0xf;
2219 gen_op_iwmmxt_movq_M0_wRn(rd0);
2220 switch ((insn >> 22) & 3) {
2222 gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
2225 gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
2228 gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
2233 gen_op_iwmmxt_movq_wRn_M0(wrd);
2234 gen_op_iwmmxt_set_mup();
2235 gen_op_iwmmxt_set_cup();
2237 case 0x800: case 0x900: case 0xc00: case 0xd00: /* WAVG2 */
2238 wrd = (insn >> 12) & 0xf;
2239 rd0 = (insn >> 16) & 0xf;
2240 rd1 = (insn >> 0) & 0xf;
2241 gen_op_iwmmxt_movq_M0_wRn(rd0);
2242 if (insn & (1 << 22)) {
2243 if (insn & (1 << 20))
2244 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
2246 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
2248 if (insn & (1 << 20))
2249 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
2251 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
2253 gen_op_iwmmxt_movq_wRn_M0(wrd);
2254 gen_op_iwmmxt_set_mup();
2255 gen_op_iwmmxt_set_cup();
2257 case 0x802: case 0x902: case 0xa02: case 0xb02: /* WALIGNR */
2258 wrd = (insn >> 12) & 0xf;
2259 rd0 = (insn >> 16) & 0xf;
2260 rd1 = (insn >> 0) & 0xf;
2261 gen_op_iwmmxt_movq_M0_wRn(rd0);
2262 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
2263 tcg_gen_andi_i32(tmp, tmp, 7);
2264 iwmmxt_load_reg(cpu_V1, rd1);
2265 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2266 tcg_temp_free_i32(tmp);
2267 gen_op_iwmmxt_movq_wRn_M0(wrd);
2268 gen_op_iwmmxt_set_mup();
2270 case 0x601: case 0x605: case 0x609: case 0x60d: /* TINSR */
2271 if (((insn >> 6) & 3) == 3)
2273 rd = (insn >> 12) & 0xf;
2274 wrd = (insn >> 16) & 0xf;
2275 tmp = load_reg(s, rd);
2276 gen_op_iwmmxt_movq_M0_wRn(wrd);
2277 switch ((insn >> 6) & 3) {
2279 tmp2 = tcg_const_i32(0xff);
2280 tmp3 = tcg_const_i32((insn & 7) << 3);
2283 tmp2 = tcg_const_i32(0xffff);
2284 tmp3 = tcg_const_i32((insn & 3) << 4);
2287 tmp2 = tcg_const_i32(0xffffffff);
2288 tmp3 = tcg_const_i32((insn & 1) << 5);
2294 gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
2295 tcg_temp_free_i32(tmp3);
2296 tcg_temp_free_i32(tmp2);
2297 tcg_temp_free_i32(tmp);
2298 gen_op_iwmmxt_movq_wRn_M0(wrd);
2299 gen_op_iwmmxt_set_mup();
2301 case 0x107: case 0x507: case 0x907: case 0xd07: /* TEXTRM */
2302 rd = (insn >> 12) & 0xf;
2303 wrd = (insn >> 16) & 0xf;
2304 if (rd == 15 || ((insn >> 22) & 3) == 3)
2306 gen_op_iwmmxt_movq_M0_wRn(wrd);
2307 tmp = tcg_temp_new_i32();
2308 switch ((insn >> 22) & 3) {
2310 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
2311 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
2313 tcg_gen_ext8s_i32(tmp, tmp);
2315 tcg_gen_andi_i32(tmp, tmp, 0xff);
2319 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
2320 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
2322 tcg_gen_ext16s_i32(tmp, tmp);
2324 tcg_gen_andi_i32(tmp, tmp, 0xffff);
2328 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
2329 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
2332 store_reg(s, rd, tmp);
2334 case 0x117: case 0x517: case 0x917: case 0xd17: /* TEXTRC */
2335 if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2337 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2338 switch ((insn >> 22) & 3) {
2340 tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
2343 tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
2346 tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
2349 tcg_gen_shli_i32(tmp, tmp, 28);
2351 tcg_temp_free_i32(tmp);
2353 case 0x401: case 0x405: case 0x409: case 0x40d: /* TBCST */
2354 if (((insn >> 6) & 3) == 3)
2356 rd = (insn >> 12) & 0xf;
2357 wrd = (insn >> 16) & 0xf;
2358 tmp = load_reg(s, rd);
2359 switch ((insn >> 6) & 3) {
2361 gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
2364 gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
2367 gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
2370 tcg_temp_free_i32(tmp);
2371 gen_op_iwmmxt_movq_wRn_M0(wrd);
2372 gen_op_iwmmxt_set_mup();
2374 case 0x113: case 0x513: case 0x913: case 0xd13: /* TANDC */
2375 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2377 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2378 tmp2 = tcg_temp_new_i32();
2379 tcg_gen_mov_i32(tmp2, tmp);
2380 switch ((insn >> 22) & 3) {
2382 for (i = 0; i < 7; i ++) {
2383 tcg_gen_shli_i32(tmp2, tmp2, 4);
2384 tcg_gen_and_i32(tmp, tmp, tmp2);
2388 for (i = 0; i < 3; i ++) {
2389 tcg_gen_shli_i32(tmp2, tmp2, 8);
2390 tcg_gen_and_i32(tmp, tmp, tmp2);
2394 tcg_gen_shli_i32(tmp2, tmp2, 16);
2395 tcg_gen_and_i32(tmp, tmp, tmp2);
2399 tcg_temp_free_i32(tmp2);
2400 tcg_temp_free_i32(tmp);
2402 case 0x01c: case 0x41c: case 0x81c: case 0xc1c: /* WACC */
2403 wrd = (insn >> 12) & 0xf;
2404 rd0 = (insn >> 16) & 0xf;
2405 gen_op_iwmmxt_movq_M0_wRn(rd0);
2406 switch ((insn >> 22) & 3) {
2408 gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
2411 gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
2414 gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2419 gen_op_iwmmxt_movq_wRn_M0(wrd);
2420 gen_op_iwmmxt_set_mup();
2422 case 0x115: case 0x515: case 0x915: case 0xd15: /* TORC */
2423 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2425 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2426 tmp2 = tcg_temp_new_i32();
2427 tcg_gen_mov_i32(tmp2, tmp);
2428 switch ((insn >> 22) & 3) {
2430 for (i = 0; i < 7; i ++) {
2431 tcg_gen_shli_i32(tmp2, tmp2, 4);
2432 tcg_gen_or_i32(tmp, tmp, tmp2);
2436 for (i = 0; i < 3; i ++) {
2437 tcg_gen_shli_i32(tmp2, tmp2, 8);
2438 tcg_gen_or_i32(tmp, tmp, tmp2);
2442 tcg_gen_shli_i32(tmp2, tmp2, 16);
2443 tcg_gen_or_i32(tmp, tmp, tmp2);
2447 tcg_temp_free_i32(tmp2);
2448 tcg_temp_free_i32(tmp);
2450 case 0x103: case 0x503: case 0x903: case 0xd03: /* TMOVMSK */
2451 rd = (insn >> 12) & 0xf;
2452 rd0 = (insn >> 16) & 0xf;
2453 if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2455 gen_op_iwmmxt_movq_M0_wRn(rd0);
2456 tmp = tcg_temp_new_i32();
2457 switch ((insn >> 22) & 3) {
2459 gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2462 gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2465 gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2468 store_reg(s, rd, tmp);
2470 case 0x106: case 0x306: case 0x506: case 0x706: /* WCMPGT */
2471 case 0x906: case 0xb06: case 0xd06: case 0xf06:
2472 wrd = (insn >> 12) & 0xf;
2473 rd0 = (insn >> 16) & 0xf;
2474 rd1 = (insn >> 0) & 0xf;
2475 gen_op_iwmmxt_movq_M0_wRn(rd0);
2476 switch ((insn >> 22) & 3) {
2478 if (insn & (1 << 21))
2479 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2481 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2484 if (insn & (1 << 21))
2485 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2487 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2490 if (insn & (1 << 21))
2491 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2493 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2498 gen_op_iwmmxt_movq_wRn_M0(wrd);
2499 gen_op_iwmmxt_set_mup();
2500 gen_op_iwmmxt_set_cup();
2502 case 0x00e: case 0x20e: case 0x40e: case 0x60e: /* WUNPCKEL */
2503 case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2504 wrd = (insn >> 12) & 0xf;
2505 rd0 = (insn >> 16) & 0xf;
2506 gen_op_iwmmxt_movq_M0_wRn(rd0);
2507 switch ((insn >> 22) & 3) {
2509 if (insn & (1 << 21))
2510 gen_op_iwmmxt_unpacklsb_M0();
2512 gen_op_iwmmxt_unpacklub_M0();
2515 if (insn & (1 << 21))
2516 gen_op_iwmmxt_unpacklsw_M0();
2518 gen_op_iwmmxt_unpackluw_M0();
2521 if (insn & (1 << 21))
2522 gen_op_iwmmxt_unpacklsl_M0();
2524 gen_op_iwmmxt_unpacklul_M0();
2529 gen_op_iwmmxt_movq_wRn_M0(wrd);
2530 gen_op_iwmmxt_set_mup();
2531 gen_op_iwmmxt_set_cup();
2533 case 0x00c: case 0x20c: case 0x40c: case 0x60c: /* WUNPCKEH */
2534 case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2535 wrd = (insn >> 12) & 0xf;
2536 rd0 = (insn >> 16) & 0xf;
2537 gen_op_iwmmxt_movq_M0_wRn(rd0);
2538 switch ((insn >> 22) & 3) {
2540 if (insn & (1 << 21))
2541 gen_op_iwmmxt_unpackhsb_M0();
2543 gen_op_iwmmxt_unpackhub_M0();
2546 if (insn & (1 << 21))
2547 gen_op_iwmmxt_unpackhsw_M0();
2549 gen_op_iwmmxt_unpackhuw_M0();
2552 if (insn & (1 << 21))
2553 gen_op_iwmmxt_unpackhsl_M0();
2555 gen_op_iwmmxt_unpackhul_M0();
2560 gen_op_iwmmxt_movq_wRn_M0(wrd);
2561 gen_op_iwmmxt_set_mup();
2562 gen_op_iwmmxt_set_cup();
2564 case 0x204: case 0x604: case 0xa04: case 0xe04: /* WSRL */
2565 case 0x214: case 0x614: case 0xa14: case 0xe14:
2566 if (((insn >> 22) & 3) == 0)
2568 wrd = (insn >> 12) & 0xf;
2569 rd0 = (insn >> 16) & 0xf;
2570 gen_op_iwmmxt_movq_M0_wRn(rd0);
2571 tmp = tcg_temp_new_i32();
2572 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2573 tcg_temp_free_i32(tmp);
2576 switch ((insn >> 22) & 3) {
2578 gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2581 gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2584 gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2587 tcg_temp_free_i32(tmp);
2588 gen_op_iwmmxt_movq_wRn_M0(wrd);
2589 gen_op_iwmmxt_set_mup();
2590 gen_op_iwmmxt_set_cup();
2592 case 0x004: case 0x404: case 0x804: case 0xc04: /* WSRA */
2593 case 0x014: case 0x414: case 0x814: case 0xc14:
2594 if (((insn >> 22) & 3) == 0)
2596 wrd = (insn >> 12) & 0xf;
2597 rd0 = (insn >> 16) & 0xf;
2598 gen_op_iwmmxt_movq_M0_wRn(rd0);
2599 tmp = tcg_temp_new_i32();
2600 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2601 tcg_temp_free_i32(tmp);
2604 switch ((insn >> 22) & 3) {
2606 gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2609 gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2612 gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2615 tcg_temp_free_i32(tmp);
2616 gen_op_iwmmxt_movq_wRn_M0(wrd);
2617 gen_op_iwmmxt_set_mup();
2618 gen_op_iwmmxt_set_cup();
2620 case 0x104: case 0x504: case 0x904: case 0xd04: /* WSLL */
2621 case 0x114: case 0x514: case 0x914: case 0xd14:
2622 if (((insn >> 22) & 3) == 0)
2624 wrd = (insn >> 12) & 0xf;
2625 rd0 = (insn >> 16) & 0xf;
2626 gen_op_iwmmxt_movq_M0_wRn(rd0);
2627 tmp = tcg_temp_new_i32();
2628 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2629 tcg_temp_free_i32(tmp);
2632 switch ((insn >> 22) & 3) {
2634 gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2637 gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2640 gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2643 tcg_temp_free_i32(tmp);
2644 gen_op_iwmmxt_movq_wRn_M0(wrd);
2645 gen_op_iwmmxt_set_mup();
2646 gen_op_iwmmxt_set_cup();
2648 case 0x304: case 0x704: case 0xb04: case 0xf04: /* WROR */
2649 case 0x314: case 0x714: case 0xb14: case 0xf14:
2650 if (((insn >> 22) & 3) == 0)
2652 wrd = (insn >> 12) & 0xf;
2653 rd0 = (insn >> 16) & 0xf;
2654 gen_op_iwmmxt_movq_M0_wRn(rd0);
2655 tmp = tcg_temp_new_i32();
2656 switch ((insn >> 22) & 3) {
2658 if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2659 tcg_temp_free_i32(tmp);
2662 gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2665 if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2666 tcg_temp_free_i32(tmp);
2669 gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2672 if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2673 tcg_temp_free_i32(tmp);
2676 gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2679 tcg_temp_free_i32(tmp);
2680 gen_op_iwmmxt_movq_wRn_M0(wrd);
2681 gen_op_iwmmxt_set_mup();
2682 gen_op_iwmmxt_set_cup();
2684 case 0x116: case 0x316: case 0x516: case 0x716: /* WMIN */
2685 case 0x916: case 0xb16: case 0xd16: case 0xf16:
2686 wrd = (insn >> 12) & 0xf;
2687 rd0 = (insn >> 16) & 0xf;
2688 rd1 = (insn >> 0) & 0xf;
2689 gen_op_iwmmxt_movq_M0_wRn(rd0);
2690 switch ((insn >> 22) & 3) {
2692 if (insn & (1 << 21))
2693 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2695 gen_op_iwmmxt_minub_M0_wRn(rd1);
2698 if (insn & (1 << 21))
2699 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2701 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2704 if (insn & (1 << 21))
2705 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2707 gen_op_iwmmxt_minul_M0_wRn(rd1);
2712 gen_op_iwmmxt_movq_wRn_M0(wrd);
2713 gen_op_iwmmxt_set_mup();
2715 case 0x016: case 0x216: case 0x416: case 0x616: /* WMAX */
2716 case 0x816: case 0xa16: case 0xc16: case 0xe16:
2717 wrd = (insn >> 12) & 0xf;
2718 rd0 = (insn >> 16) & 0xf;
2719 rd1 = (insn >> 0) & 0xf;
2720 gen_op_iwmmxt_movq_M0_wRn(rd0);
2721 switch ((insn >> 22) & 3) {
2723 if (insn & (1 << 21))
2724 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2726 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2729 if (insn & (1 << 21))
2730 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2732 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2735 if (insn & (1 << 21))
2736 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2738 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2743 gen_op_iwmmxt_movq_wRn_M0(wrd);
2744 gen_op_iwmmxt_set_mup();
2746 case 0x002: case 0x102: case 0x202: case 0x302: /* WALIGNI */
2747 case 0x402: case 0x502: case 0x602: case 0x702:
2748 wrd = (insn >> 12) & 0xf;
2749 rd0 = (insn >> 16) & 0xf;
2750 rd1 = (insn >> 0) & 0xf;
2751 gen_op_iwmmxt_movq_M0_wRn(rd0);
2752 tmp = tcg_const_i32((insn >> 20) & 3);
2753 iwmmxt_load_reg(cpu_V1, rd1);
2754 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2755 tcg_temp_free_i32(tmp);
2756 gen_op_iwmmxt_movq_wRn_M0(wrd);
2757 gen_op_iwmmxt_set_mup();
2759 case 0x01a: case 0x11a: case 0x21a: case 0x31a: /* WSUB */
2760 case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2761 case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2762 case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2763 wrd = (insn >> 12) & 0xf;
2764 rd0 = (insn >> 16) & 0xf;
2765 rd1 = (insn >> 0) & 0xf;
2766 gen_op_iwmmxt_movq_M0_wRn(rd0);
2767 switch ((insn >> 20) & 0xf) {
2769 gen_op_iwmmxt_subnb_M0_wRn(rd1);
2772 gen_op_iwmmxt_subub_M0_wRn(rd1);
2775 gen_op_iwmmxt_subsb_M0_wRn(rd1);
2778 gen_op_iwmmxt_subnw_M0_wRn(rd1);
2781 gen_op_iwmmxt_subuw_M0_wRn(rd1);
2784 gen_op_iwmmxt_subsw_M0_wRn(rd1);
2787 gen_op_iwmmxt_subnl_M0_wRn(rd1);
2790 gen_op_iwmmxt_subul_M0_wRn(rd1);
2793 gen_op_iwmmxt_subsl_M0_wRn(rd1);
2798 gen_op_iwmmxt_movq_wRn_M0(wrd);
2799 gen_op_iwmmxt_set_mup();
2800 gen_op_iwmmxt_set_cup();
2802 case 0x01e: case 0x11e: case 0x21e: case 0x31e: /* WSHUFH */
2803 case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2804 case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2805 case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2806 wrd = (insn >> 12) & 0xf;
2807 rd0 = (insn >> 16) & 0xf;
2808 gen_op_iwmmxt_movq_M0_wRn(rd0);
2809 tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2810 gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2811 tcg_temp_free_i32(tmp);
2812 gen_op_iwmmxt_movq_wRn_M0(wrd);
2813 gen_op_iwmmxt_set_mup();
2814 gen_op_iwmmxt_set_cup();
2816 case 0x018: case 0x118: case 0x218: case 0x318: /* WADD */
2817 case 0x418: case 0x518: case 0x618: case 0x718:
2818 case 0x818: case 0x918: case 0xa18: case 0xb18:
2819 case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2820 wrd = (insn >> 12) & 0xf;
2821 rd0 = (insn >> 16) & 0xf;
2822 rd1 = (insn >> 0) & 0xf;
2823 gen_op_iwmmxt_movq_M0_wRn(rd0);
2824 switch ((insn >> 20) & 0xf) {
2826 gen_op_iwmmxt_addnb_M0_wRn(rd1);
2829 gen_op_iwmmxt_addub_M0_wRn(rd1);
2832 gen_op_iwmmxt_addsb_M0_wRn(rd1);
2835 gen_op_iwmmxt_addnw_M0_wRn(rd1);
2838 gen_op_iwmmxt_adduw_M0_wRn(rd1);
2841 gen_op_iwmmxt_addsw_M0_wRn(rd1);
2844 gen_op_iwmmxt_addnl_M0_wRn(rd1);
2847 gen_op_iwmmxt_addul_M0_wRn(rd1);
2850 gen_op_iwmmxt_addsl_M0_wRn(rd1);
2855 gen_op_iwmmxt_movq_wRn_M0(wrd);
2856 gen_op_iwmmxt_set_mup();
2857 gen_op_iwmmxt_set_cup();
2859 case 0x008: case 0x108: case 0x208: case 0x308: /* WPACK */
2860 case 0x408: case 0x508: case 0x608: case 0x708:
2861 case 0x808: case 0x908: case 0xa08: case 0xb08:
2862 case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2863 if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2865 wrd = (insn >> 12) & 0xf;
2866 rd0 = (insn >> 16) & 0xf;
2867 rd1 = (insn >> 0) & 0xf;
2868 gen_op_iwmmxt_movq_M0_wRn(rd0);
2869 switch ((insn >> 22) & 3) {
2871 if (insn & (1 << 21))
2872 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2874 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2877 if (insn & (1 << 21))
2878 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2880 gen_op_iwmmxt_packul_M0_wRn(rd1);
2883 if (insn & (1 << 21))
2884 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2886 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2889 gen_op_iwmmxt_movq_wRn_M0(wrd);
2890 gen_op_iwmmxt_set_mup();
2891 gen_op_iwmmxt_set_cup();
2893 case 0x201: case 0x203: case 0x205: case 0x207:
2894 case 0x209: case 0x20b: case 0x20d: case 0x20f:
2895 case 0x211: case 0x213: case 0x215: case 0x217:
2896 case 0x219: case 0x21b: case 0x21d: case 0x21f:
2897 wrd = (insn >> 5) & 0xf;
2898 rd0 = (insn >> 12) & 0xf;
2899 rd1 = (insn >> 0) & 0xf;
2900 if (rd0 == 0xf || rd1 == 0xf)
2902 gen_op_iwmmxt_movq_M0_wRn(wrd);
2903 tmp = load_reg(s, rd0);
2904 tmp2 = load_reg(s, rd1);
2905 switch ((insn >> 16) & 0xf) {
2906 case 0x0: /* TMIA */
2907 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2909 case 0x8: /* TMIAPH */
2910 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2912 case 0xc: case 0xd: case 0xe: case 0xf: /* TMIAxy */
2913 if (insn & (1 << 16))
2914 tcg_gen_shri_i32(tmp, tmp, 16);
2915 if (insn & (1 << 17))
2916 tcg_gen_shri_i32(tmp2, tmp2, 16);
2917 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2918 break;
2919 default:
2920 tcg_temp_free_i32(tmp2);
2921 tcg_temp_free_i32(tmp);
2922 return 1;
2923 }
2924 tcg_temp_free_i32(tmp2);
2925 tcg_temp_free_i32(tmp);
2926 gen_op_iwmmxt_movq_wRn_M0(wrd);
2927 gen_op_iwmmxt_set_mup();
2936 /* Disassemble an XScale DSP instruction. Returns nonzero if an error occurred
2937 (i.e. an undefined instruction). */
2938 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2940 int acc, rd0, rd1, rdhi, rdlo;
2943 if ((insn & 0x0ff00f10) == 0x0e200010) {
2944 /* Multiply with Internal Accumulate Format */
2945 rd0 = (insn >> 12) & 0xf;
2947 acc = (insn >> 5) & 7;
2952 tmp = load_reg(s, rd0);
2953 tmp2 = load_reg(s, rd1);
2954 switch ((insn >> 16) & 0xf) {
2956 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2958 case 0x8: /* MIAPH */
2959 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2961 case 0xc: /* MIABB */
2962 case 0xd: /* MIABT */
2963 case 0xe: /* MIATB */
2964 case 0xf: /* MIATT */
2965 if (insn & (1 << 16))
2966 tcg_gen_shri_i32(tmp, tmp, 16);
2967 if (insn & (1 << 17))
2968 tcg_gen_shri_i32(tmp2, tmp2, 16);
2969 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2974 tcg_temp_free_i32(tmp2);
2975 tcg_temp_free_i32(tmp);
2977 gen_op_iwmmxt_movq_wRn_M0(acc);
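/* Internal Accumulator Access Format: MAR/MRA move the 64-bit
 * accumulator to or from a core register pair (rdlo/rdhi); on a read
 * the high word is masked so that only the live bits of the 40-bit
 * accumulator are visible.
 */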
2981 if ((insn & 0x0fe00ff8) == 0x0c400000) {
2982 /* Internal Accumulator Access Format */
2983 rdhi = (insn >> 16) & 0xf;
2984 rdlo = (insn >> 12) & 0xf;
2990 if (insn & ARM_CP_RW_BIT) { /* MRA */
2991 iwmmxt_load_reg(cpu_V0, acc);
2992 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2993 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
2994 tcg_gen_extrl_i64_i32(cpu_R[rdhi], cpu_V0);
2995 tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2997 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2998 iwmmxt_store_reg(cpu_V0, acc);
3006 #define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
3007 #define VFP_SREG(insn, bigbit, smallbit) \
3008 ((VFP_REG_SHR(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1))
3009 #define VFP_DREG(reg, insn, bigbit, smallbit) do { \
3010 if (arm_dc_feature(s, ARM_FEATURE_VFP3)) { \
3011 reg = (((insn) >> (bigbit)) & 0x0f) \
3012 | (((insn) >> ((smallbit) - 4)) & 0x10); \
3014 if (insn & (1 << (smallbit))) \
3016 reg = ((insn) >> (bigbit)) & 0x0f; \
3019 #define VFP_SREG_D(insn) VFP_SREG(insn, 12, 22)
3020 #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
3021 #define VFP_SREG_N(insn) VFP_SREG(insn, 16, 7)
3022 #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
3023 #define VFP_SREG_M(insn) VFP_SREG(insn, 0, 5)
3024 #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
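/* Worked example of the decode above: for VFP_DREG_D the low four bits
 * of the register number come from insn[15:12] and, when VFP3 (32
 * double registers) is present, insn[22] supplies bit 4, so
 * insn[15:12] = 0x3 with insn[22] = 1 decodes to d19.  Without VFP3 a
 * set high bit makes the encoding invalid.
 */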
3026 /* Move between integer and VFP cores. */
3027 static TCGv_i32 gen_vfp_mrs(void)
3029 TCGv_i32 tmp = tcg_temp_new_i32();
3030 tcg_gen_mov_i32(tmp, cpu_F0s);
3034 static void gen_vfp_msr(TCGv_i32 tmp)
3036 tcg_gen_mov_i32(cpu_F0s, tmp);
3037 tcg_temp_free_i32(tmp);
3040 static void gen_neon_dup_low16(TCGv_i32 var)
3042 TCGv_i32 tmp = tcg_temp_new_i32();
3043 tcg_gen_ext16u_i32(var, var);
3044 tcg_gen_shli_i32(tmp, var, 16);
3045 tcg_gen_or_i32(var, var, tmp);
3046 tcg_temp_free_i32(tmp);
3049 static void gen_neon_dup_high16(TCGv_i32 var)
3051 TCGv_i32 tmp = tcg_temp_new_i32();
3052 tcg_gen_andi_i32(var, var, 0xffff0000);
3053 tcg_gen_shri_i32(tmp, var, 16);
3054 tcg_gen_or_i32(var, var, tmp);
3055 tcg_temp_free_i32(tmp);
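/* The two helpers above replicate one 16-bit half of 'var' into both
 * halves of the 32-bit value, e.g. 0x0000abcd becomes 0xabcdabcd for
 * the low-half variant.
 */

/* VSEL: copy either Vn or Vm to Vd depending on a condition (eq, vs,
 * ge or gt) taken from insn bits [21:20]; the comparison is done with
 * movcond on the cached NZCV flag values rather than with a branch.
 */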
3058 static int handle_vsel(uint32_t insn, uint32_t rd, uint32_t rn, uint32_t rm,
3061 uint32_t cc = extract32(insn, 20, 2);
3064 TCGv_i64 frn, frm, dest;
3065 TCGv_i64 tmp, zero, zf, nf, vf;
3067 zero = tcg_const_i64(0);
3069 frn = tcg_temp_new_i64();
3070 frm = tcg_temp_new_i64();
3071 dest = tcg_temp_new_i64();
3073 zf = tcg_temp_new_i64();
3074 nf = tcg_temp_new_i64();
3075 vf = tcg_temp_new_i64();
3077 tcg_gen_extu_i32_i64(zf, cpu_ZF);
3078 tcg_gen_ext_i32_i64(nf, cpu_NF);
3079 tcg_gen_ext_i32_i64(vf, cpu_VF);
3081 tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn));
3082 tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm));
3085 tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
3089 tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero,
3092 case 2: /* ge: N == V -> N ^ V == 0 */
3093 tmp = tcg_temp_new_i64();
3094 tcg_gen_xor_i64(tmp, vf, nf);
3095 tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
3097 tcg_temp_free_i64(tmp);
3099 case 3: /* gt: !Z && N == V */
3100 tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero,
3102 tmp = tcg_temp_new_i64();
3103 tcg_gen_xor_i64(tmp, vf, nf);
3104 tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
3106 tcg_temp_free_i64(tmp);
3109 tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd));
3110 tcg_temp_free_i64(frn);
3111 tcg_temp_free_i64(frm);
3112 tcg_temp_free_i64(dest);
3114 tcg_temp_free_i64(zf);
3115 tcg_temp_free_i64(nf);
3116 tcg_temp_free_i64(vf);
3118 tcg_temp_free_i64(zero);
3120 TCGv_i32 frn, frm, dest;
3123 zero = tcg_const_i32(0);
3125 frn = tcg_temp_new_i32();
3126 frm = tcg_temp_new_i32();
3127 dest = tcg_temp_new_i32();
3128 tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn));
3129 tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm));
3132 tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
3136 tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero,
3139 case 2: /* ge: N == V -> N ^ V == 0 */
3140 tmp = tcg_temp_new_i32();
3141 tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
3142 tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
3144 tcg_temp_free_i32(tmp);
3146 case 3: /* gt: !Z && N == V */
3147 tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero,
3149 tmp = tcg_temp_new_i32();
3150 tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
3151 tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
3153 tcg_temp_free_i32(tmp);
3156 tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd));
3157 tcg_temp_free_i32(frn);
3158 tcg_temp_free_i32(frm);
3159 tcg_temp_free_i32(dest);
3161 tcg_temp_free_i32(zero);
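/* VMAXNM/VMINNM: maximum/minimum with IEEE 754-2008 maxNum/minNum
 * semantics, so a quiet NaN input loses to a numeric operand; insn
 * bit 6 selects min vs max and 'dp' selects the double-precision form.
 */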
3167 static int handle_vminmaxnm(uint32_t insn, uint32_t rd, uint32_t rn,
3168 uint32_t rm, uint32_t dp)
3170 uint32_t vmin = extract32(insn, 6, 1);
3171 TCGv_ptr fpst = get_fpstatus_ptr(0);
3174 TCGv_i64 frn, frm, dest;
3176 frn = tcg_temp_new_i64();
3177 frm = tcg_temp_new_i64();
3178 dest = tcg_temp_new_i64();
3180 tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn));
3181 tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm));
3183 gen_helper_vfp_minnumd(dest, frn, frm, fpst);
3185 gen_helper_vfp_maxnumd(dest, frn, frm, fpst);
3187 tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd));
3188 tcg_temp_free_i64(frn);
3189 tcg_temp_free_i64(frm);
3190 tcg_temp_free_i64(dest);
3192 TCGv_i32 frn, frm, dest;
3194 frn = tcg_temp_new_i32();
3195 frm = tcg_temp_new_i32();
3196 dest = tcg_temp_new_i32();
3198 tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn));
3199 tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm));
3201 gen_helper_vfp_minnums(dest, frn, frm, fpst);
3203 gen_helper_vfp_maxnums(dest, frn, frm, fpst);
3205 tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd));
3206 tcg_temp_free_i32(frn);
3207 tcg_temp_free_i32(frm);
3208 tcg_temp_free_i32(dest);
3211 tcg_temp_free_ptr(fpst);
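/* VRINTA/VRINTN/VRINTP/VRINTM: round to an integral value in floating
 * point using the rounding mode encoded in the insn rather than the
 * current FPSCR mode.  gen_helper_set_rmode() installs the new mode
 * and hands back the old one, so the second call in the function below
 * restores the original rounding mode.
 */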
3215 static int handle_vrint(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
3218 TCGv_ptr fpst = get_fpstatus_ptr(0);
3221 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
3222 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3227 tcg_op = tcg_temp_new_i64();
3228 tcg_res = tcg_temp_new_i64();
3229 tcg_gen_ld_f64(tcg_op, cpu_env, vfp_reg_offset(dp, rm));
3230 gen_helper_rintd(tcg_res, tcg_op, fpst);
3231 tcg_gen_st_f64(tcg_res, cpu_env, vfp_reg_offset(dp, rd));
3232 tcg_temp_free_i64(tcg_op);
3233 tcg_temp_free_i64(tcg_res);
3237 tcg_op = tcg_temp_new_i32();
3238 tcg_res = tcg_temp_new_i32();
3239 tcg_gen_ld_f32(tcg_op, cpu_env, vfp_reg_offset(dp, rm));
3240 gen_helper_rints(tcg_res, tcg_op, fpst);
3241 tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(dp, rd));
3242 tcg_temp_free_i32(tcg_op);
3243 tcg_temp_free_i32(tcg_res);
3246 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3247 tcg_temp_free_i32(tcg_rmode);
3249 tcg_temp_free_ptr(fpst);
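/* VCVTA/VCVTN/VCVTP/VCVTM: float to integer conversion with a directed
 * rounding mode from the insn.  The same set_rmode save/restore dance
 * is used, and the integer result is always written to a
 * single-precision register, even when the source is double precision.
 */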
3253 static int handle_vcvt(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
3256 bool is_signed = extract32(insn, 7, 1);
3257 TCGv_ptr fpst = get_fpstatus_ptr(0);
3258 TCGv_i32 tcg_rmode, tcg_shift;
3260 tcg_shift = tcg_const_i32(0);
3262 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
3263 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3266 TCGv_i64 tcg_double, tcg_res;
3268 /* Rd is encoded as a single precision register even when the source
3269 * is double precision.
3271 rd = ((rd << 1) & 0x1e) | ((rd >> 4) & 0x1);
3272 tcg_double = tcg_temp_new_i64();
3273 tcg_res = tcg_temp_new_i64();
3274 tcg_tmp = tcg_temp_new_i32();
3275 tcg_gen_ld_f64(tcg_double, cpu_env, vfp_reg_offset(1, rm));
3277 gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
3279 gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
3281 tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
3282 tcg_gen_st_f32(tcg_tmp, cpu_env, vfp_reg_offset(0, rd));
3283 tcg_temp_free_i32(tcg_tmp);
3284 tcg_temp_free_i64(tcg_res);
3285 tcg_temp_free_i64(tcg_double);
3287 TCGv_i32 tcg_single, tcg_res;
3288 tcg_single = tcg_temp_new_i32();
3289 tcg_res = tcg_temp_new_i32();
3290 tcg_gen_ld_f32(tcg_single, cpu_env, vfp_reg_offset(0, rm));
3292 gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
3294 gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
3296 tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(0, rd));
3297 tcg_temp_free_i32(tcg_res);
3298 tcg_temp_free_i32(tcg_single);
3301 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3302 tcg_temp_free_i32(tcg_rmode);
3304 tcg_temp_free_i32(tcg_shift);
3306 tcg_temp_free_ptr(fpst);
3311 /* Table for converting the most common AArch32 encoding of
3312 * rounding mode to arm_fprounding order (which matches the
3313 * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
3314 */
3315 static const uint8_t fp_decode_rm[] = {
3316 FPROUNDING_TIEAWAY,
3317 FPROUNDING_TIEEVEN,
3318 FPROUNDING_POSINF,
3319 FPROUNDING_NEGINF,
3320 };
3322 static int disas_vfp_v8_insn(DisasContext *s, uint32_t insn)
3324 uint32_t rd, rn, rm, dp = extract32(insn, 8, 1);
3326 if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
3331 VFP_DREG_D(rd, insn);
3332 VFP_DREG_N(rn, insn);
3333 VFP_DREG_M(rm, insn);
3335 rd = VFP_SREG_D(insn);
3336 rn = VFP_SREG_N(insn);
3337 rm = VFP_SREG_M(insn);
3340 if ((insn & 0x0f800e50) == 0x0e000a00) {
3341 return handle_vsel(insn, rd, rn, rm, dp);
3342 } else if ((insn & 0x0fb00e10) == 0x0e800a00) {
3343 return handle_vminmaxnm(insn, rd, rn, rm, dp);
3344 } else if ((insn & 0x0fbc0ed0) == 0x0eb80a40) {
3345 /* VRINTA, VRINTN, VRINTP, VRINTM */
3346 int rounding = fp_decode_rm[extract32(insn, 16, 2)];
3347 return handle_vrint(insn, rd, rm, dp, rounding);
3348 } else if ((insn & 0x0fbc0e50) == 0x0ebc0a40) {
3349 /* VCVTA, VCVTN, VCVTP, VCVTM */
3350 int rounding = fp_decode_rm[extract32(insn, 16, 2)];
3351 return handle_vcvt(insn, rd, rm, dp, rounding);
3356 /* Disassemble a VFP instruction. Returns nonzero if an error occurred
3357 (i.e. an undefined instruction). */
3358 static int disas_vfp_insn(DisasContext *s, uint32_t insn)
3360 uint32_t rd, rn, rm, op, i, n, offset, delta_d, delta_m, bank_mask;
3366 if (!arm_dc_feature(s, ARM_FEATURE_VFP)) {
3370 /* FIXME: this access check should not take precedence over UNDEF
3371 * for invalid encodings; we will generate incorrect syndrome information
3372 * for attempts to execute invalid vfp/neon encodings with FP disabled.
3374 if (s->fp_excp_el) {
3375 gen_exception_insn(s, 4, EXCP_UDEF,
3376 syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
3380 if (!s->vfp_enabled) {
3381 /* VFP disabled. Only allow fmxr/fmrx to/from some control regs. */
3382 if ((insn & 0x0fe00fff) != 0x0ee00a10)
3384 rn = (insn >> 16) & 0xf;
3385 if (rn != ARM_VFP_FPSID && rn != ARM_VFP_FPEXC && rn != ARM_VFP_MVFR2
3386 && rn != ARM_VFP_MVFR1 && rn != ARM_VFP_MVFR0) {
3391 if (extract32(insn, 28, 4) == 0xf) {
3392 /* Encodings with T=1 (Thumb) or unconditional (ARM):
3393 * only used in v8 and above.
3395 return disas_vfp_v8_insn(s, insn);
3398 dp = ((insn & 0xf00) == 0xb00);
3399 switch ((insn >> 24) & 0xf) {
3401 if (insn & (1 << 4)) {
3402 /* single register transfer */
3403 rd = (insn >> 12) & 0xf;
3408 VFP_DREG_N(rn, insn);
3411 if (insn & 0x00c00060
3412 && !arm_dc_feature(s, ARM_FEATURE_NEON)) {
3416 pass = (insn >> 21) & 1;
3417 if (insn & (1 << 22)) {
3419 offset = ((insn >> 5) & 3) * 8;
3420 } else if (insn & (1 << 5)) {
3422 offset = (insn & (1 << 6)) ? 16 : 0;
3427 if (insn & ARM_CP_RW_BIT) {
3429 tmp = neon_load_reg(rn, pass);
3433 tcg_gen_shri_i32(tmp, tmp, offset);
3434 if (insn & (1 << 23))
3440 if (insn & (1 << 23)) {
3442 tcg_gen_shri_i32(tmp, tmp, 16);
3448 tcg_gen_sari_i32(tmp, tmp, 16);
3457 store_reg(s, rd, tmp);
3460 tmp = load_reg(s, rd);
3461 if (insn & (1 << 23)) {
3463 int vec_size = pass ? 16 : 8;
3464 tcg_gen_gvec_dup_i32(size, neon_reg_offset(rn, 0),
3465 vec_size, vec_size, tmp);
3466 tcg_temp_free_i32(tmp);
3471 tmp2 = neon_load_reg(rn, pass);
3472 tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 8);
3473 tcg_temp_free_i32(tmp2);
3476 tmp2 = neon_load_reg(rn, pass);
3477 tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 16);
3478 tcg_temp_free_i32(tmp2);
3483 neon_store_reg(rn, pass, tmp);
3487 if ((insn & 0x6f) != 0x00)
3489 rn = VFP_SREG_N(insn);
3490 if (insn & ARM_CP_RW_BIT) {
3492 if (insn & (1 << 21)) {
3493 /* system register */
3498 /* VFP2 allows access to FSID from userspace.
3499 VFP3 restricts all id registers to privileged
3502 && arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3505 tmp = load_cpu_field(vfp.xregs[rn]);
3510 tmp = load_cpu_field(vfp.xregs[rn]);
3512 case ARM_VFP_FPINST:
3513 case ARM_VFP_FPINST2:
3514 /* Not present in VFP3. */
3516 || arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3519 tmp = load_cpu_field(vfp.xregs[rn]);
3523 tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
3524 tcg_gen_andi_i32(tmp, tmp, 0xf0000000);
3526 tmp = tcg_temp_new_i32();
3527 gen_helper_vfp_get_fpscr(tmp, cpu_env);
3531 if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
3538 || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
3541 tmp = load_cpu_field(vfp.xregs[rn]);
3547 gen_mov_F0_vreg(0, rn);
3548 tmp = gen_vfp_mrs();
3551 /* Set the 4 flag bits in the CPSR. */
3553 tcg_temp_free_i32(tmp);
3555 store_reg(s, rd, tmp);
3559 if (insn & (1 << 21)) {
3561 /* system register */
3566 /* Writes are ignored. */
3569 tmp = load_reg(s, rd);
3570 gen_helper_vfp_set_fpscr(cpu_env, tmp);
3571 tcg_temp_free_i32(tmp);
3577 /* TODO: VFP subarchitecture support.
3578 * For now, keep the EN bit only */
3579 tmp = load_reg(s, rd);
3580 tcg_gen_andi_i32(tmp, tmp, 1 << 30);
3581 store_cpu_field(tmp, vfp.xregs[rn]);
3584 case ARM_VFP_FPINST:
3585 case ARM_VFP_FPINST2:
3589 tmp = load_reg(s, rd);
3590 store_cpu_field(tmp, vfp.xregs[rn]);
3596 tmp = load_reg(s, rd);
3598 gen_mov_vreg_F0(0, rn);
3603 /* data processing */
3604 /* The opcode is in bits 23, 21, 20 and 6. */
3605 op = ((insn >> 20) & 8) | ((insn >> 19) & 6) | ((insn >> 6) & 1);
3609 rn = ((insn >> 15) & 0x1e) | ((insn >> 7) & 1);
3611 /* rn is register number */
3612 VFP_DREG_N(rn, insn);
3615 if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18) ||
3616 ((rn & 0x1e) == 0x6))) {
3617 /* Integer or single/half precision destination. */
3618 rd = VFP_SREG_D(insn);
3620 VFP_DREG_D(rd, insn);
3623 (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14) ||
3624 ((rn & 0x1e) == 0x4))) {
3625 /* VCVT from int or half precision is always from S reg
3626 * regardless of dp bit. VCVT with immediate frac_bits
3627 * has same format as SREG_M.
3629 rm = VFP_SREG_M(insn);
3631 VFP_DREG_M(rm, insn);
3634 rn = VFP_SREG_N(insn);
3635 if (op == 15 && rn == 15) {
3636 /* Double precision destination. */
3637 VFP_DREG_D(rd, insn);
3639 rd = VFP_SREG_D(insn);
3641 /* NB that we implicitly rely on the encoding for the frac_bits
3642 * in VCVT of fixed to float being the same as that of an SREG_M
3644 rm = VFP_SREG_M(insn);
3647 veclen = s->vec_len;
3648 if (op == 15 && rn > 3)
3651 /* Shut up compiler warnings. */
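/* VFP short vectors: when FPSCR.LEN/STRIDE are nonzero a single data
 * processing insn operates on a bank of registers.  veclen is the
 * remaining iteration count, delta_d/delta_m are the register strides
 * within a bank, and delta_m == 0 marks a scalar (non-vector) Fm
 * operand.
 */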
3662 /* Figure out what type of vector operation this is. */
3663 if ((rd & bank_mask) == 0) {
3668 delta_d = (s->vec_stride >> 1) + 1;
3670 delta_d = s->vec_stride + 1;
3672 if ((rm & bank_mask) == 0) {
3673 /* mixed scalar/vector */
3682 /* Load the initial operands. */
3687 /* Integer source */
3688 gen_mov_F0_vreg(0, rm);
3693 gen_mov_F0_vreg(dp, rd);
3694 gen_mov_F1_vreg(dp, rm);
3698 /* Compare with zero */
3699 gen_mov_F0_vreg(dp, rd);
3710 /* Source and destination the same. */
3711 gen_mov_F0_vreg(dp, rd);
3717 /* VCVTB, VCVTT: only present with the halfprec extension
3718 * UNPREDICTABLE if bit 8 is set prior to ARMv8
3719 * (we choose to UNDEF)
3721 if ((dp && !arm_dc_feature(s, ARM_FEATURE_V8)) ||
3722 !arm_dc_feature(s, ARM_FEATURE_VFP_FP16)) {
3725 if (!extract32(rn, 1, 1)) {
3726 /* Half precision source. */
3727 gen_mov_F0_vreg(0, rm);
3730 /* Otherwise fall through */
3732 /* One source operand. */
3733 gen_mov_F0_vreg(dp, rm);
3737 /* Two source operands. */
3738 gen_mov_F0_vreg(dp, rn);
3739 gen_mov_F1_vreg(dp, rm);
3743 /* Perform the calculation. */
3745 case 0: /* VMLA: fd + (fn * fm) */
3746 /* Note that order of inputs to the add matters for NaNs */
3748 gen_mov_F0_vreg(dp, rd);
3751 case 1: /* VMLS: fd + -(fn * fm) */
3754 gen_mov_F0_vreg(dp, rd);
3757 case 2: /* VNMLS: -fd + (fn * fm) */
3758 /* Note that it isn't valid to replace (-A + B) with (B - A)
3759 * or similar plausible looking simplifications
3760 * because this will give wrong results for NaNs.
3763 gen_mov_F0_vreg(dp, rd);
3767 case 3: /* VNMLA: -fd + -(fn * fm) */
3770 gen_mov_F0_vreg(dp, rd);
3774 case 4: /* mul: fn * fm */
3777 case 5: /* nmul: -(fn * fm) */
3781 case 6: /* add: fn + fm */
3784 case 7: /* sub: fn - fm */
3787 case 8: /* div: fn / fm */
3790 case 10: /* VFNMA : fd = muladd(-fd, fn, fm) */
3791 case 11: /* VFNMS : fd = muladd(-fd, -fn, fm) */
3792 case 12: /* VFMA : fd = muladd( fd, fn, fm) */
3793 case 13: /* VFMS : fd = muladd( fd, -fn, fm) */
3794 /* These are fused multiply-add, and must be done as one
3795 * floating point operation with no rounding between the
3796 * multiplication and addition steps.
3797 * NB that doing the negations here as separate steps is
3798 * correct : an input NaN should come out with its sign bit
3799 * flipped if it is a negated-input.
3801 if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) {
3809 gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
3811 frd = tcg_temp_new_i64();
3812 tcg_gen_ld_f64(frd, cpu_env, vfp_reg_offset(dp, rd));
3815 gen_helper_vfp_negd(frd, frd);
3817 fpst = get_fpstatus_ptr(0);
3818 gen_helper_vfp_muladdd(cpu_F0d, cpu_F0d,
3819 cpu_F1d, frd, fpst);
3820 tcg_temp_free_ptr(fpst);
3821 tcg_temp_free_i64(frd);
3827 gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
3829 frd = tcg_temp_new_i32();
3830 tcg_gen_ld_f32(frd, cpu_env, vfp_reg_offset(dp, rd));
3832 gen_helper_vfp_negs(frd, frd);
3834 fpst = get_fpstatus_ptr(0);
3835 gen_helper_vfp_muladds(cpu_F0s, cpu_F0s,
3836 cpu_F1s, frd, fpst);
3837 tcg_temp_free_ptr(fpst);
3838 tcg_temp_free_i32(frd);
3841 case 14: /* fconst */
3842 if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3846 n = (insn << 12) & 0x80000000;
3847 i = ((insn >> 12) & 0x70) | (insn & 0xf);
3854 tcg_gen_movi_i64(cpu_F0d, ((uint64_t)n) << 32);
3861 tcg_gen_movi_i32(cpu_F0s, n);
3864 case 15: /* extension space */
3878 case 4: /* vcvtb.f32.f16, vcvtb.f64.f16 */
3880 TCGv_ptr fpst = get_fpstatus_ptr(false);
3881 TCGv_i32 ahp_mode = get_ahp_flag();
3882 tmp = gen_vfp_mrs();
3883 tcg_gen_ext16u_i32(tmp, tmp);
3885 gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp,
3888 gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp,
3891 tcg_temp_free_i32(ahp_mode);
3892 tcg_temp_free_ptr(fpst);
3893 tcg_temp_free_i32(tmp);
3896 case 5: /* vcvtt.f32.f16, vcvtt.f64.f16 */
3898 TCGv_ptr fpst = get_fpstatus_ptr(false);
3899 TCGv_i32 ahp = get_ahp_flag();
3900 tmp = gen_vfp_mrs();
3901 tcg_gen_shri_i32(tmp, tmp, 16);
3903 gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp,
3906 gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp,
3909 tcg_temp_free_i32(tmp);
3910 tcg_temp_free_i32(ahp);
3911 tcg_temp_free_ptr(fpst);
3914 case 6: /* vcvtb.f16.f32, vcvtb.f16.f64 */
3916 TCGv_ptr fpst = get_fpstatus_ptr(false);
3917 TCGv_i32 ahp = get_ahp_flag();
3918 tmp = tcg_temp_new_i32();
3921 gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
3924 gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
3927 tcg_temp_free_i32(ahp);
3928 tcg_temp_free_ptr(fpst);
3929 gen_mov_F0_vreg(0, rd);
3930 tmp2 = gen_vfp_mrs();
3931 tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
3932 tcg_gen_or_i32(tmp, tmp, tmp2);
3933 tcg_temp_free_i32(tmp2);
3937 case 7: /* vcvtt.f16.f32, vcvtt.f16.f64 */
3939 TCGv_ptr fpst = get_fpstatus_ptr(false);
3940 TCGv_i32 ahp = get_ahp_flag();
3941 tmp = tcg_temp_new_i32();
3943 gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
3946 gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
3949 tcg_temp_free_i32(ahp);
3950 tcg_temp_free_ptr(fpst);
3951 tcg_gen_shli_i32(tmp, tmp, 16);
3952 gen_mov_F0_vreg(0, rd);
3953 tmp2 = gen_vfp_mrs();
3954 tcg_gen_ext16u_i32(tmp2, tmp2);
3955 tcg_gen_or_i32(tmp, tmp, tmp2);
3956 tcg_temp_free_i32(tmp2);
3969 case 11: /* cmpez */
3973 case 12: /* vrintr */
3975 TCGv_ptr fpst = get_fpstatus_ptr(0);
3977 gen_helper_rintd(cpu_F0d, cpu_F0d, fpst);
3979 gen_helper_rints(cpu_F0s, cpu_F0s, fpst);
3981 tcg_temp_free_ptr(fpst);
3984 case 13: /* vrintz */
3986 TCGv_ptr fpst = get_fpstatus_ptr(0);
3988 tcg_rmode = tcg_const_i32(float_round_to_zero);
3989 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3991 gen_helper_rintd(cpu_F0d, cpu_F0d, fpst);
3993 gen_helper_rints(cpu_F0s, cpu_F0s, fpst);
3995 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3996 tcg_temp_free_i32(tcg_rmode);
3997 tcg_temp_free_ptr(fpst);
4000 case 14: /* vrintx */
4002 TCGv_ptr fpst = get_fpstatus_ptr(0);
4004 gen_helper_rintd_exact(cpu_F0d, cpu_F0d, fpst);
4006 gen_helper_rints_exact(cpu_F0s, cpu_F0s, fpst);
4008 tcg_temp_free_ptr(fpst);
4011 case 15: /* single<->double conversion */
4013 gen_helper_vfp_fcvtsd(cpu_F0s, cpu_F0d, cpu_env);
4015 gen_helper_vfp_fcvtds(cpu_F0d, cpu_F0s, cpu_env);
4017 case 16: /* fuito */
4018 gen_vfp_uito(dp, 0);
4020 case 17: /* fsito */
4021 gen_vfp_sito(dp, 0);
4023 case 20: /* fshto */
4024 if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
4027 gen_vfp_shto(dp, 16 - rm, 0);
4029 case 21: /* fslto */
4030 if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
4033 gen_vfp_slto(dp, 32 - rm, 0);
4035 case 22: /* fuhto */
4036 if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
4039 gen_vfp_uhto(dp, 16 - rm, 0);
4041 case 23: /* fulto */
4042 if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
4045 gen_vfp_ulto(dp, 32 - rm, 0);
4047 case 24: /* ftoui */
4048 gen_vfp_toui(dp, 0);
4050 case 25: /* ftouiz */
4051 gen_vfp_touiz(dp, 0);
4053 case 26: /* ftosi */
4054 gen_vfp_tosi(dp, 0);
4056 case 27: /* ftosiz */
4057 gen_vfp_tosiz(dp, 0);
4059 case 28: /* ftosh */
4060 if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
4063 gen_vfp_tosh(dp, 16 - rm, 0);
4065 case 29: /* ftosl */
4066 if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
4069 gen_vfp_tosl(dp, 32 - rm, 0);
4071 case 30: /* ftouh */
4072 if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
4075 gen_vfp_touh(dp, 16 - rm, 0);
4077 case 31: /* ftoul */
4078 if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
4081 gen_vfp_toul(dp, 32 - rm, 0);
4083 default: /* undefined */
4087 default: /* undefined */
4091 /* Write back the result. */
4092 if (op == 15 && (rn >= 8 && rn <= 11)) {
4093 /* Comparison, do nothing. */
4094 } else if (op == 15 && dp && ((rn & 0x1c) == 0x18 ||
4095 (rn & 0x1e) == 0x6)) {
4096 /* VCVT double to int: always integer result.
4097 * VCVT double to half precision is always a single
4100 gen_mov_vreg_F0(0, rd);
4101 } else if (op == 15 && rn == 15) {
4103 gen_mov_vreg_F0(!dp, rd);
4105 gen_mov_vreg_F0(dp, rd);
4108 /* break out of the loop if we have finished */
4112 if (op == 15 && delta_m == 0) {
4113 /* single source one-many */
4115 rd = ((rd + delta_d) & (bank_mask - 1))
4117 gen_mov_vreg_F0(dp, rd);
4121 /* Setup the next operands. */
4123 rd = ((rd + delta_d) & (bank_mask - 1))
4127 /* One source operand. */
4128 rm = ((rm + delta_m) & (bank_mask - 1))
4130 gen_mov_F0_vreg(dp, rm);
4132 /* Two source operands. */
4133 rn = ((rn + delta_d) & (bank_mask - 1))
4135 gen_mov_F0_vreg(dp, rn);
4137 rm = ((rm + delta_m) & (bank_mask - 1))
4139 gen_mov_F1_vreg(dp, rm);
4147 if ((insn & 0x03e00000) == 0x00400000) {
4148 /* two-register transfer */
4149 rn = (insn >> 16) & 0xf;
4150 rd = (insn >> 12) & 0xf;
4152 VFP_DREG_M(rm, insn);
4154 rm = VFP_SREG_M(insn);
4157 if (insn & ARM_CP_RW_BIT) {
4160 gen_mov_F0_vreg(0, rm * 2);
4161 tmp = gen_vfp_mrs();
4162 store_reg(s, rd, tmp);
4163 gen_mov_F0_vreg(0, rm * 2 + 1);
4164 tmp = gen_vfp_mrs();
4165 store_reg(s, rn, tmp);
4167 gen_mov_F0_vreg(0, rm);
4168 tmp = gen_vfp_mrs();
4169 store_reg(s, rd, tmp);
4170 gen_mov_F0_vreg(0, rm + 1);
4171 tmp = gen_vfp_mrs();
4172 store_reg(s, rn, tmp);
4177 tmp = load_reg(s, rd);
4179 gen_mov_vreg_F0(0, rm * 2);
4180 tmp = load_reg(s, rn);
4182 gen_mov_vreg_F0(0, rm * 2 + 1);
4184 tmp = load_reg(s, rd);
4186 gen_mov_vreg_F0(0, rm);
4187 tmp = load_reg(s, rn);
4189 gen_mov_vreg_F0(0, rm + 1);
4194 rn = (insn >> 16) & 0xf;
4196 VFP_DREG_D(rd, insn);
4198 rd = VFP_SREG_D(insn);
4199 if ((insn & 0x01200000) == 0x01000000) {
4200 /* Single load/store */
4201 offset = (insn & 0xff) << 2;
4202 if ((insn & (1 << 23)) == 0)
4204 if (s->thumb && rn == 15) {
4205 /* This is actually UNPREDICTABLE */
4206 addr = tcg_temp_new_i32();
4207 tcg_gen_movi_i32(addr, s->pc & ~2);
4209 addr = load_reg(s, rn);
4211 tcg_gen_addi_i32(addr, addr, offset);
4212 if (insn & (1 << 20)) {
4213 gen_vfp_ld(s, dp, addr);
4214 gen_mov_vreg_F0(dp, rd);
4216 gen_mov_F0_vreg(dp, rd);
4217 gen_vfp_st(s, dp, addr);
4219 tcg_temp_free_i32(addr);
4221 /* load/store multiple */
4222 int w = insn & (1 << 21);
4224 n = (insn >> 1) & 0x7f;
4228 if (w && !(((insn >> 23) ^ (insn >> 24)) & 1)) {
4229 /* P == U , W == 1 => UNDEF */
4232 if (n == 0 || (rd + n) > 32 || (dp && n > 16)) {
4233 /* UNPREDICTABLE cases for bad immediates: we choose to
4234 * UNDEF to avoid generating huge numbers of TCG ops
4238 if (rn == 15 && w) {
4239 /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
4243 if (s->thumb && rn == 15) {
4244 /* This is actually UNPREDICTABLE */
4245 addr = tcg_temp_new_i32();
4246 tcg_gen_movi_i32(addr, s->pc & ~2);
4248 addr = load_reg(s, rn);
4250 if (insn & (1 << 24)) /* pre-decrement */
4251 tcg_gen_addi_i32(addr, addr, -((insn & 0xff) << 2));
4253 if (s->v8m_stackcheck && rn == 13 && w) {
4255 * Here 'addr' is the lowest address we will store to,
4256 * and is either the old SP (if post-increment) or
4257 * the new SP (if pre-decrement). For post-increment
4258 * where the old value is below the limit and the new
4259 * value is above, it is UNKNOWN whether the limit check
4260 * triggers; we choose to trigger.
4262 gen_helper_v8m_stackcheck(cpu_env, addr);
4269 for (i = 0; i < n; i++) {
4270 if (insn & ARM_CP_RW_BIT) {
4272 gen_vfp_ld(s, dp, addr);
4273 gen_mov_vreg_F0(dp, rd + i);
4276 gen_mov_F0_vreg(dp, rd + i);
4277 gen_vfp_st(s, dp, addr);
4279 tcg_gen_addi_i32(addr, addr, offset);
4283 if (insn & (1 << 24))
4284 offset = -offset * n;
4285 else if (dp && (insn & 1))
4291 tcg_gen_addi_i32(addr, addr, offset);
4292 store_reg(s, rn, addr);
4294 tcg_temp_free_i32(addr);
4300 /* Should never happen. */
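/* Direct block chaining with goto_tb is only safe when the branch
 * destination is on the same guest page as the start of this TB or as
 * the current insn, so that a page mapping change cannot leave a stale
 * direct jump in place; user-only builds have no such concern.
 */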
4306 static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
4307 {
4308 #ifndef CONFIG_USER_ONLY
4309 return (s->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) ||
4310 ((s->pc - 1) & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
4311 #else
4312 return true;
4313 #endif
4314 }
4316 static void gen_goto_ptr(void)
4318 tcg_gen_lookup_and_goto_ptr();
4321 /* This will end the TB but doesn't guarantee we'll return to
4322 * cpu_loop_exec. Any live exit_requests will be processed as we
4323 * enter the next TB.
4325 static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
4327 if (use_goto_tb(s, dest)) {
4329 gen_set_pc_im(s, dest);
4330 tcg_gen_exit_tb(s->base.tb, n);
4332 gen_set_pc_im(s, dest);
4335 s->base.is_jmp = DISAS_NORETURN;
4338 static inline void gen_jmp (DisasContext *s, uint32_t dest)
4340 if (unlikely(is_singlestepping(s))) {
4341 /* An indirect jump so that we still trigger the debug exception. */
4346 gen_goto_tb(s, 0, dest);
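/* gen_mulxy: the signed 16x16->32 multiply used by the SMUL<x><y> and
 * SMLA<x><y> style encodings; x and y select the top half (arithmetic
 * shift right by 16) or the sign-extended bottom half of each operand.
 */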
4350 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
4353 tcg_gen_sari_i32(t0, t0, 16);
4357 tcg_gen_sari_i32(t1, t1, 16);
4360 tcg_gen_mul_i32(t0, t0, t1);
4363 /* Return the mask of PSR bits set by a MSR instruction. */
4364 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
4369 if (flags & (1 << 0))
4371 if (flags & (1 << 1))
4373 if (flags & (1 << 2))
4375 if (flags & (1 << 3))
4378 /* Mask out undefined bits. */
4379 mask &= ~CPSR_RESERVED;
4380 if (!arm_dc_feature(s, ARM_FEATURE_V4T)) {
4383 if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
4384 mask &= ~CPSR_Q; /* V5TE in reality*/
4386 if (!arm_dc_feature(s, ARM_FEATURE_V6)) {
4387 mask &= ~(CPSR_E | CPSR_GE);
4389 if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
4392 /* Mask out execution state and reserved bits. */
4394 mask &= ~(CPSR_EXEC | CPSR_RESERVED);
4396 /* Mask out privileged bits. */
4402 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
4403 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
4407 /* ??? This is also undefined in system mode. */
4411 tmp = load_cpu_field(spsr);
4412 tcg_gen_andi_i32(tmp, tmp, ~mask);
4413 tcg_gen_andi_i32(t0, t0, mask);
4414 tcg_gen_or_i32(tmp, tmp, t0);
4415 store_cpu_field(tmp, spsr);
4417 gen_set_cpsr(t0, mask);
4419 tcg_temp_free_i32(t0);
4424 /* Returns nonzero if access to the PSR is not permitted. */
4425 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
4428 tmp = tcg_temp_new_i32();
4429 tcg_gen_movi_i32(tmp, val);
4430 return gen_set_psr(s, mask, spsr, tmp);
4433 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
4434 int *tgtmode, int *regno)
4436 /* Decode the r and sysm fields of MSR/MRS banked accesses into
4437 * the target mode and register number, and identify the various
4438 * unpredictable cases.
4439 * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
4440 * + executed in user mode
4441 * + using R15 as the src/dest register
4442 * + accessing an unimplemented register
4443 * + accessing a register that's inaccessible at current PL/security state*
4444 * + accessing a register that you could access with a different insn
4445 * We choose to UNDEF in all these cases.
4446 * Since we don't know which of the various AArch32 modes we are in
4447 * we have to defer some checks to runtime.
4448 * Accesses to Monitor mode registers from Secure EL1 (which implies
4449 * that EL3 is AArch64) must trap to EL3.
4451 * If the access checks fail this function will emit code to take
4452 * an exception and return false. Otherwise it will return true,
4453 * and set *tgtmode and *regno appropriately.
4455 int exc_target = default_exception_el(s);
4457 /* These instructions are present only in ARMv8, or in ARMv7 with the
4458 * Virtualization Extensions.
4460 if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
4461 !arm_dc_feature(s, ARM_FEATURE_EL2)) {
4465 if (IS_USER(s) || rn == 15) {
4469 /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
4470 * of registers into (r, sysm).
4473 /* SPSRs for other modes */
4475 case 0xe: /* SPSR_fiq */
4476 *tgtmode = ARM_CPU_MODE_FIQ;
4478 case 0x10: /* SPSR_irq */
4479 *tgtmode = ARM_CPU_MODE_IRQ;
4481 case 0x12: /* SPSR_svc */
4482 *tgtmode = ARM_CPU_MODE_SVC;
4484 case 0x14: /* SPSR_abt */
4485 *tgtmode = ARM_CPU_MODE_ABT;
4487 case 0x16: /* SPSR_und */
4488 *tgtmode = ARM_CPU_MODE_UND;
4490 case 0x1c: /* SPSR_mon */
4491 *tgtmode = ARM_CPU_MODE_MON;
4493 case 0x1e: /* SPSR_hyp */
4494 *tgtmode = ARM_CPU_MODE_HYP;
4496 default: /* unallocated */
4499 /* We arbitrarily assign SPSR a register number of 16. */
4502 /* general purpose registers for other modes */
4504 case 0x0 ... 0x6: /* 0b00xxx : r8_usr ... r14_usr */
4505 *tgtmode = ARM_CPU_MODE_USR;
4508 case 0x8 ... 0xe: /* 0b01xxx : r8_fiq ... r14_fiq */
4509 *tgtmode = ARM_CPU_MODE_FIQ;
4512 case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
4513 *tgtmode = ARM_CPU_MODE_IRQ;
4514 *regno = sysm & 1 ? 13 : 14;
4516 case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
4517 *tgtmode = ARM_CPU_MODE_SVC;
4518 *regno = sysm & 1 ? 13 : 14;
4520 case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
4521 *tgtmode = ARM_CPU_MODE_ABT;
4522 *regno = sysm & 1 ? 13 : 14;
4524 case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
4525 *tgtmode = ARM_CPU_MODE_UND;
4526 *regno = sysm & 1 ? 13 : 14;
4528 case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
4529 *tgtmode = ARM_CPU_MODE_MON;
4530 *regno = sysm & 1 ? 13 : 14;
4532 case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
4533 *tgtmode = ARM_CPU_MODE_HYP;
4534 /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
4535 *regno = sysm & 1 ? 13 : 17;
4537 default: /* unallocated */
4542 /* Catch the 'accessing inaccessible register' cases we can detect
4543 * at translate time.
4546 case ARM_CPU_MODE_MON:
4547 if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
4550 if (s->current_el == 1) {
4551 /* If we're in Secure EL1 (which implies that EL3 is AArch64)
4552 * then accesses to Mon registers trap to EL3
4558 case ARM_CPU_MODE_HYP:
4560 * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
4561 * (and so we can forbid accesses from EL2 or below). elr_hyp
4562 * can be accessed also from Hyp mode, so forbid accesses from
4565 if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
4566 (s->current_el < 3 && *regno != 17)) {
4577 /* If we get here then some access check did not pass */
4578 gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(), exc_target);
4582 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
4584 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
4585 int tgtmode = 0, regno = 0;
4587 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
4591 /* Sync state because msr_banked() can raise exceptions */
4592 gen_set_condexec(s);
4593 gen_set_pc_im(s, s->pc - 4);
4594 tcg_reg = load_reg(s, rn);
4595 tcg_tgtmode = tcg_const_i32(tgtmode);
4596 tcg_regno = tcg_const_i32(regno);
4597 gen_helper_msr_banked(cpu_env, tcg_reg, tcg_tgtmode, tcg_regno);
4598 tcg_temp_free_i32(tcg_tgtmode);
4599 tcg_temp_free_i32(tcg_regno);
4600 tcg_temp_free_i32(tcg_reg);
4601 s->base.is_jmp = DISAS_UPDATE;
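/* Both banked MSR and MRS accesses go through helpers that redo the
 * mode/permission checks at run time and can raise exceptions, so the
 * PC and condexec state are synced before the call and the TB is ended
 * afterwards (DISAS_UPDATE).
 */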
4604 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
4606 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
4607 int tgtmode = 0, regno = 0;
4609 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
4613 /* Sync state because mrs_banked() can raise exceptions */
4614 gen_set_condexec(s);
4615 gen_set_pc_im(s, s->pc - 4);
4616 tcg_reg = tcg_temp_new_i32();
4617 tcg_tgtmode = tcg_const_i32(tgtmode);
4618 tcg_regno = tcg_const_i32(regno);
4619 gen_helper_mrs_banked(tcg_reg, cpu_env, tcg_tgtmode, tcg_regno);
4620 tcg_temp_free_i32(tcg_tgtmode);
4621 tcg_temp_free_i32(tcg_regno);
4622 store_reg(s, rn, tcg_reg);
4623 s->base.is_jmp = DISAS_UPDATE;
4626 /* Store value to PC as for an exception return (ie don't
4627 * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
4628 * will do the masking based on the new value of the Thumb bit.
4630 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
4632 tcg_gen_mov_i32(cpu_R[15], pc);
4633 tcg_temp_free_i32(pc);
4636 /* Generate a v6 exception return. Marks both values as dead. */
4637 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
4639 store_pc_exc_ret(s, pc);
4640 /* The cpsr_write_eret helper will mask the low bits of PC
4641 * appropriately depending on the new Thumb bit, so it must
4642 * be called after storing the new PC.
4644 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
4647 gen_helper_cpsr_write_eret(cpu_env, cpsr);
4648 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
4651 tcg_temp_free_i32(cpsr);
4652 /* Must exit loop to check un-masked IRQs */
4653 s->base.is_jmp = DISAS_EXIT;
4656 /* Generate an old-style exception return. Marks pc as dead. */
4657 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
4659 gen_rfe(s, pc, load_cpu_field(spsr));
4663 * For WFI we will halt the vCPU until an IRQ. For WFE and YIELD we
4664 * only call the helper when running single threaded TCG code to ensure
4665 * the next round-robin scheduled vCPU gets a crack. In MTTCG mode we
4666 * just skip this instruction. Currently the SEV/SEVL instructions
4667 * which are *one* of many ways to wake the CPU from WFE are not
4668 * implemented so we can't sleep like WFI does.
4670 static void gen_nop_hint(DisasContext *s, int val)
4673 /* When running in MTTCG we don't generate jumps to the yield and
4674 * WFE helpers as it won't affect the scheduling of other vCPUs.
4675 * If we wanted to more completely model WFE/SEV so we don't busy
4676 * spin unnecessarily we would need to do something more involved.
4679 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
4680 gen_set_pc_im(s, s->pc);
4681 s->base.is_jmp = DISAS_YIELD;
4685 gen_set_pc_im(s, s->pc);
4686 s->base.is_jmp = DISAS_WFI;
4689 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
4690 gen_set_pc_im(s, s->pc);
4691 s->base.is_jmp = DISAS_WFE;
4696 /* TODO: Implement SEV, SEVL and WFE. May help SMP performance. */
4702 #define CPU_V001 cpu_V0, cpu_V0, cpu_V1
4704 static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
4707 case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
4708 case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
4709 case 2: tcg_gen_add_i32(t0, t0, t1); break;
4714 static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
4717 case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
4718 case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
4719 case 2: tcg_gen_sub_i32(t0, t1, t0); break;
4724 /* 32-bit pairwise ops end up the same as the elementwise versions. */
4725 #define gen_helper_neon_pmax_s32 gen_helper_neon_max_s32
4726 #define gen_helper_neon_pmax_u32 gen_helper_neon_max_u32
4727 #define gen_helper_neon_pmin_s32 gen_helper_neon_min_s32
4728 #define gen_helper_neon_pmin_u32 gen_helper_neon_min_u32
4730 #define GEN_NEON_INTEGER_OP_ENV(name) do { \
4731 switch ((size << 1) | u) { \
4732 case 0: \
4733 gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \
4734 break; \
4735 case 1: \
4736 gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \
4737 break; \
4738 case 2: \
4739 gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \
4740 break; \
4741 case 3: \
4742 gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \
4743 break; \
4744 case 4: \
4745 gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \
4746 break; \
4747 case 5: \
4748 gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \
4749 break; \
4750 default: return 1; \
4751 }} while (0)
4753 #define GEN_NEON_INTEGER_OP(name) do { \
4754 switch ((size << 1) | u) { \
4755 case 0: \
4756 gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \
4757 break; \
4758 case 1: \
4759 gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \
4760 break; \
4761 case 2: \
4762 gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \
4763 break; \
4764 case 3: \
4765 gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \
4766 break; \
4767 case 4: \
4768 gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \
4769 break; \
4770 case 5: \
4771 gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \
4772 break; \
4773 default: return 1; \
4774 }} while (0)
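/* For example, with size == 1 and u == 0, GEN_NEON_INTEGER_OP(max)
 * expands to gen_helper_neon_max_s16(tmp, tmp, tmp2): the macros just
 * dispatch on element size and signedness, returning 1 for the
 * unsupported size == 3 encodings.
 */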
4776 static TCGv_i32 neon_load_scratch(int scratch)
4778 TCGv_i32 tmp = tcg_temp_new_i32();
4779 tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
4783 static void neon_store_scratch(int scratch, TCGv_i32 var)
4785 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
4786 tcg_temp_free_i32(var);
4789 static inline TCGv_i32 neon_get_scalar(int size, int reg)
4793 tmp = neon_load_reg(reg & 7, reg >> 4);
4795 gen_neon_dup_high16(tmp);
4797 gen_neon_dup_low16(tmp);
4800 tmp = neon_load_reg(reg & 15, reg >> 4);
4805 static int gen_neon_unzip(int rd, int rm, int size, int q)
4809 if (!q && size == 2) {
4812 pd = vfp_reg_ptr(true, rd);
4813 pm = vfp_reg_ptr(true, rm);
4817 gen_helper_neon_qunzip8(pd, pm);
4820 gen_helper_neon_qunzip16(pd, pm);
4823 gen_helper_neon_qunzip32(pd, pm);
4831 gen_helper_neon_unzip8(pd, pm);
4834 gen_helper_neon_unzip16(pd, pm);
4840 tcg_temp_free_ptr(pd);
4841 tcg_temp_free_ptr(pm);
4845 static int gen_neon_zip(int rd, int rm, int size, int q)
4849 if (!q && size == 2) {
4852 pd = vfp_reg_ptr(true, rd);
4853 pm = vfp_reg_ptr(true, rm);
4857 gen_helper_neon_qzip8(pd, pm);
4860 gen_helper_neon_qzip16(pd, pm);
4863 gen_helper_neon_qzip32(pd, pm);
4871 gen_helper_neon_zip8(pd, pm);
4874 gen_helper_neon_zip16(pd, pm);
4880 tcg_temp_free_ptr(pd);
4881 tcg_temp_free_ptr(pm);
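/* gen_neon_unzip/gen_neon_zip work in place on register pairs
 * addressed through pointers into the register file; 'q' selects the
 * 128-bit helpers, and a nonzero return is used to reject the size/q
 * combinations not handled here.
 */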
4885 static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
4889 rd = tcg_temp_new_i32();
4890 tmp = tcg_temp_new_i32();
4892 tcg_gen_shli_i32(rd, t0, 8);
4893 tcg_gen_andi_i32(rd, rd, 0xff00ff00);
4894 tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
4895 tcg_gen_or_i32(rd, rd, tmp);
4897 tcg_gen_shri_i32(t1, t1, 8);
4898 tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
4899 tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
4900 tcg_gen_or_i32(t1, t1, tmp);
4901 tcg_gen_mov_i32(t0, rd);
4903 tcg_temp_free_i32(tmp);
4904 tcg_temp_free_i32(rd);
4907 static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
4911 rd = tcg_temp_new_i32();
4912 tmp = tcg_temp_new_i32();
4914 tcg_gen_shli_i32(rd, t0, 16);
4915 tcg_gen_andi_i32(tmp, t1, 0xffff);
4916 tcg_gen_or_i32(rd, rd, tmp);
4917 tcg_gen_shri_i32(t1, t1, 16);
4918 tcg_gen_andi_i32(tmp, t0, 0xffff0000);
4919 tcg_gen_or_i32(t1, t1, tmp);
4920 tcg_gen_mov_i32(t0, rd);
4922 tcg_temp_free_i32(tmp);
4923 tcg_temp_free_i32(rd);
4931 } const neon_ls_element_type[11] = {
4945 /* Translate a NEON load/store element instruction. Return nonzero if the
4946 instruction is invalid. */
4947 static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
4969 /* FIXME: this access check should not take precedence over UNDEF
4970 * for invalid encodings; we will generate incorrect syndrome information
4971 * for attempts to execute invalid vfp/neon encodings with FP disabled.
4973 if (s->fp_excp_el) {
4974 gen_exception_insn(s, 4, EXCP_UDEF,
4975 syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
4979 if (!s->vfp_enabled)
4981 VFP_DREG_D(rd, insn);
4982 rn = (insn >> 16) & 0xf;
4984 load = (insn & (1 << 21)) != 0;
4985 endian = s->be_data;
4986 mmu_idx = get_mem_index(s);
4987 if ((insn & (1 << 23)) == 0) {
4988 /* Load store all elements. */
4989 op = (insn >> 8) & 0xf;
4990 size = (insn >> 6) & 3;
4993 /* Catch UNDEF cases for bad values of align field */
4996 if (((insn >> 5) & 1) == 1) {
5001 if (((insn >> 4) & 3) == 3) {
5008 nregs = neon_ls_element_type[op].nregs;
5009 interleave = neon_ls_element_type[op].interleave;
5010 spacing = neon_ls_element_type[op].spacing;
5011 if (size == 3 && (interleave | spacing) != 1) {
5014 tmp64 = tcg_temp_new_i64();
5015 addr = tcg_temp_new_i32();
5016 tmp2 = tcg_const_i32(1 << size);
5017 load_reg_var(s, addr, rn);
5018 for (reg = 0; reg < nregs; reg++) {
5019 for (n = 0; n < 8 >> size; n++) {
5021 for (xs = 0; xs < interleave; xs++) {
5022 int tt = rd + reg + spacing * xs;
5025 gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size);
5026 neon_store_element64(tt, n, size, tmp64);
5028 neon_load_element64(tmp64, tt, n, size);
5029 gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size);
5031 tcg_gen_add_i32(addr, addr, tmp2);
5035 tcg_temp_free_i32(addr);
5036 tcg_temp_free_i32(tmp2);
5037 tcg_temp_free_i64(tmp64);
5038 stride = nregs * interleave * 8;
5040 size = (insn >> 10) & 3;
5042 /* Load single element to all lanes. */
5043 int a = (insn >> 4) & 1;
5047 size = (insn >> 6) & 3;
5048 nregs = ((insn >> 8) & 3) + 1;
5051 if (nregs != 4 || a == 0) {
5054 /* For VLD4 size==3 a == 1 means 32 bits at 16 byte alignment */
5057 if (nregs == 1 && a == 1 && size == 0) {
5060 if (nregs == 3 && a == 1) {
5063 addr = tcg_temp_new_i32();
5064 load_reg_var(s, addr, rn);
5066 /* VLD1 to all lanes: bit 5 indicates how many Dregs to write.
5067 * VLD2/3/4 to all lanes: bit 5 indicates register stride.
5069 stride = (insn & (1 << 5)) ? 2 : 1;
5070 vec_size = nregs == 1 ? stride * 8 : 8;
5072 tmp = tcg_temp_new_i32();
5073 for (reg = 0; reg < nregs; reg++) {
5074 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
5076 if ((rd & 1) && vec_size == 16) {
5077 /* We cannot write 16 bytes at once because the
5078 * destination is unaligned.
5080 tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0),
5082 tcg_gen_gvec_mov(0, neon_reg_offset(rd + 1, 0),
5083 neon_reg_offset(rd, 0), 8, 8);
5085 tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0),
5086 vec_size, vec_size, tmp);
5088 tcg_gen_addi_i32(addr, addr, 1 << size);
5091 tcg_temp_free_i32(tmp);
5092 tcg_temp_free_i32(addr);
5093 stride = (1 << size) * nregs;
5095 /* Single element. */
5096 int idx = (insn >> 4) & 0xf;
5097 pass = (insn >> 7) & 1;
5100 shift = ((insn >> 5) & 3) * 8;
5104 shift = ((insn >> 6) & 1) * 16;
5105 stride = (insn & (1 << 5)) ? 2 : 1;
5109 stride = (insn & (1 << 6)) ? 2 : 1;
5114 nregs = ((insn >> 8) & 3) + 1;
5115 /* Catch the UNDEF cases. This is unavoidably a bit messy. */
5118 if (((idx & (1 << size)) != 0) ||
5119 (size == 2 && ((idx & 3) == 1 || (idx & 3) == 2))) {
5124 if ((idx & 1) != 0) {
5129 if (size == 2 && (idx & 2) != 0) {
5134 if ((size == 2) && ((idx & 3) == 3)) {
5141 if ((rd + stride * (nregs - 1)) > 31) {
5142 /* Attempts to write off the end of the register file
5143 * are UNPREDICTABLE; we choose to UNDEF because otherwise
5144 * the neon_load_reg() would write off the end of the array.
5148 addr = tcg_temp_new_i32();
5149 load_reg_var(s, addr, rn);
5150 for (reg = 0; reg < nregs; reg++) {
5152 tmp = tcg_temp_new_i32();
5155 gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
5158 gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
5161 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
5163 default: /* Avoid compiler warnings. */
5167 tmp2 = neon_load_reg(rd, pass);
5168 tcg_gen_deposit_i32(tmp, tmp2, tmp,
5169 shift, size ? 16 : 8);
5170 tcg_temp_free_i32(tmp2);
5172 neon_store_reg(rd, pass, tmp);
5173 } else { /* Store */
5174 tmp = neon_load_reg(rd, pass);
5176 tcg_gen_shri_i32(tmp, tmp, shift);
5179 gen_aa32_st8(s, tmp, addr, get_mem_index(s));
5182 gen_aa32_st16(s, tmp, addr, get_mem_index(s));
5185 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
5188 tcg_temp_free_i32(tmp);
5191 tcg_gen_addi_i32(addr, addr, 1 << size);
5193 tcg_temp_free_i32(addr);
5194 stride = nregs * (1 << size);
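/*
 * Address writeback (a descriptive note, following the ARM ARM rules for
 * VLDn/VSTn post-indexing): rm == 15 means no writeback, rm == 13 means
 * post-increment rn by the transfer size computed in 'stride' above, and
 * any other rm post-indexes by that register; roughly
 *
 *     Rn := Rn + (rm == 13 ? stride : Rm);
 */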
5200 base = load_reg(s, rn);
5202 tcg_gen_addi_i32(base, base, stride);
5205 index = load_reg(s, rm);
5206 tcg_gen_add_i32(base, base, index);
5207 tcg_temp_free_i32(index);
5209 store_reg(s, rn, base);

5214 static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
5217 case 0: gen_helper_neon_narrow_u8(dest, src); break;
5218 case 1: gen_helper_neon_narrow_u16(dest, src); break;
5219 case 2: tcg_gen_extrl_i64_i32(dest, src); break;
5224 static inline void gen_neon_narrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
5227 case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
5228 case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
5229 case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
5234 static inline void gen_neon_narrow_satu(int size, TCGv_i32 dest, TCGv_i64 src)
5237 case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
5238 case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
5239 case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
5244 static inline void gen_neon_unarrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
5247 case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
5248 case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
5249 case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
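/*
 * Naming note for the narrowing helpers above (descriptive): _narrow
 * truncates each element, _narrow_sats/_satu saturate a signed/unsigned
 * wide source to the signed/unsigned narrow range, and _unarrow_sats
 * handles the signed-source-to-unsigned-result case (VQMOVUN). E.g. for
 * size == 2 the unarrow case is roughly
 *
 *     dest = src < 0 ? 0 : (src > 0xffffffffull ? 0xffffffffu : (uint32_t)src);
 *
 * The selection between these helpers is made by gen_neon_narrow_op() below.
 */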
5254 static inline void gen_neon_shift_narrow(int size, TCGv_i32 var, TCGv_i32 shift,
5260 case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
5261 case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
5266 case 1: gen_helper_neon_rshl_s16(var, var, shift); break;
5267 case 2: gen_helper_neon_rshl_s32(var, var, shift); break;
5274 case 1: gen_helper_neon_shl_u16(var, var, shift); break;
5275 case 2: gen_helper_neon_shl_u32(var, var, shift); break;
5280 case 1: gen_helper_neon_shl_s16(var, var, shift); break;
5281 case 2: gen_helper_neon_shl_s32(var, var, shift); break;
5288 static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u)
5292 case 0: gen_helper_neon_widen_u8(dest, src); break;
5293 case 1: gen_helper_neon_widen_u16(dest, src); break;
5294 case 2: tcg_gen_extu_i32_i64(dest, src); break;
5299 case 0: gen_helper_neon_widen_s8(dest, src); break;
5300 case 1: gen_helper_neon_widen_s16(dest, src); break;
5301 case 2: tcg_gen_ext_i32_i64(dest, src); break;
5305 tcg_temp_free_i32(src);
5308 static inline void gen_neon_addl(int size)
5311 case 0: gen_helper_neon_addl_u16(CPU_V001); break;
5312 case 1: gen_helper_neon_addl_u32(CPU_V001); break;
5313 case 2: tcg_gen_add_i64(CPU_V001); break;
5318 static inline void gen_neon_subl(int size)
5321 case 0: gen_helper_neon_subl_u16(CPU_V001); break;
5322 case 1: gen_helper_neon_subl_u32(CPU_V001); break;
5323 case 2: tcg_gen_sub_i64(CPU_V001); break;
5328 static inline void gen_neon_negl(TCGv_i64 var, int size)
5331 case 0: gen_helper_neon_negl_u16(var, var); break;
5332 case 1: gen_helper_neon_negl_u32(var, var); break;
5334 tcg_gen_neg_i64(var, var);
5340 static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
5343 case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
5344 case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
5349 static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
5354 switch ((size << 1) | u) {
5355 case 0: gen_helper_neon_mull_s8(dest, a, b); break;
5356 case 1: gen_helper_neon_mull_u8(dest, a, b); break;
5357 case 2: gen_helper_neon_mull_s16(dest, a, b); break;
5358 case 3: gen_helper_neon_mull_u16(dest, a, b); break;
5360 tmp = gen_muls_i64_i32(a, b);
5361 tcg_gen_mov_i64(dest, tmp);
5362 tcg_temp_free_i64(tmp);
5365 tmp = gen_mulu_i64_i32(a, b);
5366 tcg_gen_mov_i64(dest, tmp);
5367 tcg_temp_free_i64(tmp);
5372 /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
5373 Don't forget to clean them now. */
5375 tcg_temp_free_i32(a);
5376 tcg_temp_free_i32(b);
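/*
 * The switch above packs size and signedness into one index, so cases
 * 0..3 map to the s8/u8/s16/u16 widening multiply helpers, while 32-bit
 * sources fall through to the generic gen_muls_i64_i32()/gen_mulu_i64_i32()
 * paths. A sketch of the mapping:
 *
 *     index = (size << 1) | u;   // 0:s8 1:u8 2:s16 3:u16, else 32-bit
 */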
5380 static void gen_neon_narrow_op(int op, int u, int size,
5381 TCGv_i32 dest, TCGv_i64 src)
5385 gen_neon_unarrow_sats(size, dest, src);
5387 gen_neon_narrow(size, dest, src);
5391 gen_neon_narrow_satu(size, dest, src);
5393 gen_neon_narrow_sats(size, dest, src);
5398 /* Symbolic constants for op fields for Neon 3-register same-length.
5399 * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
5402 #define NEON_3R_VHADD 0
5403 #define NEON_3R_VQADD 1
5404 #define NEON_3R_VRHADD 2
5405 #define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
5406 #define NEON_3R_VHSUB 4
5407 #define NEON_3R_VQSUB 5
5408 #define NEON_3R_VCGT 6
5409 #define NEON_3R_VCGE 7
5410 #define NEON_3R_VSHL 8
5411 #define NEON_3R_VQSHL 9
5412 #define NEON_3R_VRSHL 10
5413 #define NEON_3R_VQRSHL 11
5414 #define NEON_3R_VMAX 12
5415 #define NEON_3R_VMIN 13
5416 #define NEON_3R_VABD 14
5417 #define NEON_3R_VABA 15
5418 #define NEON_3R_VADD_VSUB 16
5419 #define NEON_3R_VTST_VCEQ 17
5420 #define NEON_3R_VML 18 /* VMLA, VMLS */
5421 #define NEON_3R_VMUL 19
5422 #define NEON_3R_VPMAX 20
5423 #define NEON_3R_VPMIN 21
5424 #define NEON_3R_VQDMULH_VQRDMULH 22
5425 #define NEON_3R_VPADD_VQRDMLAH 23
5426 #define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
5427 #define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS, VQRDMLSH */
5428 #define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
5429 #define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
5430 #define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
5431 #define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
5432 #define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
5433 #define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */
5435 static const uint8_t neon_3r_sizes[] = {
5436 [NEON_3R_VHADD] = 0x7,
5437 [NEON_3R_VQADD] = 0xf,
5438 [NEON_3R_VRHADD] = 0x7,
5439 [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
5440 [NEON_3R_VHSUB] = 0x7,
5441 [NEON_3R_VQSUB] = 0xf,
5442 [NEON_3R_VCGT] = 0x7,
5443 [NEON_3R_VCGE] = 0x7,
5444 [NEON_3R_VSHL] = 0xf,
5445 [NEON_3R_VQSHL] = 0xf,
5446 [NEON_3R_VRSHL] = 0xf,
5447 [NEON_3R_VQRSHL] = 0xf,
5448 [NEON_3R_VMAX] = 0x7,
5449 [NEON_3R_VMIN] = 0x7,
5450 [NEON_3R_VABD] = 0x7,
5451 [NEON_3R_VABA] = 0x7,
5452 [NEON_3R_VADD_VSUB] = 0xf,
5453 [NEON_3R_VTST_VCEQ] = 0x7,
5454 [NEON_3R_VML] = 0x7,
5455 [NEON_3R_VMUL] = 0x7,
5456 [NEON_3R_VPMAX] = 0x7,
5457 [NEON_3R_VPMIN] = 0x7,
5458 [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
5459 [NEON_3R_VPADD_VQRDMLAH] = 0x7,
5460 [NEON_3R_SHA] = 0xf, /* size field encodes op type */
5461 [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */
5462 [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
5463 [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
5464 [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
5465 [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
5466 [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
5467 [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
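/*
 * Each entry above is a bitmask of the size values the op accepts. A
 * sketch of how it is consulted (the actual check appears in
 * disas_neon_data_insn() below):
 *
 *     if ((neon_3r_sizes[op] & (1 << size)) == 0) {
 *         return 1;   // UNDEF
 *     }
 */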
5470 /* Symbolic constants for op fields for Neon 2-register miscellaneous.
5471 * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
5474 #define NEON_2RM_VREV64 0
5475 #define NEON_2RM_VREV32 1
5476 #define NEON_2RM_VREV16 2
5477 #define NEON_2RM_VPADDL 4
5478 #define NEON_2RM_VPADDL_U 5
5479 #define NEON_2RM_AESE 6 /* Includes AESD */
5480 #define NEON_2RM_AESMC 7 /* Includes AESIMC */
5481 #define NEON_2RM_VCLS 8
5482 #define NEON_2RM_VCLZ 9
5483 #define NEON_2RM_VCNT 10
5484 #define NEON_2RM_VMVN 11
5485 #define NEON_2RM_VPADAL 12
5486 #define NEON_2RM_VPADAL_U 13
5487 #define NEON_2RM_VQABS 14
5488 #define NEON_2RM_VQNEG 15
5489 #define NEON_2RM_VCGT0 16
5490 #define NEON_2RM_VCGE0 17
5491 #define NEON_2RM_VCEQ0 18
5492 #define NEON_2RM_VCLE0 19
5493 #define NEON_2RM_VCLT0 20
5494 #define NEON_2RM_SHA1H 21
5495 #define NEON_2RM_VABS 22
5496 #define NEON_2RM_VNEG 23
5497 #define NEON_2RM_VCGT0_F 24
5498 #define NEON_2RM_VCGE0_F 25
5499 #define NEON_2RM_VCEQ0_F 26
5500 #define NEON_2RM_VCLE0_F 27
5501 #define NEON_2RM_VCLT0_F 28
5502 #define NEON_2RM_VABS_F 30
5503 #define NEON_2RM_VNEG_F 31
5504 #define NEON_2RM_VSWP 32
5505 #define NEON_2RM_VTRN 33
5506 #define NEON_2RM_VUZP 34
5507 #define NEON_2RM_VZIP 35
5508 #define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
5509 #define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
5510 #define NEON_2RM_VSHLL 38
5511 #define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
5512 #define NEON_2RM_VRINTN 40
5513 #define NEON_2RM_VRINTX 41
5514 #define NEON_2RM_VRINTA 42
5515 #define NEON_2RM_VRINTZ 43
5516 #define NEON_2RM_VCVT_F16_F32 44
5517 #define NEON_2RM_VRINTM 45
5518 #define NEON_2RM_VCVT_F32_F16 46
5519 #define NEON_2RM_VRINTP 47
5520 #define NEON_2RM_VCVTAU 48
5521 #define NEON_2RM_VCVTAS 49
5522 #define NEON_2RM_VCVTNU 50
5523 #define NEON_2RM_VCVTNS 51
5524 #define NEON_2RM_VCVTPU 52
5525 #define NEON_2RM_VCVTPS 53
5526 #define NEON_2RM_VCVTMU 54
5527 #define NEON_2RM_VCVTMS 55
5528 #define NEON_2RM_VRECPE 56
5529 #define NEON_2RM_VRSQRTE 57
5530 #define NEON_2RM_VRECPE_F 58
5531 #define NEON_2RM_VRSQRTE_F 59
5532 #define NEON_2RM_VCVT_FS 60
5533 #define NEON_2RM_VCVT_FU 61
5534 #define NEON_2RM_VCVT_SF 62
5535 #define NEON_2RM_VCVT_UF 63
5537 static int neon_2rm_is_float_op(int op)
5539 /* Return true if this neon 2reg-misc op is float-to-float */
5540 return (op == NEON_2RM_VABS_F || op == NEON_2RM_VNEG_F ||
5541 (op >= NEON_2RM_VRINTN && op <= NEON_2RM_VRINTZ) ||
5542 op == NEON_2RM_VRINTM ||
5543 (op >= NEON_2RM_VRINTP && op <= NEON_2RM_VCVTMS) ||
5544 op >= NEON_2RM_VRECPE_F);
5547 static bool neon_2rm_is_v8_op(int op)
5549 /* Return true if this neon 2reg-misc op is ARMv8 and up */
5551 case NEON_2RM_VRINTN:
5552 case NEON_2RM_VRINTA:
5553 case NEON_2RM_VRINTM:
5554 case NEON_2RM_VRINTP:
5555 case NEON_2RM_VRINTZ:
5556 case NEON_2RM_VRINTX:
5557 case NEON_2RM_VCVTAU:
5558 case NEON_2RM_VCVTAS:
5559 case NEON_2RM_VCVTNU:
5560 case NEON_2RM_VCVTNS:
5561 case NEON_2RM_VCVTPU:
5562 case NEON_2RM_VCVTPS:
5563 case NEON_2RM_VCVTMU:
5564 case NEON_2RM_VCVTMS:
5571 /* Each entry in this array has bit n set if the insn allows
5572 * size value n (otherwise it will UNDEF). Since unallocated
5573 * op values will have no bits set they always UNDEF.
5575 static const uint8_t neon_2rm_sizes[] = {
5576 [NEON_2RM_VREV64] = 0x7,
5577 [NEON_2RM_VREV32] = 0x3,
5578 [NEON_2RM_VREV16] = 0x1,
5579 [NEON_2RM_VPADDL] = 0x7,
5580 [NEON_2RM_VPADDL_U] = 0x7,
5581 [NEON_2RM_AESE] = 0x1,
5582 [NEON_2RM_AESMC] = 0x1,
5583 [NEON_2RM_VCLS] = 0x7,
5584 [NEON_2RM_VCLZ] = 0x7,
5585 [NEON_2RM_VCNT] = 0x1,
5586 [NEON_2RM_VMVN] = 0x1,
5587 [NEON_2RM_VPADAL] = 0x7,
5588 [NEON_2RM_VPADAL_U] = 0x7,
5589 [NEON_2RM_VQABS] = 0x7,
5590 [NEON_2RM_VQNEG] = 0x7,
5591 [NEON_2RM_VCGT0] = 0x7,
5592 [NEON_2RM_VCGE0] = 0x7,
5593 [NEON_2RM_VCEQ0] = 0x7,
5594 [NEON_2RM_VCLE0] = 0x7,
5595 [NEON_2RM_VCLT0] = 0x7,
5596 [NEON_2RM_SHA1H] = 0x4,
5597 [NEON_2RM_VABS] = 0x7,
5598 [NEON_2RM_VNEG] = 0x7,
5599 [NEON_2RM_VCGT0_F] = 0x4,
5600 [NEON_2RM_VCGE0_F] = 0x4,
5601 [NEON_2RM_VCEQ0_F] = 0x4,
5602 [NEON_2RM_VCLE0_F] = 0x4,
5603 [NEON_2RM_VCLT0_F] = 0x4,
5604 [NEON_2RM_VABS_F] = 0x4,
5605 [NEON_2RM_VNEG_F] = 0x4,
5606 [NEON_2RM_VSWP] = 0x1,
5607 [NEON_2RM_VTRN] = 0x7,
5608 [NEON_2RM_VUZP] = 0x7,
5609 [NEON_2RM_VZIP] = 0x7,
5610 [NEON_2RM_VMOVN] = 0x7,
5611 [NEON_2RM_VQMOVN] = 0x7,
5612 [NEON_2RM_VSHLL] = 0x7,
5613 [NEON_2RM_SHA1SU1] = 0x4,
5614 [NEON_2RM_VRINTN] = 0x4,
5615 [NEON_2RM_VRINTX] = 0x4,
5616 [NEON_2RM_VRINTA] = 0x4,
5617 [NEON_2RM_VRINTZ] = 0x4,
5618 [NEON_2RM_VCVT_F16_F32] = 0x2,
5619 [NEON_2RM_VRINTM] = 0x4,
5620 [NEON_2RM_VCVT_F32_F16] = 0x2,
5621 [NEON_2RM_VRINTP] = 0x4,
5622 [NEON_2RM_VCVTAU] = 0x4,
5623 [NEON_2RM_VCVTAS] = 0x4,
5624 [NEON_2RM_VCVTNU] = 0x4,
5625 [NEON_2RM_VCVTNS] = 0x4,
5626 [NEON_2RM_VCVTPU] = 0x4,
5627 [NEON_2RM_VCVTPS] = 0x4,
5628 [NEON_2RM_VCVTMU] = 0x4,
5629 [NEON_2RM_VCVTMS] = 0x4,
5630 [NEON_2RM_VRECPE] = 0x4,
5631 [NEON_2RM_VRSQRTE] = 0x4,
5632 [NEON_2RM_VRECPE_F] = 0x4,
5633 [NEON_2RM_VRSQRTE_F] = 0x4,
5634 [NEON_2RM_VCVT_FS] = 0x4,
5635 [NEON_2RM_VCVT_FU] = 0x4,
5636 [NEON_2RM_VCVT_SF] = 0x4,
5637 [NEON_2RM_VCVT_UF] = 0x4,
5641 /* Expand v8.1 simd helper. */
5642 static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn,
5643 int q, int rd, int rn, int rm)
5645 if (dc_isar_feature(aa32_rdm, s)) {
5646 int opr_sz = (1 + q) * 8;
5647 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
5648 vfp_reg_offset(1, rn),
5649 vfp_reg_offset(1, rm), cpu_env,
5650 opr_sz, opr_sz, 0, fn);
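/*
 * Usage sketch (matching the signature above): callers in the 3-reg-same
 * decode below pass one of the gvec qrdmlah/qrdmlsh helpers, e.g.
 *
 *     return do_v81_helper(s, gen_helper_gvec_qrdmlah_s16, q, rd, rn, rm);
 *
 * The aa32_rdm feature check above gates the expansion.
 */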
5657 * Expanders for VBitOps: VBIF, VBIT, VBSL.
5659 static void gen_bsl_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
5661 tcg_gen_xor_i64(rn, rn, rm);
5662 tcg_gen_and_i64(rn, rn, rd);
5663 tcg_gen_xor_i64(rd, rm, rn);
5666 static void gen_bit_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
5668 tcg_gen_xor_i64(rn, rn, rd);
5669 tcg_gen_and_i64(rn, rn, rm);
5670 tcg_gen_xor_i64(rd, rd, rn);
5673 static void gen_bif_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
5675 tcg_gen_xor_i64(rn, rn, rd);
5676 tcg_gen_andc_i64(rn, rn, rm);
5677 tcg_gen_xor_i64(rd, rd, rn);
5680 static void gen_bsl_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
5682 tcg_gen_xor_vec(vece, rn, rn, rm);
5683 tcg_gen_and_vec(vece, rn, rn, rd);
5684 tcg_gen_xor_vec(vece, rd, rm, rn);
5687 static void gen_bit_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
5689 tcg_gen_xor_vec(vece, rn, rn, rd);
5690 tcg_gen_and_vec(vece, rn, rn, rm);
5691 tcg_gen_xor_vec(vece, rd, rd, rn);
5694 static void gen_bif_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
5696 tcg_gen_xor_vec(vece, rn, rn, rd);
5697 tcg_gen_andc_vec(vece, rn, rn, rm);
5698 tcg_gen_xor_vec(vece, rd, rd, rn);
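/*
 * For reference, the xor/and sequences above implement the usual
 * branch-free forms of the bitwise selects (a sketch; rd/rn/rm as in the
 * expanders above):
 *
 *     VBSL:  rd = (rn & rd)  | (rm & ~rd)     select by the old rd
 *     VBIT:  rd = (rn & rm)  | (rd & ~rm)     insert rn where rm is set
 *     VBIF:  rd = (rn & ~rm) | (rd & rm)      insert rn where rm is clear
 *
 * e.g. rd ^ ((rd ^ rn) & rm) == (rn & rm) | (rd & ~rm).
 */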
5701 const GVecGen3 bsl_op = {
5702 .fni8 = gen_bsl_i64,
5703 .fniv = gen_bsl_vec,
5704 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5708 const GVecGen3 bit_op = {
5709 .fni8 = gen_bit_i64,
5710 .fniv = gen_bit_vec,
5711 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5715 const GVecGen3 bif_op = {
5716 .fni8 = gen_bif_i64,
5717 .fniv = gen_bif_vec,
5718 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5722 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
5724 tcg_gen_vec_sar8i_i64(a, a, shift);
5725 tcg_gen_vec_add8_i64(d, d, a);
5728 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
5730 tcg_gen_vec_sar16i_i64(a, a, shift);
5731 tcg_gen_vec_add16_i64(d, d, a);
5734 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
5736 tcg_gen_sari_i32(a, a, shift);
5737 tcg_gen_add_i32(d, d, a);
5740 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
5742 tcg_gen_sari_i64(a, a, shift);
5743 tcg_gen_add_i64(d, d, a);
5746 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
5748 tcg_gen_sari_vec(vece, a, a, sh);
5749 tcg_gen_add_vec(vece, d, d, a);
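/*
 * Per-element model of the accumulating shifts (a sketch): SSRA adds an
 * arithmetic right shift of the source into the destination,
 *
 *     d[i] += (signed)a[i] >> shift;
 *
 * USRA below is identical but uses a logical (unsigned) shift.
 */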
5752 const GVecGen2i ssra_op[4] = {
5753 { .fni8 = gen_ssra8_i64,
5754 .fniv = gen_ssra_vec,
5756 .opc = INDEX_op_sari_vec,
5758 { .fni8 = gen_ssra16_i64,
5759 .fniv = gen_ssra_vec,
5761 .opc = INDEX_op_sari_vec,
5763 { .fni4 = gen_ssra32_i32,
5764 .fniv = gen_ssra_vec,
5766 .opc = INDEX_op_sari_vec,
5768 { .fni8 = gen_ssra64_i64,
5769 .fniv = gen_ssra_vec,
5770 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5772 .opc = INDEX_op_sari_vec,
5776 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
5778 tcg_gen_vec_shr8i_i64(a, a, shift);
5779 tcg_gen_vec_add8_i64(d, d, a);
5782 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
5784 tcg_gen_vec_shr16i_i64(a, a, shift);
5785 tcg_gen_vec_add16_i64(d, d, a);
5788 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
5790 tcg_gen_shri_i32(a, a, shift);
5791 tcg_gen_add_i32(d, d, a);
5794 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
5796 tcg_gen_shri_i64(a, a, shift);
5797 tcg_gen_add_i64(d, d, a);
5800 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
5802 tcg_gen_shri_vec(vece, a, a, sh);
5803 tcg_gen_add_vec(vece, d, d, a);
5806 const GVecGen2i usra_op[4] = {
5807 { .fni8 = gen_usra8_i64,
5808 .fniv = gen_usra_vec,
5810 .opc = INDEX_op_shri_vec,
5812 { .fni8 = gen_usra16_i64,
5813 .fniv = gen_usra_vec,
5815 .opc = INDEX_op_shri_vec,
5817 { .fni4 = gen_usra32_i32,
5818 .fniv = gen_usra_vec,
5820 .opc = INDEX_op_shri_vec,
5822 { .fni8 = gen_usra64_i64,
5823 .fniv = gen_usra_vec,
5824 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5826 .opc = INDEX_op_shri_vec,
5830 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
5832 uint64_t mask = dup_const(MO_8, 0xff >> shift);
5833 TCGv_i64 t = tcg_temp_new_i64();
5835 tcg_gen_shri_i64(t, a, shift);
5836 tcg_gen_andi_i64(t, t, mask);
5837 tcg_gen_andi_i64(d, d, ~mask);
5838 tcg_gen_or_i64(d, d, t);
5839 tcg_temp_free_i64(t);
5842 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
5844 uint64_t mask = dup_const(MO_16, 0xffff >> shift);
5845 TCGv_i64 t = tcg_temp_new_i64();
5847 tcg_gen_shri_i64(t, a, shift);
5848 tcg_gen_andi_i64(t, t, mask);
5849 tcg_gen_andi_i64(d, d, ~mask);
5850 tcg_gen_or_i64(d, d, t);
5851 tcg_temp_free_i64(t);
5854 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
5856 tcg_gen_shri_i32(a, a, shift);
5857 tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
5860 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
5862 tcg_gen_shri_i64(a, a, shift);
5863 tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
5866 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
5869 tcg_gen_mov_vec(d, a);
5871 TCGv_vec t = tcg_temp_new_vec_matching(d);
5872 TCGv_vec m = tcg_temp_new_vec_matching(d);
5874 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
5875 tcg_gen_shri_vec(vece, t, a, sh);
5876 tcg_gen_and_vec(vece, d, d, m);
5877 tcg_gen_or_vec(vece, d, d, t);
5879 tcg_temp_free_vec(t);
5880 tcg_temp_free_vec(m);
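/*
 * SRI (shift right and insert), per element (a sketch): the shifted
 * source replaces the low bits of the destination while the top 'shift'
 * bits of the destination are kept, e.g. for bytes:
 *
 *     mask = 0xff >> shift;
 *     d[i] = (d[i] & ~mask) | ((a[i] >> shift) & mask);
 */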
5884 const GVecGen2i sri_op[4] = {
5885 { .fni8 = gen_shr8_ins_i64,
5886 .fniv = gen_shr_ins_vec,
5888 .opc = INDEX_op_shri_vec,
5890 { .fni8 = gen_shr16_ins_i64,
5891 .fniv = gen_shr_ins_vec,
5893 .opc = INDEX_op_shri_vec,
5895 { .fni4 = gen_shr32_ins_i32,
5896 .fniv = gen_shr_ins_vec,
5898 .opc = INDEX_op_shri_vec,
5900 { .fni8 = gen_shr64_ins_i64,
5901 .fniv = gen_shr_ins_vec,
5902 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5904 .opc = INDEX_op_shri_vec,
5908 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
5910 uint64_t mask = dup_const(MO_8, 0xff << shift);
5911 TCGv_i64 t = tcg_temp_new_i64();
5913 tcg_gen_shli_i64(t, a, shift);
5914 tcg_gen_andi_i64(t, t, mask);
5915 tcg_gen_andi_i64(d, d, ~mask);
5916 tcg_gen_or_i64(d, d, t);
5917 tcg_temp_free_i64(t);
5920 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
5922 uint64_t mask = dup_const(MO_16, 0xffff << shift);
5923 TCGv_i64 t = tcg_temp_new_i64();
5925 tcg_gen_shli_i64(t, a, shift);
5926 tcg_gen_andi_i64(t, t, mask);
5927 tcg_gen_andi_i64(d, d, ~mask);
5928 tcg_gen_or_i64(d, d, t);
5929 tcg_temp_free_i64(t);
5932 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
5934 tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
5937 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
5939 tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
5942 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
5945 tcg_gen_mov_vec(d, a);
5947 TCGv_vec t = tcg_temp_new_vec_matching(d);
5948 TCGv_vec m = tcg_temp_new_vec_matching(d);
5950 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
5951 tcg_gen_shli_vec(vece, t, a, sh);
5952 tcg_gen_and_vec(vece, d, d, m);
5953 tcg_gen_or_vec(vece, d, d, t);
5955 tcg_temp_free_vec(t);
5956 tcg_temp_free_vec(m);
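/*
 * SLI (shift left and insert), per element (a sketch): the shifted source
 * is merged in above the low 'shift' bits of the destination, which are
 * preserved, e.g. for bytes:
 *
 *     mask = (0xff << shift) & 0xff;
 *     d[i] = (d[i] & ~mask) | ((a[i] << shift) & mask);
 *
 * For 32/64-bit elements this reduces to a deposit, as in the _i32/_i64
 * variants above.
 */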
5960 const GVecGen2i sli_op[4] = {
5961 { .fni8 = gen_shl8_ins_i64,
5962 .fniv = gen_shl_ins_vec,
5964 .opc = INDEX_op_shli_vec,
5966 { .fni8 = gen_shl16_ins_i64,
5967 .fniv = gen_shl_ins_vec,
5969 .opc = INDEX_op_shli_vec,
5971 { .fni4 = gen_shl32_ins_i32,
5972 .fniv = gen_shl_ins_vec,
5974 .opc = INDEX_op_shli_vec,
5976 { .fni8 = gen_shl64_ins_i64,
5977 .fniv = gen_shl_ins_vec,
5978 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5980 .opc = INDEX_op_shli_vec,
5984 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
5986 gen_helper_neon_mul_u8(a, a, b);
5987 gen_helper_neon_add_u8(d, d, a);
5990 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
5992 gen_helper_neon_mul_u8(a, a, b);
5993 gen_helper_neon_sub_u8(d, d, a);
5996 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
5998 gen_helper_neon_mul_u16(a, a, b);
5999 gen_helper_neon_add_u16(d, d, a);
6002 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
6004 gen_helper_neon_mul_u16(a, a, b);
6005 gen_helper_neon_sub_u16(d, d, a);
6008 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
6010 tcg_gen_mul_i32(a, a, b);
6011 tcg_gen_add_i32(d, d, a);
6014 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
6016 tcg_gen_mul_i32(a, a, b);
6017 tcg_gen_sub_i32(d, d, a);
6020 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
6022 tcg_gen_mul_i64(a, a, b);
6023 tcg_gen_add_i64(d, d, a);
6026 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
6028 tcg_gen_mul_i64(a, a, b);
6029 tcg_gen_sub_i64(d, d, a);
6032 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
6034 tcg_gen_mul_vec(vece, a, a, b);
6035 tcg_gen_add_vec(vece, d, d, a);
6038 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
6040 tcg_gen_mul_vec(vece, a, a, b);
6041 tcg_gen_sub_vec(vece, d, d, a);
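/*
 * Element-wise model of the multiply-accumulate expanders above
 * (a sketch):
 *
 *     VMLA:  d[i] += a[i] * b[i];
 *     VMLS:  d[i] -= a[i] * b[i];
 *
 * Note the expanders form the product in place, clobbering 'a', before
 * accumulating into d.
 */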
6044 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
6045 * these tables are shared with AArch64 which does support them.
6047 const GVecGen3 mla_op[4] = {
6048 { .fni4 = gen_mla8_i32,
6049 .fniv = gen_mla_vec,
6050 .opc = INDEX_op_mul_vec,
6053 { .fni4 = gen_mla16_i32,
6054 .fniv = gen_mla_vec,
6055 .opc = INDEX_op_mul_vec,
6058 { .fni4 = gen_mla32_i32,
6059 .fniv = gen_mla_vec,
6060 .opc = INDEX_op_mul_vec,
6063 { .fni8 = gen_mla64_i64,
6064 .fniv = gen_mla_vec,
6065 .opc = INDEX_op_mul_vec,
6066 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
6071 const GVecGen3 mls_op[4] = {
6072 { .fni4 = gen_mls8_i32,
6073 .fniv = gen_mls_vec,
6074 .opc = INDEX_op_mul_vec,
6077 { .fni4 = gen_mls16_i32,
6078 .fniv = gen_mls_vec,
6079 .opc = INDEX_op_mul_vec,
6082 { .fni4 = gen_mls32_i32,
6083 .fniv = gen_mls_vec,
6084 .opc = INDEX_op_mul_vec,
6087 { .fni8 = gen_mls64_i64,
6088 .fniv = gen_mls_vec,
6089 .opc = INDEX_op_mul_vec,
6090 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
6095 /* CMTST: test is "if (X & Y != 0)". */
6096 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
6098 tcg_gen_and_i32(d, a, b);
6099 tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
6100 tcg_gen_neg_i32(d, d);
6103 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
6105 tcg_gen_and_i64(d, a, b);
6106 tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
6107 tcg_gen_neg_i64(d, d);
6110 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
6112 tcg_gen_and_vec(vece, d, a, b);
6113 tcg_gen_dupi_vec(vece, a, 0);
6114 tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
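/*
 * Scalar model of CMTST (a sketch): the result is all-ones when the
 * operands share any set bit and zero otherwise,
 *
 *     d = (a & b) != 0 ? -1 : 0;
 *
 * which is what the setcond+neg and cmp_vec sequences above compute.
 */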
6117 const GVecGen3 cmtst_op[4] = {
6118 { .fni4 = gen_helper_neon_tst_u8,
6119 .fniv = gen_cmtst_vec,
6121 { .fni4 = gen_helper_neon_tst_u16,
6122 .fniv = gen_cmtst_vec,
6124 { .fni4 = gen_cmtst_i32,
6125 .fniv = gen_cmtst_vec,
6127 { .fni8 = gen_cmtst_i64,
6128 .fniv = gen_cmtst_vec,
6129 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
6133 /* Translate a NEON data processing instruction. Return nonzero if the
6134 instruction is invalid.
6135 We process data in a mixture of 32-bit and 64-bit chunks.
6136 Mostly we use 32-bit chunks so we can use normal scalar instructions. */
6138 static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
6142 int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs;
6151 TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
6152 TCGv_ptr ptr1, ptr2, ptr3;
6155 /* FIXME: this access check should not take precedence over UNDEF
6156 * for invalid encodings; we will generate incorrect syndrome information
6157 * for attempts to execute invalid vfp/neon encodings with FP disabled.
6159 if (s->fp_excp_el) {
6160 gen_exception_insn(s, 4, EXCP_UDEF,
6161 syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
6165 if (!s->vfp_enabled)
6167 q = (insn & (1 << 6)) != 0;
6168 u = (insn >> 24) & 1;
6169 VFP_DREG_D(rd, insn);
6170 VFP_DREG_N(rn, insn);
6171 VFP_DREG_M(rm, insn);
6172 size = (insn >> 20) & 3;
6173 vec_size = q ? 16 : 8;
6174 rd_ofs = neon_reg_offset(rd, 0);
6175 rn_ofs = neon_reg_offset(rn, 0);
6176 rm_ofs = neon_reg_offset(rm, 0);
6178 if ((insn & (1 << 23)) == 0) {
6179 /* Three register same length. */
6180 op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
6181 /* Catch invalid op and bad size combinations: UNDEF */
6182 if ((neon_3r_sizes[op] & (1 << size)) == 0) {
6185 /* All insns of this form UNDEF for either this condition or the
6186 * superset of cases "Q==1"; we catch the latter later.
6188 if (q && ((rd | rn | rm) & 1)) {
6193 /* The SHA-1/SHA-256 3-register instructions require special
6194 * treatment here, as their size field is overloaded as an
6195 * op type selector, and they all consume their input in a single pass.
6201 if (!u) { /* SHA-1 */
6202 if (!dc_isar_feature(aa32_sha1, s)) {
6205 ptr1 = vfp_reg_ptr(true, rd);
6206 ptr2 = vfp_reg_ptr(true, rn);
6207 ptr3 = vfp_reg_ptr(true, rm);
6208 tmp4 = tcg_const_i32(size);
6209 gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp4);
6210 tcg_temp_free_i32(tmp4);
6211 } else { /* SHA-256 */
6212 if (!dc_isar_feature(aa32_sha2, s) || size == 3) {
6215 ptr1 = vfp_reg_ptr(true, rd);
6216 ptr2 = vfp_reg_ptr(true, rn);
6217 ptr3 = vfp_reg_ptr(true, rm);
6220 gen_helper_crypto_sha256h(ptr1, ptr2, ptr3);
6223 gen_helper_crypto_sha256h2(ptr1, ptr2, ptr3);
6226 gen_helper_crypto_sha256su1(ptr1, ptr2, ptr3);
6230 tcg_temp_free_ptr(ptr1);
6231 tcg_temp_free_ptr(ptr2);
6232 tcg_temp_free_ptr(ptr3);
6235 case NEON_3R_VPADD_VQRDMLAH:
6242 return do_v81_helper(s, gen_helper_gvec_qrdmlah_s16,
6245 return do_v81_helper(s, gen_helper_gvec_qrdmlah_s32,
6250 case NEON_3R_VFM_VQRDMLSH:
6261 return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s16,
6264 return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s32,
6269 case NEON_3R_LOGIC: /* Logic ops. */
6270 switch ((u << 2) | size) {
6272 tcg_gen_gvec_and(0, rd_ofs, rn_ofs, rm_ofs,
6273 vec_size, vec_size);
6276 tcg_gen_gvec_andc(0, rd_ofs, rn_ofs, rm_ofs,
6277 vec_size, vec_size);
6282 tcg_gen_gvec_mov(0, rd_ofs, rn_ofs, vec_size, vec_size);
6285 tcg_gen_gvec_or(0, rd_ofs, rn_ofs, rm_ofs,
6286 vec_size, vec_size);
6290 tcg_gen_gvec_orc(0, rd_ofs, rn_ofs, rm_ofs,
6291 vec_size, vec_size);
6294 tcg_gen_gvec_xor(0, rd_ofs, rn_ofs, rm_ofs,
6295 vec_size, vec_size);
6298 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs,
6299 vec_size, vec_size, &bsl_op);
6302 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs,
6303 vec_size, vec_size, &bit_op);
6306 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs,
6307 vec_size, vec_size, &bif_op);
6312 case NEON_3R_VADD_VSUB:
6314 tcg_gen_gvec_sub(size, rd_ofs, rn_ofs, rm_ofs,
6315 vec_size, vec_size);
6317 tcg_gen_gvec_add(size, rd_ofs, rn_ofs, rm_ofs,
6318 vec_size, vec_size);
6322 case NEON_3R_VMUL: /* VMUL */
6324 /* Polynomial case allows only P8 and is handled below. */
6329 tcg_gen_gvec_mul(size, rd_ofs, rn_ofs, rm_ofs,
6330 vec_size, vec_size);
6335 case NEON_3R_VML: /* VMLA, VMLS */
6336 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
6337 u ? &mls_op[size] : &mla_op[size]);
6340 case NEON_3R_VTST_VCEQ:
6342 tcg_gen_gvec_cmp(TCG_COND_EQ, size, rd_ofs, rn_ofs, rm_ofs,
6343 vec_size, vec_size);
6345 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs,
6346 vec_size, vec_size, &cmtst_op[size]);
6351 tcg_gen_gvec_cmp(u ? TCG_COND_GTU : TCG_COND_GT, size,
6352 rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
6356 tcg_gen_gvec_cmp(u ? TCG_COND_GEU : TCG_COND_GE, size,
6357 rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
6362 /* 64-bit element instructions. */
6363 for (pass = 0; pass < (q ? 2 : 1); pass++) {
6364 neon_load_reg64(cpu_V0, rn + pass);
6365 neon_load_reg64(cpu_V1, rm + pass);
6369 gen_helper_neon_qadd_u64(cpu_V0, cpu_env,
6372 gen_helper_neon_qadd_s64(cpu_V0, cpu_env,
6378 gen_helper_neon_qsub_u64(cpu_V0, cpu_env,
6381 gen_helper_neon_qsub_s64(cpu_V0, cpu_env,
6387 gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
6389 gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0);
6394 gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
6397 gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
6403 gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
6405 gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
6408 case NEON_3R_VQRSHL:
6410 gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
6413 gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
6420 neon_store_reg64(cpu_V0, rd + pass);
6429 case NEON_3R_VQRSHL:
6432 /* Shift instruction operands are reversed. */
6438 case NEON_3R_VPADD_VQRDMLAH:
6443 case NEON_3R_FLOAT_ARITH:
6444 pairwise = (u && size < 2); /* if VPADD (float) */
6446 case NEON_3R_FLOAT_MINMAX:
6447 pairwise = u; /* if VPMIN/VPMAX (float) */
6449 case NEON_3R_FLOAT_CMP:
6451 /* no encoding for U=0 C=1x */
6455 case NEON_3R_FLOAT_ACMP:
6460 case NEON_3R_FLOAT_MISC:
6461 /* VMAXNM/VMINNM in ARMv8 */
6462 if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) {
6466 case NEON_3R_VFM_VQRDMLSH:
6467 if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) {
6475 if (pairwise && q) {
6476 /* All the pairwise insns UNDEF if Q is set */
6480 for (pass = 0; pass < (q ? 4 : 2); pass++) {
6485 tmp = neon_load_reg(rn, 0);
6486 tmp2 = neon_load_reg(rn, 1);
6488 tmp = neon_load_reg(rm, 0);
6489 tmp2 = neon_load_reg(rm, 1);
6493 tmp = neon_load_reg(rn, pass);
6494 tmp2 = neon_load_reg(rm, pass);
6498 GEN_NEON_INTEGER_OP(hadd);
6501 GEN_NEON_INTEGER_OP_ENV(qadd);
6503 case NEON_3R_VRHADD:
6504 GEN_NEON_INTEGER_OP(rhadd);
6507 GEN_NEON_INTEGER_OP(hsub);
6510 GEN_NEON_INTEGER_OP_ENV(qsub);
6513 GEN_NEON_INTEGER_OP(shl);
6516 GEN_NEON_INTEGER_OP_ENV(qshl);
6519 GEN_NEON_INTEGER_OP(rshl);
6521 case NEON_3R_VQRSHL:
6522 GEN_NEON_INTEGER_OP_ENV(qrshl);
6525 GEN_NEON_INTEGER_OP(max);
6528 GEN_NEON_INTEGER_OP(min);
6531 GEN_NEON_INTEGER_OP(abd);
6534 GEN_NEON_INTEGER_OP(abd);
6535 tcg_temp_free_i32(tmp2);
6536 tmp2 = neon_load_reg(rd, pass);
6537 gen_neon_add(size, tmp, tmp2);
6540 /* VMUL.P8; other cases already eliminated. */
6541 gen_helper_neon_mul_p8(tmp, tmp, tmp2);
6544 GEN_NEON_INTEGER_OP(pmax);
6547 GEN_NEON_INTEGER_OP(pmin);
6549 case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high. */
6550 if (!u) { /* VQDMULH */
6553 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
6556 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
6560 } else { /* VQRDMULH */
6563 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
6566 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
6572 case NEON_3R_VPADD_VQRDMLAH:
6574 case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
6575 case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
6576 case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
6580 case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
6582 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6583 switch ((u << 2) | size) {
6586 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
6589 gen_helper_vfp_subs(tmp, tmp, tmp2, fpstatus);
6592 gen_helper_neon_abd_f32(tmp, tmp, tmp2, fpstatus);
6597 tcg_temp_free_ptr(fpstatus);
6600 case NEON_3R_FLOAT_MULTIPLY:
6602 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6603 gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
6605 tcg_temp_free_i32(tmp2);
6606 tmp2 = neon_load_reg(rd, pass);
6608 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
6610 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
6613 tcg_temp_free_ptr(fpstatus);
6616 case NEON_3R_FLOAT_CMP:
6618 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6620 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
6623 gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
6625 gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
6628 tcg_temp_free_ptr(fpstatus);
6631 case NEON_3R_FLOAT_ACMP:
6633 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6635 gen_helper_neon_acge_f32(tmp, tmp, tmp2, fpstatus);
6637 gen_helper_neon_acgt_f32(tmp, tmp, tmp2, fpstatus);
6639 tcg_temp_free_ptr(fpstatus);
6642 case NEON_3R_FLOAT_MINMAX:
6644 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6646 gen_helper_vfp_maxs(tmp, tmp, tmp2, fpstatus);
6648 gen_helper_vfp_mins(tmp, tmp, tmp2, fpstatus);
6650 tcg_temp_free_ptr(fpstatus);
6653 case NEON_3R_FLOAT_MISC:
6656 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6658 gen_helper_vfp_maxnums(tmp, tmp, tmp2, fpstatus);
6660 gen_helper_vfp_minnums(tmp, tmp, tmp2, fpstatus);
6662 tcg_temp_free_ptr(fpstatus);
6665 gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
6667 gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
6671 case NEON_3R_VFM_VQRDMLSH:
6673 /* VFMA, VFMS: fused multiply-add */
6674 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6675 TCGv_i32 tmp3 = neon_load_reg(rd, pass);
6678 gen_helper_vfp_negs(tmp, tmp);
6680 gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus);
6681 tcg_temp_free_i32(tmp3);
6682 tcg_temp_free_ptr(fpstatus);
6688 tcg_temp_free_i32(tmp2);
6690 /* Save the result. For elementwise operations we can put it
6691 straight into the destination register. For pairwise operations
6692 we have to be careful to avoid clobbering the source operands. */
6693 if (pairwise && rd == rm) {
6694 neon_store_scratch(pass, tmp);
6696 neon_store_reg(rd, pass, tmp);
6700 if (pairwise && rd == rm) {
6701 for (pass = 0; pass < (q ? 4 : 2); pass++) {
6702 tmp = neon_load_scratch(pass);
6703 neon_store_reg(rd, pass, tmp);
6706 /* End of 3 register same size operations. */
6707 } else if (insn & (1 << 4)) {
6708 if ((insn & 0x00380080) != 0) {
6709 /* Two registers and shift. */
6710 op = (insn >> 8) & 0xf;
6711 if (insn & (1 << 7)) {
6719 while ((insn & (1 << (size + 19))) == 0)
6722 shift = (insn >> 16) & ((1 << (3 + size)) - 1);
6724 /* Shift by immediate:
6725 VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */
6726 if (q && ((rd | rm) & 1)) {
6729 if (!u && (op == 4 || op == 6)) {
6732 /* Right shifts are encoded as N - shift, where N is the
6733 element size in bits. */
6735 shift = shift - (1 << (size + 3));
6740 /* Right shift comes here negative. */
6742 /* Shifts larger than the element size are architecturally
6743 * valid. Unsigned results in all zeros; signed results in all sign bits.
6747 tcg_gen_gvec_sari(size, rd_ofs, rm_ofs,
6748 MIN(shift, (8 << size) - 1),
6749 vec_size, vec_size);
6750 } else if (shift >= 8 << size) {
6751 tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0);
6753 tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift,
6754 vec_size, vec_size);
6759 /* Right shift comes here negative. */
6761 /* Shifts larger than the element size are architecturally
6762 * valid. Unsigned results in all zeros; signed results in all sign bits.
6766 tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
6767 MIN(shift, (8 << size) - 1),
6769 } else if (shift >= 8 << size) {
6772 tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
6773 shift, &usra_op[size]);
6781 /* Right shift comes here negative. */
6783 /* Shift out of range leaves destination unchanged. */
6784 if (shift < 8 << size) {
6785 tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
6786 shift, &sri_op[size]);
6790 case 5: /* VSHL, VSLI */
6792 /* Shift out of range leaves destination unchanged. */
6793 if (shift < 8 << size) {
6794 tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size,
6795 vec_size, shift, &sli_op[size]);
6798 /* Shifts larger than the element size are
6799 * architecturally valid and result in zero.
6801 if (shift >= 8 << size) {
6802 tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, 0);
6804 tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift,
6805 vec_size, vec_size);
6817 /* To avoid excessive duplication of ops we implement shift
6818 * by immediate using the variable shift operations.
6820 imm = dup_const(size, shift);
6822 for (pass = 0; pass < count; pass++) {
6824 neon_load_reg64(cpu_V0, rm + pass);
6825 tcg_gen_movi_i64(cpu_V1, imm);
6830 gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, cpu_V1);
6832 gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1);
6834 case 6: /* VQSHLU */
6835 gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
6840 gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
6843 gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
6848 g_assert_not_reached();
6852 neon_load_reg64(cpu_V1, rd + pass);
6853 tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
6855 neon_store_reg64(cpu_V0, rd + pass);
6856 } else { /* size < 3 */
6857 /* Operands in T0 and T1. */
6858 tmp = neon_load_reg(rm, pass);
6859 tmp2 = tcg_temp_new_i32();
6860 tcg_gen_movi_i32(tmp2, imm);
6864 GEN_NEON_INTEGER_OP(rshl);
6866 case 6: /* VQSHLU */
6869 gen_helper_neon_qshlu_s8(tmp, cpu_env,
6873 gen_helper_neon_qshlu_s16(tmp, cpu_env,
6877 gen_helper_neon_qshlu_s32(tmp, cpu_env,
6885 GEN_NEON_INTEGER_OP_ENV(qshl);
6888 g_assert_not_reached();
6890 tcg_temp_free_i32(tmp2);
6894 tmp2 = neon_load_reg(rd, pass);
6895 gen_neon_add(size, tmp, tmp2);
6896 tcg_temp_free_i32(tmp2);
6898 neon_store_reg(rd, pass, tmp);
6901 } else if (op < 10) {
6902 /* Shift by immediate and narrow:
6903 VSHRN, VRSHRN, VQSHRN, VQRSHRN. */
6904 int input_unsigned = (op == 8) ? !u : u;
6908 shift = shift - (1 << (size + 3));
6911 tmp64 = tcg_const_i64(shift);
6912 neon_load_reg64(cpu_V0, rm);
6913 neon_load_reg64(cpu_V1, rm + 1);
6914 for (pass = 0; pass < 2; pass++) {
6922 if (input_unsigned) {
6923 gen_helper_neon_rshl_u64(cpu_V0, in, tmp64);
6925 gen_helper_neon_rshl_s64(cpu_V0, in, tmp64);
6928 if (input_unsigned) {
6929 gen_helper_neon_shl_u64(cpu_V0, in, tmp64);
6931 gen_helper_neon_shl_s64(cpu_V0, in, tmp64);
6934 tmp = tcg_temp_new_i32();
6935 gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
6936 neon_store_reg(rd, pass, tmp);
6938 tcg_temp_free_i64(tmp64);
6941 imm = (uint16_t)shift;
6945 imm = (uint32_t)shift;
6947 tmp2 = tcg_const_i32(imm);
6948 tmp4 = neon_load_reg(rm + 1, 0);
6949 tmp5 = neon_load_reg(rm + 1, 1);
6950 for (pass = 0; pass < 2; pass++) {
6952 tmp = neon_load_reg(rm, 0);
6956 gen_neon_shift_narrow(size, tmp, tmp2, q,
6959 tmp3 = neon_load_reg(rm, 1);
6963 gen_neon_shift_narrow(size, tmp3, tmp2, q,
6965 tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
6966 tcg_temp_free_i32(tmp);
6967 tcg_temp_free_i32(tmp3);
6968 tmp = tcg_temp_new_i32();
6969 gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
6970 neon_store_reg(rd, pass, tmp);
6972 tcg_temp_free_i32(tmp2);
6974 } else if (op == 10) {
6976 if (q || (rd & 1)) {
6979 tmp = neon_load_reg(rm, 0);
6980 tmp2 = neon_load_reg(rm, 1);
6981 for (pass = 0; pass < 2; pass++) {
6985 gen_neon_widen(cpu_V0, tmp, size, u);
6988 /* The shift is less than the width of the source
6989 type, so we can just shift the whole register. */
6990 tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
6991 /* Widen the result of shift: we need to clear
6992 * the potential overflow bits resulting from
6993 * left bits of the narrow input appearing as
6994 * right bits of left the neighbour narrow
6996 if (size < 2 || !u) {
6999 imm = (0xffu >> (8 - shift));
7001 } else if (size == 1) {
7002 imm = 0xffff >> (16 - shift);
7005 imm = 0xffffffff >> (32 - shift);
7008 imm64 = imm | (((uint64_t)imm) << 32);
7012 tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
7015 neon_store_reg64(cpu_V0, rd + pass);
7017 } else if (op >= 14) {
7018 /* VCVT fixed-point. */
7019 if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
7022 /* We have already masked out the must-be-1 top bit of imm6,
7023 * hence this 32-shift where the ARM ARM has 64-imm6.
7026 for (pass = 0; pass < (q ? 4 : 2); pass++) {
7027 tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass));
7030 gen_vfp_ulto(0, shift, 1);
7032 gen_vfp_slto(0, shift, 1);
7035 gen_vfp_toul(0, shift, 1);
7037 gen_vfp_tosl(0, shift, 1);
7039 tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, pass));
7044 } else { /* (insn & 0x00380080) == 0 */
7045 int invert, reg_ofs, vec_size;
7047 if (q && (rd & 1)) {
7051 op = (insn >> 8) & 0xf;
7052 /* One register and immediate. */
7053 imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
7054 invert = (insn & (1 << 5)) != 0;
7055 /* Note that op = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.
7056 * We choose to not special-case this and will behave as if a
7057 * valid constant encoding of 0 had been given.
7076 imm = (imm << 8) | (imm << 24);
7079 imm = (imm << 8) | 0xff;
7082 imm = (imm << 16) | 0xffff;
7085 imm |= (imm << 8) | (imm << 16) | (imm << 24);
7094 imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
7095 | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
7102 reg_ofs = neon_reg_offset(rd, 0);
7103 vec_size = q ? 16 : 8;
7105 if (op & 1 && op < 12) {
7107 /* The immediate value has already been inverted,
7108 * so BIC becomes AND.
7110 tcg_gen_gvec_andi(MO_32, reg_ofs, reg_ofs, imm,
7111 vec_size, vec_size);
7113 tcg_gen_gvec_ori(MO_32, reg_ofs, reg_ofs, imm,
7114 vec_size, vec_size);
7118 if (op == 14 && invert) {
7119 TCGv_i64 t64 = tcg_temp_new_i64();
7121 for (pass = 0; pass <= q; ++pass) {
7125 for (n = 0; n < 8; n++) {
7126 if (imm & (1 << (n + pass * 8))) {
7127 val |= 0xffull << (n * 8);
7130 tcg_gen_movi_i64(t64, val);
7131 neon_store_reg64(t64, rd + pass);
7133 tcg_temp_free_i64(t64);
7135 tcg_gen_gvec_dup32i(reg_ofs, vec_size, vec_size, imm);
7139 } else { /* ((insn & 0x00800010) == 0x00800000) */
7141 op = (insn >> 8) & 0xf;
7142 if ((insn & (1 << 6)) == 0) {
7143 /* Three registers of different lengths. */
7147 /* undefreq: bit 0 : UNDEF if size == 0
7148 * bit 1 : UNDEF if size == 1
7149 * bit 2 : UNDEF if size == 2
7150 * bit 3 : UNDEF if U == 1
7151 * Note that [2:0] set implies 'always UNDEF'
7154 /* prewiden, src1_wide, src2_wide, undefreq */
7155 static const int neon_3reg_wide[16][4] = {
7156 {1, 0, 0, 0}, /* VADDL */
7157 {1, 1, 0, 0}, /* VADDW */
7158 {1, 0, 0, 0}, /* VSUBL */
7159 {1, 1, 0, 0}, /* VSUBW */
7160 {0, 1, 1, 0}, /* VADDHN */
7161 {0, 0, 0, 0}, /* VABAL */
7162 {0, 1, 1, 0}, /* VSUBHN */
7163 {0, 0, 0, 0}, /* VABDL */
7164 {0, 0, 0, 0}, /* VMLAL */
7165 {0, 0, 0, 9}, /* VQDMLAL */
7166 {0, 0, 0, 0}, /* VMLSL */
7167 {0, 0, 0, 9}, /* VQDMLSL */
7168 {0, 0, 0, 0}, /* Integer VMULL */
7169 {0, 0, 0, 1}, /* VQDMULL */
7170 {0, 0, 0, 0xa}, /* Polynomial VMULL */
7171 {0, 0, 0, 7}, /* Reserved: always UNDEF */
7174 prewiden = neon_3reg_wide[op][0];
7175 src1_wide = neon_3reg_wide[op][1];
7176 src2_wide = neon_3reg_wide[op][2];
7177 undefreq = neon_3reg_wide[op][3];
7179 if ((undefreq & (1 << size)) ||
7180 ((undefreq & 8) && u)) {
7183 if ((src1_wide && (rn & 1)) ||
7184 (src2_wide && (rm & 1)) ||
7185 (!src2_wide && (rd & 1))) {
7189 /* Handle VMULL.P64 (Polynomial 64x64 to 128 bit multiply)
7190 * outside the loop below as it only performs a single pass.
7192 if (op == 14 && size == 2) {
7193 TCGv_i64 tcg_rn, tcg_rm, tcg_rd;
7195 if (!dc_isar_feature(aa32_pmull, s)) {
7198 tcg_rn = tcg_temp_new_i64();
7199 tcg_rm = tcg_temp_new_i64();
7200 tcg_rd = tcg_temp_new_i64();
7201 neon_load_reg64(tcg_rn, rn);
7202 neon_load_reg64(tcg_rm, rm);
7203 gen_helper_neon_pmull_64_lo(tcg_rd, tcg_rn, tcg_rm);
7204 neon_store_reg64(tcg_rd, rd);
7205 gen_helper_neon_pmull_64_hi(tcg_rd, tcg_rn, tcg_rm);
7206 neon_store_reg64(tcg_rd, rd + 1);
7207 tcg_temp_free_i64(tcg_rn);
7208 tcg_temp_free_i64(tcg_rm);
7209 tcg_temp_free_i64(tcg_rd);
7213 /* Avoid overlapping operands. Wide source operands are
7214 always aligned so will never overlap with wide
7215 destinations in problematic ways. */
7216 if (rd == rm && !src2_wide) {
7217 tmp = neon_load_reg(rm, 1);
7218 neon_store_scratch(2, tmp);
7219 } else if (rd == rn && !src1_wide) {
7220 tmp = neon_load_reg(rn, 1);
7221 neon_store_scratch(2, tmp);
7224 for (pass = 0; pass < 2; pass++) {
7226 neon_load_reg64(cpu_V0, rn + pass);
7229 if (pass == 1 && rd == rn) {
7230 tmp = neon_load_scratch(2);
7232 tmp = neon_load_reg(rn, pass);
7235 gen_neon_widen(cpu_V0, tmp, size, u);
7239 neon_load_reg64(cpu_V1, rm + pass);
7242 if (pass == 1 && rd == rm) {
7243 tmp2 = neon_load_scratch(2);
7245 tmp2 = neon_load_reg(rm, pass);
7248 gen_neon_widen(cpu_V1, tmp2, size, u);
7252 case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
7253 gen_neon_addl(size);
7255 case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
7256 gen_neon_subl(size);
7258 case 5: case 7: /* VABAL, VABDL */
7259 switch ((size << 1) | u) {
7261 gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
7264 gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
7267 gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
7270 gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
7273 gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
7276 gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
7280 tcg_temp_free_i32(tmp2);
7281 tcg_temp_free_i32(tmp);
7283 case 8: case 9: case 10: case 11: case 12: case 13:
7284 /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
7285 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
7287 case 14: /* Polynomial VMULL */
7288 gen_helper_neon_mull_p8(cpu_V0, tmp, tmp2);
7289 tcg_temp_free_i32(tmp2);
7290 tcg_temp_free_i32(tmp);
7292 default: /* 15 is RESERVED: caught earlier */
7297 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
7298 neon_store_reg64(cpu_V0, rd + pass);
7299 } else if (op == 5 || (op >= 8 && op <= 11)) {
7301 neon_load_reg64(cpu_V1, rd + pass);
7303 case 10: /* VMLSL */
7304 gen_neon_negl(cpu_V0, size);
7306 case 5: case 8: /* VABAL, VMLAL */
7307 gen_neon_addl(size);
7309 case 9: case 11: /* VQDMLAL, VQDMLSL */
7310 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
7312 gen_neon_negl(cpu_V0, size);
7314 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
7319 neon_store_reg64(cpu_V0, rd + pass);
7320 } else if (op == 4 || op == 6) {
7321 /* Narrowing operation. */
7322 tmp = tcg_temp_new_i32();
7326 gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
7329 gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
7332 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
7333 tcg_gen_extrl_i64_i32(tmp, cpu_V0);
7340 gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
7343 gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
7346 tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
7347 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
7348 tcg_gen_extrl_i64_i32(tmp, cpu_V0);
7356 neon_store_reg(rd, 0, tmp3);
7357 neon_store_reg(rd, 1, tmp);
7360 /* Write back the result. */
7361 neon_store_reg64(cpu_V0, rd + pass);
7365 /* Two registers and a scalar. NB that for ops of this form
7366 * the ARM ARM labels bit 24 as Q, but it is in our variable 'u', not 'q'.
7373 case 1: /* Floating point VMLA scalar */
7374 case 5: /* Floating point VMLS scalar */
7375 case 9: /* Floating point VMUL scalar */
7380 case 0: /* Integer VMLA scalar */
7381 case 4: /* Integer VMLS scalar */
7382 case 8: /* Integer VMUL scalar */
7383 case 12: /* VQDMULH scalar */
7384 case 13: /* VQRDMULH scalar */
7385 if (u && ((rd | rn) & 1)) {
7388 tmp = neon_get_scalar(size, rm);
7389 neon_store_scratch(0, tmp);
7390 for (pass = 0; pass < (u ? 4 : 2); pass++) {
7391 tmp = neon_load_scratch(0);
7392 tmp2 = neon_load_reg(rn, pass);
7395 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
7397 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
7399 } else if (op == 13) {
7401 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
7403 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
7405 } else if (op & 1) {
7406 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7407 gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
7408 tcg_temp_free_ptr(fpstatus);
7411 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
7412 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
7413 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
7417 tcg_temp_free_i32(tmp2);
7420 tmp2 = neon_load_reg(rd, pass);
7423 gen_neon_add(size, tmp, tmp2);
7427 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7428 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
7429 tcg_temp_free_ptr(fpstatus);
7433 gen_neon_rsb(size, tmp, tmp2);
7437 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7438 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
7439 tcg_temp_free_ptr(fpstatus);
7445 tcg_temp_free_i32(tmp2);
7447 neon_store_reg(rd, pass, tmp);
7450 case 3: /* VQDMLAL scalar */
7451 case 7: /* VQDMLSL scalar */
7452 case 11: /* VQDMULL scalar */
7457 case 2: /* VMLAL scalar */
7458 case 6: /* VMLSL scalar */
7459 case 10: /* VMULL scalar */
7463 tmp2 = neon_get_scalar(size, rm);
7464 /* We need a copy of tmp2 because gen_neon_mull
7465 * deletes it during pass 0. */
7466 tmp4 = tcg_temp_new_i32();
7467 tcg_gen_mov_i32(tmp4, tmp2);
7468 tmp3 = neon_load_reg(rn, 1);
7470 for (pass = 0; pass < 2; pass++) {
7472 tmp = neon_load_reg(rn, 0);
7477 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
7479 neon_load_reg64(cpu_V1, rd + pass);
7483 gen_neon_negl(cpu_V0, size);
7486 gen_neon_addl(size);
7489 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
7491 gen_neon_negl(cpu_V0, size);
7493 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
7499 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
7504 neon_store_reg64(cpu_V0, rd + pass);
7507 case 14: /* VQRDMLAH scalar */
7508 case 15: /* VQRDMLSH scalar */
7510 NeonGenThreeOpEnvFn *fn;
7512 if (!dc_isar_feature(aa32_rdm, s)) {
7515 if (u && ((rd | rn) & 1)) {
7520 fn = gen_helper_neon_qrdmlah_s16;
7522 fn = gen_helper_neon_qrdmlah_s32;
7526 fn = gen_helper_neon_qrdmlsh_s16;
7528 fn = gen_helper_neon_qrdmlsh_s32;
7532 tmp2 = neon_get_scalar(size, rm);
7533 for (pass = 0; pass < (u ? 4 : 2); pass++) {
7534 tmp = neon_load_reg(rn, pass);
7535 tmp3 = neon_load_reg(rd, pass);
7536 fn(tmp, cpu_env, tmp, tmp2, tmp3);
7537 tcg_temp_free_i32(tmp3);
7538 neon_store_reg(rd, pass, tmp);
7540 tcg_temp_free_i32(tmp2);
7544 g_assert_not_reached();
7547 } else { /* size == 3 */
7550 imm = (insn >> 8) & 0xf;
7555 if (q && ((rd | rn | rm) & 1)) {
7560 neon_load_reg64(cpu_V0, rn);
7562 neon_load_reg64(cpu_V1, rn + 1);
7564 } else if (imm == 8) {
7565 neon_load_reg64(cpu_V0, rn + 1);
7567 neon_load_reg64(cpu_V1, rm);
7570 tmp64 = tcg_temp_new_i64();
7572 neon_load_reg64(cpu_V0, rn);
7573 neon_load_reg64(tmp64, rn + 1);
7575 neon_load_reg64(cpu_V0, rn + 1);
7576 neon_load_reg64(tmp64, rm);
7578 tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
7579 tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
7580 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
7582 neon_load_reg64(cpu_V1, rm);
7584 neon_load_reg64(cpu_V1, rm + 1);
7587 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
7588 tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
7589 tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
7590 tcg_temp_free_i64(tmp64);
7593 neon_load_reg64(cpu_V0, rn);
7594 tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
7595 neon_load_reg64(cpu_V1, rm);
7596 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
7597 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
7599 neon_store_reg64(cpu_V0, rd);
7601 neon_store_reg64(cpu_V1, rd + 1);
7603 } else if ((insn & (1 << 11)) == 0) {
7604 /* Two register misc. */
7605 op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
7606 size = (insn >> 18) & 3;
7607 /* UNDEF for unknown op values and bad op-size combinations */
7608 if ((neon_2rm_sizes[op] & (1 << size)) == 0) {
7611 if (neon_2rm_is_v8_op(op) &&
7612 !arm_dc_feature(s, ARM_FEATURE_V8)) {
7615 if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) &&
7616 q && ((rm | rd) & 1)) {
7620 case NEON_2RM_VREV64:
7621 for (pass = 0; pass < (q ? 2 : 1); pass++) {
7622 tmp = neon_load_reg(rm, pass * 2);
7623 tmp2 = neon_load_reg(rm, pass * 2 + 1);
7625 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
7626 case 1: gen_swap_half(tmp); break;
7627 case 2: /* no-op */ break;
7630 neon_store_reg(rd, pass * 2 + 1, tmp);
7632 neon_store_reg(rd, pass * 2, tmp2);
7635 case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break;
7636 case 1: gen_swap_half(tmp2); break;
7639 neon_store_reg(rd, pass * 2, tmp2);
7643 case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
7644 case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
7645 for (pass = 0; pass < q + 1; pass++) {
7646 tmp = neon_load_reg(rm, pass * 2);
7647 gen_neon_widen(cpu_V0, tmp, size, op & 1);
7648 tmp = neon_load_reg(rm, pass * 2 + 1);
7649 gen_neon_widen(cpu_V1, tmp, size, op & 1);
7651 case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
7652 case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
7653 case 2: tcg_gen_add_i64(CPU_V001); break;
7656 if (op >= NEON_2RM_VPADAL) {
7658 neon_load_reg64(cpu_V1, rd + pass);
7659 gen_neon_addl(size);
7661 neon_store_reg64(cpu_V0, rd + pass);
7667 for (n = 0; n < (q ? 4 : 2); n += 2) {
7668 tmp = neon_load_reg(rm, n);
7669 tmp2 = neon_load_reg(rd, n + 1);
7670 neon_store_reg(rm, n, tmp2);
7671 neon_store_reg(rd, n + 1, tmp);
7678 if (gen_neon_unzip(rd, rm, size, q)) {
7683 if (gen_neon_zip(rd, rm, size, q)) {
7687 case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
7688 /* also VQMOVUN; op field and mnemonics don't line up */
7693 for (pass = 0; pass < 2; pass++) {
7694 neon_load_reg64(cpu_V0, rm + pass);
7695 tmp = tcg_temp_new_i32();
7696 gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size,
7701 neon_store_reg(rd, 0, tmp2);
7702 neon_store_reg(rd, 1, tmp);
7706 case NEON_2RM_VSHLL:
7707 if (q || (rd & 1)) {
7710 tmp = neon_load_reg(rm, 0);
7711 tmp2 = neon_load_reg(rm, 1);
7712 for (pass = 0; pass < 2; pass++) {
7715 gen_neon_widen(cpu_V0, tmp, size, 1);
7716 tcg_gen_shli_i64(cpu_V0, cpu_V0, 8 << size);
7717 neon_store_reg64(cpu_V0, rd + pass);
7720 case NEON_2RM_VCVT_F16_F32:
7725 if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) ||
7729 tmp = tcg_temp_new_i32();
7730 tmp2 = tcg_temp_new_i32();
7731 fpst = get_fpstatus_ptr(true);
7732 ahp = get_ahp_flag();
7733 tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0));
7734 gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, fpst, ahp);
7735 tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 1));
7736 gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, fpst, ahp);
7737 tcg_gen_shli_i32(tmp2, tmp2, 16);
7738 tcg_gen_or_i32(tmp2, tmp2, tmp);
7739 tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 2));
7740 gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, fpst, ahp);
7741 tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 3));
7742 neon_store_reg(rd, 0, tmp2);
7743 tmp2 = tcg_temp_new_i32();
7744 gen_helper_vfp_fcvt_f32_to_f16(tmp2, cpu_F0s, fpst, ahp);
7745 tcg_gen_shli_i32(tmp2, tmp2, 16);
7746 tcg_gen_or_i32(tmp2, tmp2, tmp);
7747 neon_store_reg(rd, 1, tmp2);
7748 tcg_temp_free_i32(tmp);
7749 tcg_temp_free_i32(ahp);
7750 tcg_temp_free_ptr(fpst);
7753 case NEON_2RM_VCVT_F32_F16:
7757 if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) ||
7761 fpst = get_fpstatus_ptr(true);
7762 ahp = get_ahp_flag();
7763 tmp3 = tcg_temp_new_i32();
7764 tmp = neon_load_reg(rm, 0);
7765 tmp2 = neon_load_reg(rm, 1);
7766 tcg_gen_ext16u_i32(tmp3, tmp);
7767 gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);
7768 tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 0));
7769 tcg_gen_shri_i32(tmp3, tmp, 16);
7770 gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);
7771 tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 1));
7772 tcg_temp_free_i32(tmp);
7773 tcg_gen_ext16u_i32(tmp3, tmp2);
7774 gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);
7775 tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 2));
7776 tcg_gen_shri_i32(tmp3, tmp2, 16);
7777 gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp3, fpst, ahp);
7778 tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 3));
7779 tcg_temp_free_i32(tmp2);
7780 tcg_temp_free_i32(tmp3);
7781 tcg_temp_free_i32(ahp);
7782 tcg_temp_free_ptr(fpst);
7785 case NEON_2RM_AESE: case NEON_2RM_AESMC:
7786 if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) {
7789 ptr1 = vfp_reg_ptr(true, rd);
7790 ptr2 = vfp_reg_ptr(true, rm);
7792 /* Bit 6 is the lowest opcode bit; it distinguishes between
7793 * encryption (AESE/AESMC) and decryption (AESD/AESIMC) */
7795 tmp3 = tcg_const_i32(extract32(insn, 6, 1));
7797 if (op == NEON_2RM_AESE) {
7798 gen_helper_crypto_aese(ptr1, ptr2, tmp3);
7800 gen_helper_crypto_aesmc(ptr1, ptr2, tmp3);
7802 tcg_temp_free_ptr(ptr1);
7803 tcg_temp_free_ptr(ptr2);
7804 tcg_temp_free_i32(tmp3);
7806 case NEON_2RM_SHA1H:
7807 if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) {
7810 ptr1 = vfp_reg_ptr(true, rd);
7811 ptr2 = vfp_reg_ptr(true, rm);
7813 gen_helper_crypto_sha1h(ptr1, ptr2);
7815 tcg_temp_free_ptr(ptr1);
7816 tcg_temp_free_ptr(ptr2);
7818 case NEON_2RM_SHA1SU1:
7819 if ((rm | rd) & 1) {
7822 /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
7824 if (!dc_isar_feature(aa32_sha2, s)) {
7827 } else if (!dc_isar_feature(aa32_sha1, s)) {
7830 ptr1 = vfp_reg_ptr(true, rd);
7831 ptr2 = vfp_reg_ptr(true, rm);
7833 gen_helper_crypto_sha256su0(ptr1, ptr2);
7835 gen_helper_crypto_sha1su1(ptr1, ptr2);
7837 tcg_temp_free_ptr(ptr1);
7838 tcg_temp_free_ptr(ptr2);
7842 tcg_gen_gvec_not(0, rd_ofs, rm_ofs, vec_size, vec_size);
7845 tcg_gen_gvec_neg(size, rd_ofs, rm_ofs, vec_size, vec_size);
7850 for (pass = 0; pass < (q ? 4 : 2); pass++) {
7851 if (neon_2rm_is_float_op(op)) {
7852 tcg_gen_ld_f32(cpu_F0s, cpu_env,
7853 neon_reg_offset(rm, pass));
7856 tmp = neon_load_reg(rm, pass);
7859 case NEON_2RM_VREV32:
7861 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
7862 case 1: gen_swap_half(tmp); break;
7866 case NEON_2RM_VREV16:
7871 case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
7872 case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
7873 case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
7879 case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
7880 case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
7881 case 2: tcg_gen_clzi_i32(tmp, tmp, 32); break;
7886 gen_helper_neon_cnt_u8(tmp, tmp);
7888 case NEON_2RM_VQABS:
7891 gen_helper_neon_qabs_s8(tmp, cpu_env, tmp);
7894 gen_helper_neon_qabs_s16(tmp, cpu_env, tmp);
7897 gen_helper_neon_qabs_s32(tmp, cpu_env, tmp);
7902 case NEON_2RM_VQNEG:
7905 gen_helper_neon_qneg_s8(tmp, cpu_env, tmp);
7908 gen_helper_neon_qneg_s16(tmp, cpu_env, tmp);
7911 gen_helper_neon_qneg_s32(tmp, cpu_env, tmp);
7916 case NEON_2RM_VCGT0: case NEON_2RM_VCLE0:
7917 tmp2 = tcg_const_i32(0);
7919 case 0: gen_helper_neon_cgt_s8(tmp, tmp, tmp2); break;
7920 case 1: gen_helper_neon_cgt_s16(tmp, tmp, tmp2); break;
7921 case 2: gen_helper_neon_cgt_s32(tmp, tmp, tmp2); break;
7924 tcg_temp_free_i32(tmp2);
7925 if (op == NEON_2RM_VCLE0) {
7926 tcg_gen_not_i32(tmp, tmp);
7929 case NEON_2RM_VCGE0: case NEON_2RM_VCLT0:
7930 tmp2 = tcg_const_i32(0);
7932 case 0: gen_helper_neon_cge_s8(tmp, tmp, tmp2); break;
7933 case 1: gen_helper_neon_cge_s16(tmp, tmp, tmp2); break;
7934 case 2: gen_helper_neon_cge_s32(tmp, tmp, tmp2); break;
7937 tcg_temp_free_i32(tmp2);
7938 if (op == NEON_2RM_VCLT0) {
7939 tcg_gen_not_i32(tmp, tmp);
7942 case NEON_2RM_VCEQ0:
7943 tmp2 = tcg_const_i32(0);
7945 case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
7946 case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
7947 case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
7950 tcg_temp_free_i32(tmp2);
7954 case 0: gen_helper_neon_abs_s8(tmp, tmp); break;
7955 case 1: gen_helper_neon_abs_s16(tmp, tmp); break;
7956 case 2: tcg_gen_abs_i32(tmp, tmp); break;
7960 case NEON_2RM_VCGT0_F:
7962 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7963 tmp2 = tcg_const_i32(0);
7964 gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
7965 tcg_temp_free_i32(tmp2);
7966 tcg_temp_free_ptr(fpstatus);
7969 case NEON_2RM_VCGE0_F:
7971 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7972 tmp2 = tcg_const_i32(0);
7973 gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
7974 tcg_temp_free_i32(tmp2);
7975 tcg_temp_free_ptr(fpstatus);
7978 case NEON_2RM_VCEQ0_F:
7980 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7981 tmp2 = tcg_const_i32(0);
7982 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
7983 tcg_temp_free_i32(tmp2);
7984 tcg_temp_free_ptr(fpstatus);
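/* The "less/greater than zero" float forms below reuse the >= and >
 * helpers with the operands swapped: a <= 0 iff 0 >= a, and a < 0
 * iff 0 > a.
 */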
7987 case NEON_2RM_VCLE0_F:
7989 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7990 tmp2 = tcg_const_i32(0);
7991 gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus);
7992 tcg_temp_free_i32(tmp2);
7993 tcg_temp_free_ptr(fpstatus);
7996 case NEON_2RM_VCLT0_F:
7998 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7999 tmp2 = tcg_const_i32(0);
8000 gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus);
8001 tcg_temp_free_i32(tmp2);
8002 tcg_temp_free_ptr(fpstatus);
8005 case NEON_2RM_VABS_F:
8008 case NEON_2RM_VNEG_F:
8012 tmp2 = neon_load_reg(rd, pass);
8013 neon_store_reg(rm, pass, tmp2);
8016 tmp2 = neon_load_reg(rd, pass);
8018 case 0: gen_neon_trn_u8(tmp, tmp2); break;
8019 case 1: gen_neon_trn_u16(tmp, tmp2); break;
8022 neon_store_reg(rm, pass, tmp2);
8024 case NEON_2RM_VRINTN:
8025 case NEON_2RM_VRINTA:
8026 case NEON_2RM_VRINTM:
8027 case NEON_2RM_VRINTP:
8028 case NEON_2RM_VRINTZ:
8031 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
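/* The VRINT variants carry the FP rounding mode in op bits [2:1];
 * below that field is remapped onto the fp_decode_rm[] table, except
 * for VRINTZ which always uses round-towards-zero.
 */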
8034 if (op == NEON_2RM_VRINTZ) {
8035 rmode = FPROUNDING_ZERO;
8037 rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1];
8040 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
8041 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
8043 gen_helper_rints(cpu_F0s, cpu_F0s, fpstatus);
8044 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
8046 tcg_temp_free_ptr(fpstatus);
8047 tcg_temp_free_i32(tcg_rmode);
8050 case NEON_2RM_VRINTX:
8052 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
8053 gen_helper_rints_exact(cpu_F0s, cpu_F0s, fpstatus);
8054 tcg_temp_free_ptr(fpstatus);
8057 case NEON_2RM_VCVTAU:
8058 case NEON_2RM_VCVTAS:
8059 case NEON_2RM_VCVTNU:
8060 case NEON_2RM_VCVTNS:
8061 case NEON_2RM_VCVTPU:
8062 case NEON_2RM_VCVTPS:
8063 case NEON_2RM_VCVTMU:
8064 case NEON_2RM_VCVTMS:
8066 bool is_signed = !extract32(insn, 7, 1);
8067 TCGv_ptr fpst = get_fpstatus_ptr(1);
8068 TCGv_i32 tcg_rmode, tcg_shift;
8069 int rmode = fp_decode_rm[extract32(insn, 8, 2)];
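/* For these VCVT forms the rounding mode comes from insn bits [9:8]
 * via fp_decode_rm[], and insn bit 7 selects unsigned (set) versus
 * signed (clear) conversion, matching is_signed above.
 */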
8071 tcg_shift = tcg_const_i32(0);
8072 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
8073 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
8077 gen_helper_vfp_tosls(cpu_F0s, cpu_F0s,
8080 gen_helper_vfp_touls(cpu_F0s, cpu_F0s,
8084 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
8086 tcg_temp_free_i32(tcg_rmode);
8087 tcg_temp_free_i32(tcg_shift);
8088 tcg_temp_free_ptr(fpst);
8091 case NEON_2RM_VRECPE:
8093 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
8094 gen_helper_recpe_u32(tmp, tmp, fpstatus);
8095 tcg_temp_free_ptr(fpstatus);
8098 case NEON_2RM_VRSQRTE:
8100 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
8101 gen_helper_rsqrte_u32(tmp, tmp, fpstatus);
8102 tcg_temp_free_ptr(fpstatus);
8105 case NEON_2RM_VRECPE_F:
8107 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
8108 gen_helper_recpe_f32(cpu_F0s, cpu_F0s, fpstatus);
8109 tcg_temp_free_ptr(fpstatus);
8112 case NEON_2RM_VRSQRTE_F:
8114 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
8115 gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, fpstatus);
8116 tcg_temp_free_ptr(fpstatus);
8119 case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
8122 case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
8125 case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
8126 gen_vfp_tosiz(0, 1);
8128 case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
8129 gen_vfp_touiz(0, 1);
8132 /* Reserved op values were caught by the
8133 * neon_2rm_sizes[] check earlier. */
8137 if (neon_2rm_is_float_op(op)) {
8138 tcg_gen_st_f32(cpu_F0s, cpu_env,
8139 neon_reg_offset(rd, pass));
8141 neon_store_reg(rd, pass, tmp);
8146 } else if ((insn & (1 << 10)) == 0) {
8148 int n = ((insn >> 8) & 3) + 1;
8149 if ((rn + n) > 32) {
8150 /* This is UNPREDICTABLE; we choose to UNDEF to avoid the
8151 * helper function running off the end of the register file. */
8156 if (insn & (1 << 6)) {
8157 tmp = neon_load_reg(rd, 0);
8159 tmp = tcg_temp_new_i32();
8160 tcg_gen_movi_i32(tmp, 0);
8162 tmp2 = neon_load_reg(rm, 0);
8163 ptr1 = vfp_reg_ptr(true, rn);
8164 tmp5 = tcg_const_i32(n);
8165 gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp5);
8166 tcg_temp_free_i32(tmp);
8167 if (insn & (1 << 6)) {
8168 tmp = neon_load_reg(rd, 1);
8170 tmp = tcg_temp_new_i32();
8171 tcg_gen_movi_i32(tmp, 0);
8173 tmp3 = neon_load_reg(rm, 1);
8174 gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp5);
8175 tcg_temp_free_i32(tmp5);
8176 tcg_temp_free_ptr(ptr1);
8177 neon_store_reg(rd, 0, tmp2);
8178 neon_store_reg(rd, 1, tmp3);
8179 tcg_temp_free_i32(tmp);
8180 } else if ((insn & 0x380) == 0) {
8185 if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
8188 if (insn & (1 << 16)) {
8190 element = (insn >> 17) & 7;
8191 } else if (insn & (1 << 17)) {
8193 element = (insn >> 18) & 3;
8196 element = (insn >> 19) & 1;
8198 tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, 0),
8199 neon_element_offset(rm, element, size),
8200 q ? 16 : 8, q ? 16 : 8);
8209 /* Advanced SIMD three registers of the same length extension.
8210 * 31 25 23 22 20 16 12 11 10 9 8 3 0
8211 * +---------------+-----+---+-----+----+----+---+----+---+----+---------+----+
8212 * | 1 1 1 1 1 1 0 | op1 | D | op2 | Vn | Vd | 1 | o3 | 0 | o4 | N Q M U | Vm |
8213 * +---------------+-----+---+-----+----+----+---+----+---+----+---------+----+
 */
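/* All of these operate on the whole D (Q=0) or Q (Q=1) register; when
 * Q is set the vector register numbers must be even, which is what the
 * "(rd | rn | rm) & q" check below enforces.
 */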
8215 static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn)
8217 gen_helper_gvec_3 *fn_gvec = NULL;
8218 gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL;
8219 int rd, rn, rm, opr_sz;
8223 q = extract32(insn, 6, 1);
8224 VFP_DREG_D(rd, insn);
8225 VFP_DREG_N(rn, insn);
8226 VFP_DREG_M(rm, insn);
8227 if ((rd | rn | rm) & q) {
8231 if ((insn & 0xfe200f10) == 0xfc200800) {
8232 /* VCMLA -- 1111 110R R.1S .... .... 1000 ...0 .... */
8233 int size = extract32(insn, 20, 1);
8234 data = extract32(insn, 23, 2); /* rot */
8235 if (!dc_isar_feature(aa32_vcma, s)
8236 || (!size && !dc_isar_feature(aa32_fp16_arith, s))) {
8239 fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah;
8240 } else if ((insn & 0xfea00f10) == 0xfc800800) {
8241 /* VCADD -- 1111 110R 1.0S .... .... 1000 ...0 .... */
8242 int size = extract32(insn, 20, 1);
8243 data = extract32(insn, 24, 1); /* rot */
8244 if (!dc_isar_feature(aa32_vcma, s)
8245 || (!size && !dc_isar_feature(aa32_fp16_arith, s))) {
8248 fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh;
8249 } else if ((insn & 0xfeb00f00) == 0xfc200d00) {
8250 /* V[US]DOT -- 1111 1100 0.10 .... .... 1101 .Q.U .... */
8251 bool u = extract32(insn, 4, 1);
8252 if (!dc_isar_feature(aa32_dp, s)) {
8255 fn_gvec = u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b;
8260 if (s->fp_excp_el) {
8261 gen_exception_insn(s, 4, EXCP_UDEF,
8262 syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
8265 if (!s->vfp_enabled) {
8269 opr_sz = (1 + q) * 8;
8271 TCGv_ptr fpst = get_fpstatus_ptr(1);
8272 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
8273 vfp_reg_offset(1, rn),
8274 vfp_reg_offset(1, rm), fpst,
8275 opr_sz, opr_sz, data, fn_gvec_ptr);
8276 tcg_temp_free_ptr(fpst);
8278 tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd),
8279 vfp_reg_offset(1, rn),
8280 vfp_reg_offset(1, rm),
8281 opr_sz, opr_sz, data, fn_gvec);
8286 /* Advanced SIMD two registers and a scalar extension.
8287 * 31 24 23 22 20 16 12 11 10 9 8 3 0
8288 * +-----------------+----+---+----+----+----+---+----+---+----+---------+----+
8289 * | 1 1 1 1 1 1 1 0 | o1 | D | o2 | Vn | Vd | 1 | o3 | 0 | o4 | N Q M U | Vm |
8290 * +-----------------+----+---+----+----+----+---+----+---+----+---------+----+
 */
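/* As with the three-same extension group above, Q selects a 64-bit or
 * 128-bit operation. The scalar index is taken from the M bit for the
 * fp16 and dot-product forms, and is implicitly 0 for the fp32 VCMLA
 * form, as decoded below.
 */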
8294 static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn)
8296 gen_helper_gvec_3 *fn_gvec = NULL;
8297 gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL;
8298 int rd, rn, rm, opr_sz, data;
8301 q = extract32(insn, 6, 1);
8302 VFP_DREG_D(rd, insn);
8303 VFP_DREG_N(rn, insn);
8304 if ((rd | rn) & q) {
8308 if ((insn & 0xff000f10) == 0xfe000800) {
8309 /* VCMLA (indexed) -- 1111 1110 S.RR .... .... 1000 ...0 .... */
8310 int rot = extract32(insn, 20, 2);
8311 int size = extract32(insn, 23, 1);
8314 if (!dc_isar_feature(aa32_vcma, s)) {
8318 if (!dc_isar_feature(aa32_fp16_arith, s)) {
8321 /* For fp16, rm is just Vm, and index is M. */
8322 rm = extract32(insn, 0, 4);
8323 index = extract32(insn, 5, 1);
8325 /* For fp32, rm is the usual M:Vm, and index is 0. */
8326 VFP_DREG_M(rm, insn);
8329 data = (index << 2) | rot;
8330 fn_gvec_ptr = (size ? gen_helper_gvec_fcmlas_idx
8331 : gen_helper_gvec_fcmlah_idx);
8332 } else if ((insn & 0xffb00f00) == 0xfe200d00) {
8333 /* V[US]DOT -- 1111 1110 0.10 .... .... 1101 .Q.U .... */
8334 int u = extract32(insn, 4, 1);
8335 if (!dc_isar_feature(aa32_dp, s)) {
8338 fn_gvec = u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b;
8339 /* rm is just Vm, and index is M. */
8340 data = extract32(insn, 5, 1); /* index */
8341 rm = extract32(insn, 0, 4);
8346 if (s->fp_excp_el) {
8347 gen_exception_insn(s, 4, EXCP_UDEF,
8348 syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
8351 if (!s->vfp_enabled) {
8355 opr_sz = (1 + q) * 8;
8357 TCGv_ptr fpst = get_fpstatus_ptr(1);
8358 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
8359 vfp_reg_offset(1, rn),
8360 vfp_reg_offset(1, rm), fpst,
8361 opr_sz, opr_sz, data, fn_gvec_ptr);
8362 tcg_temp_free_ptr(fpst);
8364 tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd),
8365 vfp_reg_offset(1, rn),
8366 vfp_reg_offset(1, rm),
8367 opr_sz, opr_sz, data, fn_gvec);
8372 static int disas_coproc_insn(DisasContext *s, uint32_t insn)
8374 int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
8375 const ARMCPRegInfo *ri;
8377 cpnum = (insn >> 8) & 0xf;
8379 /* First check for coprocessor space used for XScale/iwMMXt insns */
8380 if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cpnum < 2)) {
8381 if (extract32(s->c15_cpar, cpnum, 1) == 0) {
8384 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
8385 return disas_iwmmxt_insn(s, insn);
8386 } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
8387 return disas_dsp_insn(s, insn);
8392 /* Otherwise treat as a generic register access */
8393 is64 = (insn & (1 << 25)) == 0;
8394 if (!is64 && ((insn & (1 << 4)) == 0)) {
8402 opc1 = (insn >> 4) & 0xf;
8404 rt2 = (insn >> 16) & 0xf;
8406 crn = (insn >> 16) & 0xf;
8407 opc1 = (insn >> 21) & 7;
8408 opc2 = (insn >> 5) & 7;
8411 isread = (insn >> 20) & 1;
8412 rt = (insn >> 12) & 0xf;
8414 ri = get_arm_cp_reginfo(s->cp_regs,
8415 ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
8417 /* Check access permissions */
8418 if (!cp_access_ok(s->current_el, ri, isread)) {
8423 (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
8424 /* Emit code to perform further access permissions checks at
8425 * runtime; this may result in an exception.
8426 * Note that on XScale all cp0..c13 registers do an access check
8427 * call in order to handle c15_cpar. */
8430 TCGv_i32 tcg_syn, tcg_isread;
8433 /* Note that since we are an implementation which takes an
8434 * exception on a trapped conditional instruction only if the
8435 * instruction passes its condition code check, we can take
8436 * advantage of the clause in the ARM ARM that allows us to set
8437 * the COND field in the instruction to 0xE in all cases.
8438 * We could fish the actual condition out of the insn (ARM)
8439 * or the condexec bits (Thumb) but it isn't necessary. */
8444 syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
8447 syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
8453 syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
8456 syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
8461 /* ARMv8 defines that only coprocessors 14 and 15 exist,
8462 * so this can only happen if this is an ARMv7 or earlier CPU,
8463 * in which case the syndrome information won't actually be
8466 assert(!arm_dc_feature(s, ARM_FEATURE_V8));
8467 syndrome = syn_uncategorized();
8471 gen_set_condexec(s);
8472 gen_set_pc_im(s, s->pc - 4);
8473 tmpptr = tcg_const_ptr(ri);
8474 tcg_syn = tcg_const_i32(syndrome);
8475 tcg_isread = tcg_const_i32(isread);
8476 gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn,
8478 tcg_temp_free_ptr(tmpptr);
8479 tcg_temp_free_i32(tcg_syn);
8480 tcg_temp_free_i32(tcg_isread);
8483 /* Handle special cases first */
8484 switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
8491 gen_set_pc_im(s, s->pc);
8492 s->base.is_jmp = DISAS_WFI;
8498 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
8507 if (ri->type & ARM_CP_CONST) {
8508 tmp64 = tcg_const_i64(ri->resetvalue);
8509 } else if (ri->readfn) {
8511 tmp64 = tcg_temp_new_i64();
8512 tmpptr = tcg_const_ptr(ri);
8513 gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
8514 tcg_temp_free_ptr(tmpptr);
8516 tmp64 = tcg_temp_new_i64();
8517 tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
8519 tmp = tcg_temp_new_i32();
8520 tcg_gen_extrl_i64_i32(tmp, tmp64);
8521 store_reg(s, rt, tmp);
8522 tcg_gen_shri_i64(tmp64, tmp64, 32);
8523 tmp = tcg_temp_new_i32();
8524 tcg_gen_extrl_i64_i32(tmp, tmp64);
8525 tcg_temp_free_i64(tmp64);
8526 store_reg(s, rt2, tmp);
8529 if (ri->type & ARM_CP_CONST) {
8530 tmp = tcg_const_i32(ri->resetvalue);
8531 } else if (ri->readfn) {
8533 tmp = tcg_temp_new_i32();
8534 tmpptr = tcg_const_ptr(ri);
8535 gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
8536 tcg_temp_free_ptr(tmpptr);
8538 tmp = load_cpu_offset(ri->fieldoffset);
8541 /* Destination register of r15 for 32 bit loads sets
8542 * the condition codes from the high 4 bits of the value */
8545 tcg_temp_free_i32(tmp);
8547 store_reg(s, rt, tmp);
8552 if (ri->type & ARM_CP_CONST) {
8553 /* If not forbidden by access permissions, treat as WI */
8558 TCGv_i32 tmplo, tmphi;
8559 TCGv_i64 tmp64 = tcg_temp_new_i64();
8560 tmplo = load_reg(s, rt);
8561 tmphi = load_reg(s, rt2);
8562 tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
8563 tcg_temp_free_i32(tmplo);
8564 tcg_temp_free_i32(tmphi);
8566 TCGv_ptr tmpptr = tcg_const_ptr(ri);
8567 gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
8568 tcg_temp_free_ptr(tmpptr);
8570 tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
8572 tcg_temp_free_i64(tmp64);
8577 tmp = load_reg(s, rt);
8578 tmpptr = tcg_const_ptr(ri);
8579 gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
8580 tcg_temp_free_ptr(tmpptr);
8581 tcg_temp_free_i32(tmp);
8583 TCGv_i32 tmp = load_reg(s, rt);
8584 store_cpu_offset(tmp, ri->fieldoffset);
8589 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
8590 /* I/O operations must end the TB here (whether read or write) */
8593 } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
8594 /* We default to ending the TB on a coprocessor register write,
8595 * but allow this to be suppressed by the register definition
8596 * (usually only necessary to work around guest bugs). */
8604 /* Unknown register; this might be a guest error or a QEMU
8605 * unimplemented feature. */
8608 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
8609 "64 bit system register cp:%d opc1: %d crm:%d "
8611 isread ? "read" : "write", cpnum, opc1, crm,
8612 s->ns ? "non-secure" : "secure");
8614 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
8615 "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
8617 isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
8618 s->ns ? "non-secure" : "secure");
8625 /* Store a 64-bit value to a register pair. Clobbers val. */
8626 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
8629 tmp = tcg_temp_new_i32();
8630 tcg_gen_extrl_i64_i32(tmp, val);
8631 store_reg(s, rlow, tmp);
8632 tmp = tcg_temp_new_i32();
8633 tcg_gen_shri_i64(val, val, 32);
8634 tcg_gen_extrl_i64_i32(tmp, val);
8635 store_reg(s, rhigh, tmp);
8638 /* load a 32-bit value from a register and perform a 64-bit accumulate. */
8639 static void gen_addq_lo(DisasContext *s, TCGv_i64 val, int rlow)
8644 /* Load value and extend to 64 bits. */
8645 tmp = tcg_temp_new_i64();
8646 tmp2 = load_reg(s, rlow);
8647 tcg_gen_extu_i32_i64(tmp, tmp2);
8648 tcg_temp_free_i32(tmp2);
8649 tcg_gen_add_i64(val, val, tmp);
8650 tcg_temp_free_i64(tmp);
8653 /* load and add a 64-bit value from a register pair. */
8654 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
8660 /* Load 64-bit value rd:rn. */
8661 tmpl = load_reg(s, rlow);
8662 tmph = load_reg(s, rhigh);
8663 tmp = tcg_temp_new_i64();
8664 tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
8665 tcg_temp_free_i32(tmpl);
8666 tcg_temp_free_i32(tmph);
8667 tcg_gen_add_i64(val, val, tmp);
8668 tcg_temp_free_i64(tmp);
8671 /* Set N and Z flags from hi|lo. */
8672 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
8674 tcg_gen_mov_i32(cpu_NF, hi);
8675 tcg_gen_or_i32(cpu_ZF, lo, hi);
8678 /* Load/Store exclusive instructions are implemented by remembering
8679 the value/address loaded, and seeing if these are the same
8680 when the store is performed. This should be sufficient to implement
8681 the architecturally mandated semantics, and avoids having to monitor
8682 regular stores. The compare vs the remembered value is done during
8683 the cmpxchg operation, but we must compare the addresses manually. */
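/* Roughly: LDREX records the address in cpu_exclusive_addr and the
 * loaded data in cpu_exclusive_val; STREX first branches to the fail
 * path if the address differs, and otherwise performs a cmpxchg of the
 * remembered value against memory, succeeding only if it still matches.
 */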
8684 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
8685 TCGv_i32 addr, int size)
8687 TCGv_i32 tmp = tcg_temp_new_i32();
8688 TCGMemOp opc = size | MO_ALIGN | s->be_data;
8693 TCGv_i32 tmp2 = tcg_temp_new_i32();
8694 TCGv_i64 t64 = tcg_temp_new_i64();
8696 /* For AArch32, architecturally the 32-bit word at the lowest
8697 * address is always Rt and the one at addr+4 is Rt2, even if
8698 * the CPU is big-endian. That means we don't want to do a
8699 * gen_aa32_ld_i64(), which invokes gen_aa32_frob64() as if
8700 * for an architecturally 64-bit access, but instead do a
8701 * 64-bit access using MO_BE if appropriate and then split the two halves.
8703 * This only makes a difference for BE32 user-mode, where
8704 * frob64() must not flip the two halves of the 64-bit data
8705 * but this code must treat BE32 user-mode like BE32 system. */
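/* Note: tcg_gen_extr_i64_i32(lo, hi, src) writes the low 32 bits of src
 * to its first argument, so the BE and LE cases below swap the
 * destination order to ensure Rt always receives the word at the lower
 * address.
 */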
8707 TCGv taddr = gen_aa32_addr(s, addr, opc);
8709 tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
8710 tcg_temp_free(taddr);
8711 tcg_gen_mov_i64(cpu_exclusive_val, t64);
8712 if (s->be_data == MO_BE) {
8713 tcg_gen_extr_i64_i32(tmp2, tmp, t64);
8715 tcg_gen_extr_i64_i32(tmp, tmp2, t64);
8717 tcg_temp_free_i64(t64);
8719 store_reg(s, rt2, tmp2);
8721 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
8722 tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
8725 store_reg(s, rt, tmp);
8726 tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
8729 static void gen_clrex(DisasContext *s)
8731 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
8734 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
8735 TCGv_i32 addr, int size)
8737 TCGv_i32 t0, t1, t2;
8740 TCGLabel *done_label;
8741 TCGLabel *fail_label;
8742 TCGMemOp opc = size | MO_ALIGN | s->be_data;
8744 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
          [addr] = {Rt}; {Rd} = 0;
        } else {
          {Rd} = 1;
        } */
8750 fail_label = gen_new_label();
8751 done_label = gen_new_label();
8752 extaddr = tcg_temp_new_i64();
8753 tcg_gen_extu_i32_i64(extaddr, addr);
8754 tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
8755 tcg_temp_free_i64(extaddr);
8757 taddr = gen_aa32_addr(s, addr, opc);
8758 t0 = tcg_temp_new_i32();
8759 t1 = load_reg(s, rt);
8761 TCGv_i64 o64 = tcg_temp_new_i64();
8762 TCGv_i64 n64 = tcg_temp_new_i64();
8764 t2 = load_reg(s, rt2);
8765 /* For AArch32, architecturally the 32-bit word at the lowest
8766 * address is always Rt and the one at addr+4 is Rt2, even if
8767 * the CPU is big-endian. Since we're going to treat this as a
8768 * single 64-bit BE store, we need to put the two halves in the
8769 * opposite order for BE to LE, so that they end up in the right places.
8771 * We don't want gen_aa32_frob64() because that does the wrong
8772 * thing for BE32 usermode. */
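/* Note: tcg_gen_concat_i32_i64(dst, lo, hi) places its second argument
 * in the low half of the result, so the t2:t1 vs t1:t2 ordering below
 * keeps Rt at the lower address for both endiannesses.
 */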
8774 if (s->be_data == MO_BE) {
8775 tcg_gen_concat_i32_i64(n64, t2, t1);
8777 tcg_gen_concat_i32_i64(n64, t1, t2);
8779 tcg_temp_free_i32(t2);
8781 tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
8782 get_mem_index(s), opc);
8783 tcg_temp_free_i64(n64);
8785 tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
8786 tcg_gen_extrl_i64_i32(t0, o64);
8788 tcg_temp_free_i64(o64);
8790 t2 = tcg_temp_new_i32();
8791 tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
8792 tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
8793 tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
8794 tcg_temp_free_i32(t2);
8796 tcg_temp_free_i32(t1);
8797 tcg_temp_free(taddr);
8798 tcg_gen_mov_i32(cpu_R[rd], t0);
8799 tcg_temp_free_i32(t0);
8800 tcg_gen_br(done_label);
8802 gen_set_label(fail_label);
8803 tcg_gen_movi_i32(cpu_R[rd], 1);
8804 gen_set_label(done_label);
8805 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
/* gen_srs:
8811 * @mode: mode field from insn (which stack to store to)
8812 * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
8813 * @writeback: true if writeback bit set
8815 * Generate code for the SRS (Store Return State) insn. */
8817 static void gen_srs(DisasContext *s,
8818 uint32_t mode, uint32_t amode, bool writeback)
/* SRS is:
8825 * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
8826 * and specified mode is monitor mode
8827 * - UNDEFINED in Hyp mode
8828 * - UNPREDICTABLE in User or System mode
8829 * - UNPREDICTABLE if the specified mode is:
8830 * -- not implemented
8831 * -- not a valid mode number
8832 * -- a mode that's at a higher exception level
8833 * -- Monitor, if we are Non-secure
8834 * For the UNPREDICTABLE cases we choose to UNDEF. */
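/* The checks below implement this list in order: first the Secure EL1
 * monitor-mode trap to EL3, then the EL0/EL2 UNDEF cases, and finally
 * the per-mode validity switch; anything that falls out as invalid is
 * turned into an UNDEF exception.
 */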
8836 if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
8837 gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(), 3);
8841 if (s->current_el == 0 || s->current_el == 2) {
8846 case ARM_CPU_MODE_USR:
8847 case ARM_CPU_MODE_FIQ:
8848 case ARM_CPU_MODE_IRQ:
8849 case ARM_CPU_MODE_SVC:
8850 case ARM_CPU_MODE_ABT:
8851 case ARM_CPU_MODE_UND:
8852 case ARM_CPU_MODE_SYS:
8854 case ARM_CPU_MODE_HYP:
8855 if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
8859 case ARM_CPU_MODE_MON:
8860 /* No need to check specifically for "are we non-secure" because
8861 * we've already made EL0 UNDEF and handled the trap for S-EL1;
8862 * so if this isn't EL3 then we must be non-secure. */
8864 if (s->current_el != 3) {
8873 gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
8874 default_exception_el(s));
8878 addr = tcg_temp_new_i32();
8879 tmp = tcg_const_i32(mode);
8880 /* get_r13_banked() will raise an exception if called from System mode */
8881 gen_set_condexec(s);
8882 gen_set_pc_im(s, s->pc - 4);
8883 gen_helper_get_r13_banked(addr, cpu_env, tmp);
8884 tcg_temp_free_i32(tmp);
8901 tcg_gen_addi_i32(addr, addr, offset);
8902 tmp = load_reg(s, 14);
8903 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
8904 tcg_temp_free_i32(tmp);
8905 tmp = load_cpu_field(spsr);
8906 tcg_gen_addi_i32(addr, addr, 4);
8907 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
8908 tcg_temp_free_i32(tmp);
8926 tcg_gen_addi_i32(addr, addr, offset);
8927 tmp = tcg_const_i32(mode);
8928 gen_helper_set_r13_banked(cpu_env, tmp, addr);
8929 tcg_temp_free_i32(tmp);
8931 tcg_temp_free_i32(addr);
8932 s->base.is_jmp = DISAS_UPDATE;
8935 /* Generate a label used for skipping this instruction */
8936 static void arm_gen_condlabel(DisasContext *s)
8939 s->condlabel = gen_new_label();
8944 /* Skip this instruction if the ARM condition is false */
8945 static void arm_skip_unless(DisasContext *s, uint32_t cond)
8947 arm_gen_condlabel(s);
8948 arm_gen_test_cc(cond ^ 1, s->condlabel);
8951 static void disas_arm_insn(DisasContext *s, unsigned int insn)
8953 unsigned int cond, val, op1, i, shift, rm, rs, rn, rd, sh;
8960 /* M variants do not implement ARM mode; this must raise the INVSTATE
8961 * UsageFault exception. */
8963 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8964 gen_exception_insn(s, 4, EXCP_INVSTATE, syn_uncategorized(),
8965 default_exception_el(s));
8970 /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
8971 * choose to UNDEF. In ARMv5 and above the space is used
8972 * for miscellaneous unconditional instructions. */
8976 /* Unconditional instructions. */
8977 if (((insn >> 25) & 7) == 1) {
8978 /* NEON Data processing. */
8979 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
8983 if (disas_neon_data_insn(s, insn)) {
8988 if ((insn & 0x0f100000) == 0x04000000) {
8989 /* NEON load/store. */
8990 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
8994 if (disas_neon_ls_insn(s, insn)) {
8999 if ((insn & 0x0f000e10) == 0x0e000a00) {
9001 if (disas_vfp_insn(s, insn)) {
9006 if (((insn & 0x0f30f000) == 0x0510f000) ||
9007 ((insn & 0x0f30f010) == 0x0710f000)) {
9008 if ((insn & (1 << 22)) == 0) {
9010 if (!arm_dc_feature(s, ARM_FEATURE_V7MP)) {
9014 /* Otherwise PLD; v5TE+ */
9018 if (((insn & 0x0f70f000) == 0x0450f000) ||
9019 ((insn & 0x0f70f010) == 0x0650f000)) {
9021 return; /* PLI; V7 */
9023 if (((insn & 0x0f700000) == 0x04100000) ||
9024 ((insn & 0x0f700010) == 0x06100000)) {
9025 if (!arm_dc_feature(s, ARM_FEATURE_V7MP)) {
9028 return; /* v7MP: Unallocated memory hint: must NOP */
9031 if ((insn & 0x0ffffdff) == 0x01010000) {
9034 if (((insn >> 9) & 1) != !!(s->be_data == MO_BE)) {
9035 gen_helper_setend(cpu_env);
9036 s->base.is_jmp = DISAS_UPDATE;
9039 } else if ((insn & 0x0fffff00) == 0x057ff000) {
9040 switch ((insn >> 4) & 0xf) {
9048 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
9051 /* We need to break the TB after this insn to execute
9052 * self-modifying code correctly and also to take
9053 * any pending interrupts immediately. */
9055 gen_goto_tb(s, 0, s->pc & ~1);
9060 } else if ((insn & 0x0e5fffe0) == 0x084d0500) {
9063 gen_srs(s, (insn & 0x1f), (insn >> 23) & 3, insn & (1 << 21));
9065 } else if ((insn & 0x0e50ffe0) == 0x08100a00) {
9071 rn = (insn >> 16) & 0xf;
9072 addr = load_reg(s, rn);
9073 i = (insn >> 23) & 3;
9075 case 0: offset = -4; break; /* DA */
9076 case 1: offset = 0; break; /* IA */
9077 case 2: offset = -8; break; /* DB */
9078 case 3: offset = 4; break; /* IB */
9082 tcg_gen_addi_i32(addr, addr, offset);
9083 /* Load PC into tmp and CPSR into tmp2. */
9084 tmp = tcg_temp_new_i32();
9085 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
9086 tcg_gen_addi_i32(addr, addr, 4);
9087 tmp2 = tcg_temp_new_i32();
9088 gen_aa32_ld32u(s, tmp2, addr, get_mem_index(s));
9089 if (insn & (1 << 21)) {
9090 /* Base writeback. */
9092 case 0: offset = -8; break;
9093 case 1: offset = 4; break;
9094 case 2: offset = -4; break;
9095 case 3: offset = 0; break;
9099 tcg_gen_addi_i32(addr, addr, offset);
9100 store_reg(s, rn, addr);
9102 tcg_temp_free_i32(addr);
9104 gen_rfe(s, tmp, tmp2);
9106 } else if ((insn & 0x0e000000) == 0x0a000000) {
9107 /* branch link and change to thumb (blx <offset>) */
9110 val = (uint32_t)s->pc;
9111 tmp = tcg_temp_new_i32();
9112 tcg_gen_movi_i32(tmp, val);
9113 store_reg(s, 14, tmp);
9114 /* Sign-extend the 24-bit offset */
9115 offset = (((int32_t)insn) << 8) >> 8;
9116 /* offset * 4 + bit24 * 2 + (thumb bit) */
9117 val += (offset << 2) | ((insn >> 23) & 2) | 1;
9118 /* pipeline offset */
9120 /* protected by ARCH(5); above, near the start of uncond block */
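/* In the target computation above, "(insn >> 23) & 2" extracts the H bit
 * (insn bit 24) as a halfword offset and the final "| 1" sets the Thumb
 * bit, since BLX (immediate) always changes to Thumb state.
 */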
9123 } else if ((insn & 0x0e000f00) == 0x0c000100) {
9124 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
9125 /* iWMMXt register transfer. */
9126 if (extract32(s->c15_cpar, 1, 1)) {
9127 if (!disas_iwmmxt_insn(s, insn)) {
9132 } else if ((insn & 0x0e000a00) == 0x0c000800
9133 && arm_dc_feature(s, ARM_FEATURE_V8)) {
9134 if (disas_neon_insn_3same_ext(s, insn)) {
9138 } else if ((insn & 0x0f000a00) == 0x0e000800
9139 && arm_dc_feature(s, ARM_FEATURE_V8)) {
9140 if (disas_neon_insn_2reg_scalar_ext(s, insn)) {
9144 } else if ((insn & 0x0fe00000) == 0x0c400000) {
9145 /* Coprocessor double register transfer. */
9147 } else if ((insn & 0x0f000010) == 0x0e000010) {
9148 /* Additional coprocessor register transfer. */
9149 } else if ((insn & 0x0ff10020) == 0x01000000) {
9152 /* cps (privileged) */
9156 if (insn & (1 << 19)) {
9157 if (insn & (1 << 8))
9159 if (insn & (1 << 7))
9161 if (insn & (1 << 6))
9163 if (insn & (1 << 18))
9166 if (insn & (1 << 17)) {
9168 val |= (insn & 0x1f);
9171 gen_set_psr_im(s, mask, 0, val);
9178 /* if not always execute, we generate a conditional jump to the next instruction */
9180 arm_skip_unless(s, cond);
9182 if ((insn & 0x0f900000) == 0x03000000) {
9183 if ((insn & (1 << 21)) == 0) {
9185 rd = (insn >> 12) & 0xf;
9186 val = ((insn >> 4) & 0xf000) | (insn & 0xfff);
9187 if ((insn & (1 << 22)) == 0) {
9189 tmp = tcg_temp_new_i32();
9190 tcg_gen_movi_i32(tmp, val);
9193 tmp = load_reg(s, rd);
9194 tcg_gen_ext16u_i32(tmp, tmp);
9195 tcg_gen_ori_i32(tmp, tmp, val << 16);
9197 store_reg(s, rd, tmp);
9199 if (((insn >> 12) & 0xf) != 0xf)
9201 if (((insn >> 16) & 0xf) == 0) {
9202 gen_nop_hint(s, insn & 0xff);
9204 /* CPSR = immediate */
9206 shift = ((insn >> 8) & 0xf) * 2;
9208 val = (val >> shift) | (val << (32 - shift));
9209 i = ((insn & (1 << 22)) != 0);
9210 if (gen_set_psr_im(s, msr_mask(s, (insn >> 16) & 0xf, i),
9216 } else if ((insn & 0x0f900000) == 0x01000000
9217 && (insn & 0x00000090) != 0x00000090) {
9218 /* miscellaneous instructions */
9219 op1 = (insn >> 21) & 3;
9220 sh = (insn >> 4) & 0xf;
9223 case 0x0: /* MSR, MRS */
9224 if (insn & (1 << 9)) {
9225 /* MSR (banked) and MRS (banked) */
9226 int sysm = extract32(insn, 16, 4) |
9227 (extract32(insn, 8, 1) << 4);
9228 int r = extract32(insn, 22, 1);
9232 gen_msr_banked(s, r, sysm, rm);
9235 int rd = extract32(insn, 12, 4);
9237 gen_mrs_banked(s, r, sysm, rd);
9242 /* MSR, MRS (for PSRs) */
9245 tmp = load_reg(s, rm);
9246 i = ((op1 & 2) != 0);
9247 if (gen_set_psr(s, msr_mask(s, (insn >> 16) & 0xf, i), i, tmp))
9251 rd = (insn >> 12) & 0xf;
9255 tmp = load_cpu_field(spsr);
9257 tmp = tcg_temp_new_i32();
9258 gen_helper_cpsr_read(tmp, cpu_env);
9260 store_reg(s, rd, tmp);
9265 /* branch/exchange thumb (bx). */
9267 tmp = load_reg(s, rm);
9269 } else if (op1 == 3) {
9272 rd = (insn >> 12) & 0xf;
9273 tmp = load_reg(s, rm);
9274 tcg_gen_clzi_i32(tmp, tmp, 32);
9275 store_reg(s, rd, tmp);
9283 /* Trivial implementation equivalent to bx. */
9284 tmp = load_reg(s, rm);
9295 /* branch link/exchange thumb (blx) */
9296 tmp = load_reg(s, rm);
9297 tmp2 = tcg_temp_new_i32();
9298 tcg_gen_movi_i32(tmp2, s->pc);
9299 store_reg(s, 14, tmp2);
9305 uint32_t c = extract32(insn, 8, 4);
9307 /* Check this CPU supports ARMv8 CRC instructions.
9308 * op1 == 3 is UNPREDICTABLE but handle as UNDEFINED.
9309 * Bits 8, 10 and 11 should be zero. */
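/* Below, op1 gives the operand size as (1 << op1) bytes (byte, halfword,
 * word), and insn bit 9 (the C flag in the architectural encoding) is
 * what selects the CRC32C (Castagnoli) helper rather than plain CRC32.
 */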
9311 if (!dc_isar_feature(aa32_crc32, s) || op1 == 0x3 || (c & 0xd) != 0) {
9315 rn = extract32(insn, 16, 4);
9316 rd = extract32(insn, 12, 4);
9318 tmp = load_reg(s, rn);
9319 tmp2 = load_reg(s, rm);
9321 tcg_gen_andi_i32(tmp2, tmp2, 0xff);
9322 } else if (op1 == 1) {
9323 tcg_gen_andi_i32(tmp2, tmp2, 0xffff);
9325 tmp3 = tcg_const_i32(1 << op1);
9327 gen_helper_crc32c(tmp, tmp, tmp2, tmp3);
9329 gen_helper_crc32(tmp, tmp, tmp2, tmp3);
9331 tcg_temp_free_i32(tmp2);
9332 tcg_temp_free_i32(tmp3);
9333 store_reg(s, rd, tmp);
9336 case 0x5: /* saturating add/subtract */
9338 rd = (insn >> 12) & 0xf;
9339 rn = (insn >> 16) & 0xf;
9340 tmp = load_reg(s, rm);
9341 tmp2 = load_reg(s, rn);
9343 gen_helper_double_saturate(tmp2, cpu_env, tmp2);
9345 gen_helper_sub_saturate(tmp, cpu_env, tmp, tmp2);
9347 gen_helper_add_saturate(tmp, cpu_env, tmp, tmp2);
9348 tcg_temp_free_i32(tmp2);
9349 store_reg(s, rd, tmp);
9351 case 0x6: /* ERET */
9355 if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
9358 if ((insn & 0x000fff0f) != 0x0000000e) {
9359 /* UNPREDICTABLE; we choose to UNDEF */
9363 if (s->current_el == 2) {
9364 tmp = load_cpu_field(elr_el[2]);
9366 tmp = load_reg(s, 14);
9368 gen_exception_return(s, tmp);
9372 int imm16 = extract32(insn, 0, 4) | (extract32(insn, 8, 12) << 4);
9381 gen_exception_bkpt_insn(s, 4, syn_aa32_bkpt(imm16, false));
9384 /* Hypervisor call (v7) */
9392 /* Secure monitor call (v6+) */
9400 g_assert_not_reached();
9404 case 0x8: /* signed multiply */
9409 rs = (insn >> 8) & 0xf;
9410 rn = (insn >> 12) & 0xf;
9411 rd = (insn >> 16) & 0xf;
9413 /* (32 * 16) >> 16 */
9414 tmp = load_reg(s, rm);
9415 tmp2 = load_reg(s, rs);
9417 tcg_gen_sari_i32(tmp2, tmp2, 16);
9420 tmp64 = gen_muls_i64_i32(tmp, tmp2);
9421 tcg_gen_shri_i64(tmp64, tmp64, 16);
9422 tmp = tcg_temp_new_i32();
9423 tcg_gen_extrl_i64_i32(tmp, tmp64);
9424 tcg_temp_free_i64(tmp64);
9425 if ((sh & 2) == 0) {
9426 tmp2 = load_reg(s, rn);
9427 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
9428 tcg_temp_free_i32(tmp2);
9430 store_reg(s, rd, tmp);
9433 tmp = load_reg(s, rm);
9434 tmp2 = load_reg(s, rs);
9435 gen_mulxy(tmp, tmp2, sh & 2, sh & 4);
9436 tcg_temp_free_i32(tmp2);
9438 tmp64 = tcg_temp_new_i64();
9439 tcg_gen_ext_i32_i64(tmp64, tmp);
9440 tcg_temp_free_i32(tmp);
9441 gen_addq(s, tmp64, rn, rd);
9442 gen_storeq_reg(s, rn, rd, tmp64);
9443 tcg_temp_free_i64(tmp64);
9446 tmp2 = load_reg(s, rn);
9447 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
9448 tcg_temp_free_i32(tmp2);
9450 store_reg(s, rd, tmp);
9457 } else if (((insn & 0x0e000000) == 0 &&
9458 (insn & 0x00000090) != 0x90) ||
9459 ((insn & 0x0e000000) == (1 << 25))) {
9460 int set_cc, logic_cc, shiftop;
9462 op1 = (insn >> 21) & 0xf;
9463 set_cc = (insn >> 20) & 1;
9464 logic_cc = table_logic_cc[op1] & set_cc;
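/* table_logic_cc[] marks which of the sixteen data-processing opcodes
 * are logical operations (AND, EOR, TST, TEQ, ORR, MOV, BIC, MVN); for
 * those, an S-suffixed form sets the carry flag from the shifter output
 * rather than from an arithmetic carry-out.
 */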
9466 /* data processing instruction */
9467 if (insn & (1 << 25)) {
9468 /* immediate operand */
9470 shift = ((insn >> 8) & 0xf) * 2;
9472 val = (val >> shift) | (val << (32 - shift));
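/* ARM modified immediates are an 8-bit value rotated right by twice the
 * 4-bit rotate field, as computed above; for example an imm12 of 0x4ff
 * encodes 0xff ror 8 == 0xff000000.
 */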
9474 tmp2 = tcg_temp_new_i32();
9475 tcg_gen_movi_i32(tmp2, val);
9476 if (logic_cc && shift) {
9477 gen_set_CF_bit31(tmp2);
9482 tmp2 = load_reg(s, rm);
9483 shiftop = (insn >> 5) & 3;
9484 if (!(insn & (1 << 4))) {
9485 shift = (insn >> 7) & 0x1f;
9486 gen_arm_shift_im(tmp2, shiftop, shift, logic_cc);
9488 rs = (insn >> 8) & 0xf;
9489 tmp = load_reg(s, rs);
9490 gen_arm_shift_reg(tmp2, shiftop, tmp, logic_cc);
9493 if (op1 != 0x0f && op1 != 0x0d) {
9494 rn = (insn >> 16) & 0xf;
9495 tmp = load_reg(s, rn);
9499 rd = (insn >> 12) & 0xf;
9502 tcg_gen_and_i32(tmp, tmp, tmp2);
9506 store_reg_bx(s, rd, tmp);
9509 tcg_gen_xor_i32(tmp, tmp, tmp2);
9513 store_reg_bx(s, rd, tmp);
9516 if (set_cc && rd == 15) {
9517 /* SUBS r15, ... is used for exception return. */
9521 gen_sub_CC(tmp, tmp, tmp2);
9522 gen_exception_return(s, tmp);
9525 gen_sub_CC(tmp, tmp, tmp2);
9527 tcg_gen_sub_i32(tmp, tmp, tmp2);
9529 store_reg_bx(s, rd, tmp);
9534 gen_sub_CC(tmp, tmp2, tmp);
9536 tcg_gen_sub_i32(tmp, tmp2, tmp);
9538 store_reg_bx(s, rd, tmp);
9542 gen_add_CC(tmp, tmp, tmp2);
9544 tcg_gen_add_i32(tmp, tmp, tmp2);
9546 store_reg_bx(s, rd, tmp);
9550 gen_adc_CC(tmp, tmp, tmp2);
9552 gen_add_carry(tmp, tmp, tmp2);
9554 store_reg_bx(s, rd, tmp);
9558 gen_sbc_CC(tmp, tmp, tmp2);
9560 gen_sub_carry(tmp, tmp, tmp2);
9562 store_reg_bx(s, rd, tmp);
9566 gen_sbc_CC(tmp, tmp2, tmp);
9568 gen_sub_carry(tmp, tmp2, tmp);
9570 store_reg_bx(s, rd, tmp);
9574 tcg_gen_and_i32(tmp, tmp, tmp2);
9577 tcg_temp_free_i32(tmp);
9581 tcg_gen_xor_i32(tmp, tmp, tmp2);
9584 tcg_temp_free_i32(tmp);
9588 gen_sub_CC(tmp, tmp, tmp2);
9590 tcg_temp_free_i32(tmp);
9594 gen_add_CC(tmp, tmp, tmp2);
9596 tcg_temp_free_i32(tmp);
9599 tcg_gen_or_i32(tmp, tmp, tmp2);
9603 store_reg_bx(s, rd, tmp);
9606 if (logic_cc && rd == 15) {
9607 /* MOVS r15, ... is used for exception return. */
9611 gen_exception_return(s, tmp2);
9616 store_reg_bx(s, rd, tmp2);
9620 tcg_gen_andc_i32(tmp, tmp, tmp2);
9624 store_reg_bx(s, rd, tmp);
9628 tcg_gen_not_i32(tmp2, tmp2);
9632 store_reg_bx(s, rd, tmp2);
9635 if (op1 != 0x0f && op1 != 0x0d) {
9636 tcg_temp_free_i32(tmp2);
9639 /* other instructions */
9640 op1 = (insn >> 24) & 0xf;
9644 /* multiplies, extra load/stores */
9645 sh = (insn >> 5) & 3;
9648 rd = (insn >> 16) & 0xf;
9649 rn = (insn >> 12) & 0xf;
9650 rs = (insn >> 8) & 0xf;
9652 op1 = (insn >> 20) & 0xf;
9654 case 0: case 1: case 2: case 3: case 6:
9656 tmp = load_reg(s, rs);
9657 tmp2 = load_reg(s, rm);
9658 tcg_gen_mul_i32(tmp, tmp, tmp2);
9659 tcg_temp_free_i32(tmp2);
9660 if (insn & (1 << 22)) {
9661 /* Subtract (mls) */
9663 tmp2 = load_reg(s, rn);
9664 tcg_gen_sub_i32(tmp, tmp2, tmp);
9665 tcg_temp_free_i32(tmp2);
9666 } else if (insn & (1 << 21)) {
9668 tmp2 = load_reg(s, rn);
9669 tcg_gen_add_i32(tmp, tmp, tmp2);
9670 tcg_temp_free_i32(tmp2);
9672 if (insn & (1 << 20))
9674 store_reg(s, rd, tmp);
9677 /* 64 bit mul double accumulate (UMAAL) */
9679 tmp = load_reg(s, rs);
9680 tmp2 = load_reg(s, rm);
9681 tmp64 = gen_mulu_i64_i32(tmp, tmp2);
9682 gen_addq_lo(s, tmp64, rn);
9683 gen_addq_lo(s, tmp64, rd);
9684 gen_storeq_reg(s, rn, rd, tmp64);
9685 tcg_temp_free_i64(tmp64);
9687 case 8: case 9: case 10: case 11:
9688 case 12: case 13: case 14: case 15:
9689 /* 64 bit mul: UMULL, UMLAL, SMULL, SMLAL. */
9690 tmp = load_reg(s, rs);
9691 tmp2 = load_reg(s, rm);
9692 if (insn & (1 << 22)) {
9693 tcg_gen_muls2_i32(tmp, tmp2, tmp, tmp2);
9695 tcg_gen_mulu2_i32(tmp, tmp2, tmp, tmp2);
9697 if (insn & (1 << 21)) { /* mult accumulate */
9698 TCGv_i32 al = load_reg(s, rn);
9699 TCGv_i32 ah = load_reg(s, rd);
9700 tcg_gen_add2_i32(tmp, tmp2, tmp, tmp2, al, ah);
9701 tcg_temp_free_i32(al);
9702 tcg_temp_free_i32(ah);
9704 if (insn & (1 << 20)) {
9705 gen_logicq_cc(tmp, tmp2);
9707 store_reg(s, rn, tmp);
9708 store_reg(s, rd, tmp2);
9714 rn = (insn >> 16) & 0xf;
9715 rd = (insn >> 12) & 0xf;
9716 if (insn & (1 << 23)) {
9717 /* load/store exclusive */
9718 int op2 = (insn >> 8) & 3;
9719 op1 = (insn >> 21) & 0x3;
9722 case 0: /* lda/stl */
9728 case 1: /* reserved */
9730 case 2: /* ldaex/stlex */
9733 case 3: /* ldrex/strex */
9742 addr = tcg_temp_local_new_i32();
9743 load_reg_var(s, addr, rn);
9745 /* Since the emulation does not have barriers,
9746 the acquire/release semantics need no special treatment. */
9749 if (insn & (1 << 20)) {
9750 tmp = tcg_temp_new_i32();
9753 gen_aa32_ld32u_iss(s, tmp, addr,
9758 gen_aa32_ld8u_iss(s, tmp, addr,
9763 gen_aa32_ld16u_iss(s, tmp, addr,
9770 store_reg(s, rd, tmp);
9773 tmp = load_reg(s, rm);
9776 gen_aa32_st32_iss(s, tmp, addr,
9781 gen_aa32_st8_iss(s, tmp, addr,
9786 gen_aa32_st16_iss(s, tmp, addr,
9793 tcg_temp_free_i32(tmp);
9795 } else if (insn & (1 << 20)) {
9798 gen_load_exclusive(s, rd, 15, addr, 2);
9800 case 1: /* ldrexd */
9801 gen_load_exclusive(s, rd, rd + 1, addr, 3);
9803 case 2: /* ldrexb */
9804 gen_load_exclusive(s, rd, 15, addr, 0);
9806 case 3: /* ldrexh */
9807 gen_load_exclusive(s, rd, 15, addr, 1);
9816 gen_store_exclusive(s, rd, rm, 15, addr, 2);
9818 case 1: /* strexd */
9819 gen_store_exclusive(s, rd, rm, rm + 1, addr, 3);
9821 case 2: /* strexb */
9822 gen_store_exclusive(s, rd, rm, 15, addr, 0);
9824 case 3: /* strexh */
9825 gen_store_exclusive(s, rd, rm, 15, addr, 1);
9831 tcg_temp_free_i32(addr);
9832 } else if ((insn & 0x00300f00) == 0) {
9833 /* 0bcccc_0001_0x00_xxxx_xxxx_0000_1001_xxxx : SWP, SWPB */
9838 TCGMemOp opc = s->be_data;
9842 if (insn & (1 << 22)) {
9845 opc |= MO_UL | MO_ALIGN;
9848 addr = load_reg(s, rn);
9849 taddr = gen_aa32_addr(s, addr, opc);
9850 tcg_temp_free_i32(addr);
9852 tmp = load_reg(s, rm);
9853 tcg_gen_atomic_xchg_i32(tmp, taddr, tmp,
9854 get_mem_index(s), opc);
9855 tcg_temp_free(taddr);
9856 store_reg(s, rd, tmp);
9863 bool load = insn & (1 << 20);
9864 bool wbit = insn & (1 << 21);
9865 bool pbit = insn & (1 << 24);
9866 bool doubleword = false;
9869 /* Misc load/store */
9870 rn = (insn >> 16) & 0xf;
9871 rd = (insn >> 12) & 0xf;
9873 /* ISS not valid if writeback */
9874 issinfo = (pbit & !wbit) ? rd : ISSInvalid;
9876 if (!load && (sh & 2)) {
9880 /* UNPREDICTABLE; we choose to UNDEF */
9883 load = (sh & 1) == 0;
9887 addr = load_reg(s, rn);
9889 gen_add_datah_offset(s, insn, 0, addr);
9896 tmp = load_reg(s, rd);
9897 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
9898 tcg_temp_free_i32(tmp);
9899 tcg_gen_addi_i32(addr, addr, 4);
9900 tmp = load_reg(s, rd + 1);
9901 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
9902 tcg_temp_free_i32(tmp);
9905 tmp = tcg_temp_new_i32();
9906 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
9907 store_reg(s, rd, tmp);
9908 tcg_gen_addi_i32(addr, addr, 4);
9909 tmp = tcg_temp_new_i32();
9910 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
9913 address_offset = -4;
9916 tmp = tcg_temp_new_i32();
9919 gen_aa32_ld16u_iss(s, tmp, addr, get_mem_index(s),
9923 gen_aa32_ld8s_iss(s, tmp, addr, get_mem_index(s),
9928 gen_aa32_ld16s_iss(s, tmp, addr, get_mem_index(s),
9934 tmp = load_reg(s, rd);
9935 gen_aa32_st16_iss(s, tmp, addr, get_mem_index(s), issinfo);
9936 tcg_temp_free_i32(tmp);
9938 /* Perform base writeback before the loaded value to
9939 ensure correct behavior with overlapping index registers.
9940 ldrd with base writeback is undefined if the
9941 destination and index registers overlap. */
9943 gen_add_datah_offset(s, insn, address_offset, addr);
9944 store_reg(s, rn, addr);
9947 tcg_gen_addi_i32(addr, addr, address_offset);
9948 store_reg(s, rn, addr);
9950 tcg_temp_free_i32(addr);
9953 /* Complete the load. */
9954 store_reg(s, rd, tmp);
9963 if (insn & (1 << 4)) {
9965 /* Armv6 Media instructions. */
9967 rn = (insn >> 16) & 0xf;
9968 rd = (insn >> 12) & 0xf;
9969 rs = (insn >> 8) & 0xf;
9970 switch ((insn >> 23) & 3) {
9971 case 0: /* Parallel add/subtract. */
9972 op1 = (insn >> 20) & 7;
9973 tmp = load_reg(s, rn);
9974 tmp2 = load_reg(s, rm);
9975 sh = (insn >> 5) & 7;
9976 if ((op1 & 3) == 0 || sh == 5 || sh == 6)
9978 gen_arm_parallel_addsub(op1, sh, tmp, tmp2);
9979 tcg_temp_free_i32(tmp2);
9980 store_reg(s, rd, tmp);
9983 if ((insn & 0x00700020) == 0) {
9984 /* Halfword pack. */
9985 tmp = load_reg(s, rn);
9986 tmp2 = load_reg(s, rm);
9987 shift = (insn >> 7) & 0x1f;
9988 if (insn & (1 << 6)) {
9992 tcg_gen_sari_i32(tmp2, tmp2, shift);
9993 tcg_gen_andi_i32(tmp, tmp, 0xffff0000);
9994 tcg_gen_ext16u_i32(tmp2, tmp2);
9998 tcg_gen_shli_i32(tmp2, tmp2, shift);
9999 tcg_gen_ext16u_i32(tmp, tmp);
10000 tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
10002 tcg_gen_or_i32(tmp, tmp, tmp2);
10003 tcg_temp_free_i32(tmp2);
10004 store_reg(s, rd, tmp);
10005 } else if ((insn & 0x00200020) == 0x00200000) {
10007 tmp = load_reg(s, rm);
10008 shift = (insn >> 7) & 0x1f;
10009 if (insn & (1 << 6)) {
10012 tcg_gen_sari_i32(tmp, tmp, shift);
10014 tcg_gen_shli_i32(tmp, tmp, shift);
10016 sh = (insn >> 16) & 0x1f;
10017 tmp2 = tcg_const_i32(sh);
10018 if (insn & (1 << 22))
10019 gen_helper_usat(tmp, cpu_env, tmp, tmp2);
10021 gen_helper_ssat(tmp, cpu_env, tmp, tmp2);
10022 tcg_temp_free_i32(tmp2);
10023 store_reg(s, rd, tmp);
10024 } else if ((insn & 0x00300fe0) == 0x00200f20) {
10026 tmp = load_reg(s, rm);
10027 sh = (insn >> 16) & 0x1f;
10028 tmp2 = tcg_const_i32(sh);
10029 if (insn & (1 << 22))
10030 gen_helper_usat16(tmp, cpu_env, tmp, tmp2);
10032 gen_helper_ssat16(tmp, cpu_env, tmp, tmp2);
10033 tcg_temp_free_i32(tmp2);
10034 store_reg(s, rd, tmp);
10035 } else if ((insn & 0x00700fe0) == 0x00000fa0) {
10036 /* Select bytes. */
10037 tmp = load_reg(s, rn);
10038 tmp2 = load_reg(s, rm);
10039 tmp3 = tcg_temp_new_i32();
10040 tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUARMState, GE));
10041 gen_helper_sel_flags(tmp, tmp3, tmp, tmp2);
10042 tcg_temp_free_i32(tmp3);
10043 tcg_temp_free_i32(tmp2);
10044 store_reg(s, rd, tmp);
10045 } else if ((insn & 0x000003e0) == 0x00000060) {
10046 tmp = load_reg(s, rm);
10047 shift = (insn >> 10) & 3;
10048 /* ??? In many cases it's not necessary to do a
10049 rotate, a shift is sufficient. */
10051 tcg_gen_rotri_i32(tmp, tmp, shift * 8);
10052 op1 = (insn >> 20) & 7;
10054 case 0: gen_sxtb16(tmp); break;
10055 case 2: gen_sxtb(tmp); break;
10056 case 3: gen_sxth(tmp); break;
10057 case 4: gen_uxtb16(tmp); break;
10058 case 6: gen_uxtb(tmp); break;
10059 case 7: gen_uxth(tmp); break;
10060 default: goto illegal_op;
10063 tmp2 = load_reg(s, rn);
10064 if ((op1 & 3) == 0) {
10065 gen_add16(tmp, tmp2);
10067 tcg_gen_add_i32(tmp, tmp, tmp2);
10068 tcg_temp_free_i32(tmp2);
10071 store_reg(s, rd, tmp);
10072 } else if ((insn & 0x003f0f60) == 0x003f0f20) {
10074 tmp = load_reg(s, rm);
10075 if (insn & (1 << 22)) {
10076 if (insn & (1 << 7)) {
10080 gen_helper_rbit(tmp, tmp);
10083 if (insn & (1 << 7))
10086 tcg_gen_bswap32_i32(tmp, tmp);
10088 store_reg(s, rd, tmp);
10093 case 2: /* Multiplies (Type 3). */
10094 switch ((insn >> 20) & 0x7) {
10096 if (((insn >> 6) ^ (insn >> 7)) & 1) {
10097 /* op2 not 00x or 11x : UNDEF */
10100 /* Signed multiply most significant [accumulate].
10101 (SMMUL, SMMLA, SMMLS) */
10102 tmp = load_reg(s, rm);
10103 tmp2 = load_reg(s, rs);
10104 tmp64 = gen_muls_i64_i32(tmp, tmp2);
10107 tmp = load_reg(s, rd);
10108 if (insn & (1 << 6)) {
10109 tmp64 = gen_subq_msw(tmp64, tmp);
10111 tmp64 = gen_addq_msw(tmp64, tmp);
10114 if (insn & (1 << 5)) {
10115 tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u);
10117 tcg_gen_shri_i64(tmp64, tmp64, 32);
10118 tmp = tcg_temp_new_i32();
10119 tcg_gen_extrl_i64_i32(tmp, tmp64);
10120 tcg_temp_free_i64(tmp64);
10121 store_reg(s, rn, tmp);
10125 /* SMLAD, SMUAD, SMLSD, SMUSD, SMLALD, SMLSLD */
10126 if (insn & (1 << 7)) {
10129 tmp = load_reg(s, rm);
10130 tmp2 = load_reg(s, rs);
10131 if (insn & (1 << 5))
10132 gen_swap_half(tmp2);
10133 gen_smul_dual(tmp, tmp2);
10134 if (insn & (1 << 22)) {
10135 /* smlald, smlsld */
10138 tmp64 = tcg_temp_new_i64();
10139 tmp64_2 = tcg_temp_new_i64();
10140 tcg_gen_ext_i32_i64(tmp64, tmp);
10141 tcg_gen_ext_i32_i64(tmp64_2, tmp2);
10142 tcg_temp_free_i32(tmp);
10143 tcg_temp_free_i32(tmp2);
10144 if (insn & (1 << 6)) {
10145 tcg_gen_sub_i64(tmp64, tmp64, tmp64_2);
10147 tcg_gen_add_i64(tmp64, tmp64, tmp64_2);
10149 tcg_temp_free_i64(tmp64_2);
10150 gen_addq(s, tmp64, rd, rn);
10151 gen_storeq_reg(s, rd, rn, tmp64);
10152 tcg_temp_free_i64(tmp64);
10154 /* smuad, smusd, smlad, smlsd */
10155 if (insn & (1 << 6)) {
10156 /* This subtraction cannot overflow. */
10157 tcg_gen_sub_i32(tmp, tmp, tmp2);
10159 /* This addition cannot overflow 32 bits;
10160 * however it may overflow considered as a
10161 * signed operation, in which case we must set the Q flag. */
10164 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
10166 tcg_temp_free_i32(tmp2);
10169 tmp2 = load_reg(s, rd);
10170 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
10171 tcg_temp_free_i32(tmp2);
10173 store_reg(s, rn, tmp);
10179 if (!dc_isar_feature(arm_div, s)) {
10182 if (((insn >> 5) & 7) || (rd != 15)) {
10185 tmp = load_reg(s, rm);
10186 tmp2 = load_reg(s, rs);
10187 if (insn & (1 << 21)) {
10188 gen_helper_udiv(tmp, tmp, tmp2);
10190 gen_helper_sdiv(tmp, tmp, tmp2);
10192 tcg_temp_free_i32(tmp2);
10193 store_reg(s, rn, tmp);
10200 op1 = ((insn >> 17) & 0x38) | ((insn >> 5) & 7);
10202 case 0: /* Unsigned sum of absolute differences. */
10204 tmp = load_reg(s, rm);
10205 tmp2 = load_reg(s, rs);
10206 gen_helper_usad8(tmp, tmp, tmp2);
10207 tcg_temp_free_i32(tmp2);
10209 tmp2 = load_reg(s, rd);
10210 tcg_gen_add_i32(tmp, tmp, tmp2);
10211 tcg_temp_free_i32(tmp2);
10213 store_reg(s, rn, tmp);
10215 case 0x20: case 0x24: case 0x28: case 0x2c:
10216 /* Bitfield insert/clear. */
10218 shift = (insn >> 7) & 0x1f;
10219 i = (insn >> 16) & 0x1f;
10221 /* UNPREDICTABLE; we choose to UNDEF */
10226 tmp = tcg_temp_new_i32();
10227 tcg_gen_movi_i32(tmp, 0);
10229 tmp = load_reg(s, rm);
10232 tmp2 = load_reg(s, rd);
10233 tcg_gen_deposit_i32(tmp, tmp2, tmp, shift, i);
10234 tcg_temp_free_i32(tmp2);
10236 store_reg(s, rd, tmp);
10238 case 0x12: case 0x16: case 0x1a: case 0x1e: /* sbfx */
10239 case 0x32: case 0x36: case 0x3a: case 0x3e: /* ubfx */
10241 tmp = load_reg(s, rm);
10242 shift = (insn >> 7) & 0x1f;
10243 i = ((insn >> 16) & 0x1f) + 1;
10244 if (shift + i > 32)
10248 tcg_gen_extract_i32(tmp, tmp, shift, i);
10250 tcg_gen_sextract_i32(tmp, tmp, shift, i);
10253 store_reg(s, rd, tmp);
10263 /* Check for undefined extension instructions
10264 * per the ARM Bible IE:
10265 * xxxx 0111 1111 xxxx xxxx xxxx 1111 xxxx */
10267 sh = (0xf << 20) | (0xf << 4);
10268 if (op1 == 0x7 && ((insn & sh) == sh))
10272 /* load/store byte/word */
10273 rn = (insn >> 16) & 0xf;
10274 rd = (insn >> 12) & 0xf;
10275 tmp2 = load_reg(s, rn);
10276 if ((insn & 0x01200000) == 0x00200000) {
10278 i = get_a32_user_mem_index(s);
10280 i = get_mem_index(s);
10282 if (insn & (1 << 24))
10283 gen_add_data_offset(s, insn, tmp2);
10284 if (insn & (1 << 20)) {
10286 tmp = tcg_temp_new_i32();
10287 if (insn & (1 << 22)) {
10288 gen_aa32_ld8u_iss(s, tmp, tmp2, i, rd);
10290 gen_aa32_ld32u_iss(s, tmp, tmp2, i, rd);
10294 tmp = load_reg(s, rd);
10295 if (insn & (1 << 22)) {
10296 gen_aa32_st8_iss(s, tmp, tmp2, i, rd);
10298 gen_aa32_st32_iss(s, tmp, tmp2, i, rd);
10300 tcg_temp_free_i32(tmp);
10302 if (!(insn & (1 << 24))) {
10303 gen_add_data_offset(s, insn, tmp2);
10304 store_reg(s, rn, tmp2);
10305 } else if (insn & (1 << 21)) {
10306 store_reg(s, rn, tmp2);
10308 tcg_temp_free_i32(tmp2);
10310 if (insn & (1 << 20)) {
10311 /* Complete the load. */
10312 store_reg_from_load(s, rd, tmp);
10318 int j, n, loaded_base;
10319 bool exc_return = false;
10320 bool is_load = extract32(insn, 20, 1);
10322 TCGv_i32 loaded_var;
10323 /* load/store multiple words */
10324 /* XXX: store correct base if write back */
10325 if (insn & (1 << 22)) {
10326 /* LDM (user), LDM (exception return) and STM (user) */
10328 goto illegal_op; /* only usable in supervisor mode */
10330 if (is_load && extract32(insn, 15, 1)) {
10336 rn = (insn >> 16) & 0xf;
10337 addr = load_reg(s, rn);
10339 /* compute total size */
10343 for(i=0;i<16;i++) {
10344 if (insn & (1 << i))
10347 /* XXX: test invalid n == 0 case ? */
10348 if (insn & (1 << 23)) {
10349 if (insn & (1 << 24)) {
10350 /* pre increment */
10351 tcg_gen_addi_i32(addr, addr, 4);
10353 /* post increment */
10356 if (insn & (1 << 24)) {
10357 /* pre decrement */
10358 tcg_gen_addi_i32(addr, addr, -(n * 4));
10360 /* post decrement */
10362 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
10366 for(i=0;i<16;i++) {
10367 if (insn & (1 << i)) {
10370 tmp = tcg_temp_new_i32();
10371 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
10373 tmp2 = tcg_const_i32(i);
10374 gen_helper_set_user_reg(cpu_env, tmp2, tmp);
10375 tcg_temp_free_i32(tmp2);
10376 tcg_temp_free_i32(tmp);
10377 } else if (i == rn) {
10380 } else if (rn == 15 && exc_return) {
10381 store_pc_exc_ret(s, tmp);
10383 store_reg_from_load(s, i, tmp);
10388 /* special case: r15 = PC + 8 */
10389 val = (long)s->pc + 4;
10390 tmp = tcg_temp_new_i32();
10391 tcg_gen_movi_i32(tmp, val);
10393 tmp = tcg_temp_new_i32();
10394 tmp2 = tcg_const_i32(i);
10395 gen_helper_get_user_reg(tmp, cpu_env, tmp2);
10396 tcg_temp_free_i32(tmp2);
10398 tmp = load_reg(s, i);
10400 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
10401 tcg_temp_free_i32(tmp);
10404 /* no need to add after the last transfer */
10406 tcg_gen_addi_i32(addr, addr, 4);
10409 if (insn & (1 << 21)) {
10411 if (insn & (1 << 23)) {
10412 if (insn & (1 << 24)) {
10413 /* pre increment */
10415 /* post increment */
10416 tcg_gen_addi_i32(addr, addr, 4);
10419 if (insn & (1 << 24)) {
10420 /* pre decrement */
10422 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
10424 /* post decrement */
10425 tcg_gen_addi_i32(addr, addr, -(n * 4));
10428 store_reg(s, rn, addr);
10430 tcg_temp_free_i32(addr);
10433 store_reg(s, rn, loaded_var);
10436 /* Restore CPSR from SPSR. */
10437 tmp = load_cpu_field(spsr);
10438 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
10441 gen_helper_cpsr_write_eret(cpu_env, tmp);
10442 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
10445 tcg_temp_free_i32(tmp);
10446 /* Must exit loop to check un-masked IRQs */
10447 s->base.is_jmp = DISAS_EXIT;
10456 /* branch (and link) */
10457 val = (int32_t)s->pc;
10458 if (insn & (1 << 24)) {
10459 tmp = tcg_temp_new_i32();
10460 tcg_gen_movi_i32(tmp, val);
10461 store_reg(s, 14, tmp);
10463 offset = sextract32(insn << 2, 0, 26);
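/* The 24-bit branch immediate is shifted left by 2 and sign-extended
 * from 26 bits above, giving a byte offset of roughly +/-32MB relative
 * to the architectural PC (the address of this insn + 8).
 */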
10471 if (((insn >> 8) & 0xe) == 10) {
10473 if (disas_vfp_insn(s, insn)) {
10476 } else if (disas_coproc_insn(s, insn)) {
10483 gen_set_pc_im(s, s->pc);
10484 s->svc_imm = extract32(insn, 0, 24);
10485 s->base.is_jmp = DISAS_SWI;
10489 gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
10490 default_exception_el(s));
10496 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t insn)
10498 /* Return true if this is a 16 bit instruction. We must be precise
10499 * about this (matching the decode). We assume that s->pc still
10500 * points to the first 16 bits of the insn.
10502 if ((insn >> 11) < 0x1d) {
10503 /* Definitely a 16-bit instruction */
10507 /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
10508 * first half of a 32-bit Thumb insn. Thumb-1 cores might
10509 * end up actually treating this as two 16-bit insns, though,
10510 * if it's half of a bl/blx pair that might span a page boundary.
10512 if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
10513 arm_dc_feature(s, ARM_FEATURE_M)) {
10514 /* Thumb2 cores (including all M profile ones) always treat
10515 * 32-bit insns as 32-bit.
10520 if ((insn >> 11) == 0x1e && s->pc - s->page_start < TARGET_PAGE_SIZE - 3) {
10521 /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
10522 * is not on the next page; we merge this into a 32-bit insn.
10527 /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
10528 * 0b1111_1xxx_xxxx_xxxx : BL suffix;
10529 * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
10530 * -- handle as single 16 bit insn
10535 /* Return true if this is a Thumb-2 logical op. */
10537 thumb2_logic_op(int op)
10542 /* Generate code for a Thumb-2 data processing operation. If CONDS is nonzero
10543 then set condition code flags based on the result of the operation.
10544 If SHIFTER_OUT is nonzero then set the carry flag for logical operations
10545 to the high bit of T1.
10546 Returns zero if the opcode is valid. */
10549 gen_thumb2_data_op(DisasContext *s, int op, int conds, uint32_t shifter_out,
10550 TCGv_i32 t0, TCGv_i32 t1)
10557 tcg_gen_and_i32(t0, t0, t1);
10561 tcg_gen_andc_i32(t0, t0, t1);
10565 tcg_gen_or_i32(t0, t0, t1);
10569 tcg_gen_orc_i32(t0, t0, t1);
10573 tcg_gen_xor_i32(t0, t0, t1);
10578 gen_add_CC(t0, t0, t1);
10580 tcg_gen_add_i32(t0, t0, t1);
10584 gen_adc_CC(t0, t0, t1);
10590 gen_sbc_CC(t0, t0, t1);
10592 gen_sub_carry(t0, t0, t1);
10597 gen_sub_CC(t0, t0, t1);
10599 tcg_gen_sub_i32(t0, t0, t1);
10603 gen_sub_CC(t0, t1, t0);
10605 tcg_gen_sub_i32(t0, t1, t0);
10607 default: /* 5, 6, 7, 9, 12, 15. */
10613 gen_set_CF_bit31(t1);
10618 /* Translate a 32-bit thumb instruction. */
10619 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
10621 uint32_t imm, shift, offset;
10622 uint32_t rd, rn, rm, rs;
10634 * ARMv6-M supports a limited subset of Thumb2 instructions.
10635 * Other Thumb1 architectures allow only 32-bit
10636 * combined BL/BLX prefix and suffix.
10638 if (arm_dc_feature(s, ARM_FEATURE_M) &&
10639 !arm_dc_feature(s, ARM_FEATURE_V7)) {
10641 bool found = false;
10642 static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
10643 0xf3b08040 /* dsb */,
10644 0xf3b08050 /* dmb */,
10645 0xf3b08060 /* isb */,
10646 0xf3e08000 /* mrs */,
10647 0xf000d000 /* bl */};
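/* armv6m_insn[] and the matching armv6m_mask[] below list the only 32-bit encodings that ARMv6-M accepts; anything not matched is treated as UNDEF. */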
10648 static const uint32_t armv6m_mask[] = {0xffe0d000,
10655 for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
10656 if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
10664 } else if ((insn & 0xf800e800) != 0xf000e800) {
10668 rn = (insn >> 16) & 0xf;
10669 rs = (insn >> 12) & 0xf;
10670 rd = (insn >> 8) & 0xf;
10672 switch ((insn >> 25) & 0xf) {
10673 case 0: case 1: case 2: case 3:
10674 /* 16-bit instructions. Should never happen. */
10677 if (insn & (1 << 22)) {
10678 /* 0b1110_100x_x1xx_xxxx_xxxx_xxxx_xxxx_xxxx
10679 * - load/store doubleword, load/store exclusive, ldacq/strel,
10680 * table branch, TT.
10682 if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_M) &&
10683 arm_dc_feature(s, ARM_FEATURE_V8)) {
10684 /* 0b1110_1001_0111_1111_1110_1001_0111_1111 - SG (v8M only)
10686 * The bulk of the behaviour for this instruction is implemented
10687 * in v7m_handle_execute_nsc(), which deals with the insn when
10688 * it is executed by a CPU in non-secure state from memory
10689 * which is Secure & NonSecure-Callable.
10690 * Here we only need to handle the remaining cases:
10691 * * in NS memory (including the "security extension not
10692 * implemented" case) : NOP
10693 * * in S memory but CPU already secure (clear IT bits)
10694 * We know that the attribute for the memory this insn is
10695 * in must match the current CPU state, because otherwise
10696 * get_phys_addr_pmsav8 would have generated an exception.
10698 if (s->v8m_secure) {
10699 /* Like the IT insn, we don't need to generate any code */
10700 s->condexec_cond = 0;
10701 s->condexec_mask = 0;
10703 } else if (insn & 0x01200000) {
10704 /* 0b1110_1000_x11x_xxxx_xxxx_xxxx_xxxx_xxxx
10705 * - load/store dual (post-indexed)
10706 * 0b1111_1001_x10x_xxxx_xxxx_xxxx_xxxx_xxxx
10707 * - load/store dual (literal and immediate)
10708 * 0b1111_1001_x11x_xxxx_xxxx_xxxx_xxxx_xxxx
10709 * - load/store dual (pre-indexed)
10711 bool wback = extract32(insn, 21, 1);
10714 if (insn & (1 << 21)) {
10715 /* UNPREDICTABLE */
10718 addr = tcg_temp_new_i32();
10719 tcg_gen_movi_i32(addr, s->pc & ~3);
10721 addr = load_reg(s, rn);
10723 offset = (insn & 0xff) * 4;
10724 if ((insn & (1 << 23)) == 0) {
10728 if (s->v8m_stackcheck && rn == 13 && wback) {
10730 * Here 'addr' is the current SP; if offset is +ve we're
10731 * moving SP up, else down. It is UNKNOWN whether the limit
10732 * check triggers when SP starts below the limit and ends
10733 * up above it; check whichever of the current and final
10734 * SP is lower, so QEMU will trigger in that situation.
10736 if ((int32_t)offset < 0) {
10737 TCGv_i32 newsp = tcg_temp_new_i32();
10739 tcg_gen_addi_i32(newsp, addr, offset);
10740 gen_helper_v8m_stackcheck(cpu_env, newsp);
10741 tcg_temp_free_i32(newsp);
10743 gen_helper_v8m_stackcheck(cpu_env, addr);
10747 if (insn & (1 << 24)) {
10748 tcg_gen_addi_i32(addr, addr, offset);
10751 if (insn & (1 << 20)) {
10753 tmp = tcg_temp_new_i32();
10754 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
10755 store_reg(s, rs, tmp);
10756 tcg_gen_addi_i32(addr, addr, 4);
10757 tmp = tcg_temp_new_i32();
10758 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
10759 store_reg(s, rd, tmp);
10762 tmp = load_reg(s, rs);
10763 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
10764 tcg_temp_free_i32(tmp);
10765 tcg_gen_addi_i32(addr, addr, 4);
10766 tmp = load_reg(s, rd);
10767 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
10768 tcg_temp_free_i32(tmp);
10771 /* Base writeback. */
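/* addr now points 4 bytes past the first word transferred; adding (offset - 4) leaves the architectural writeback value in the base register for both the pre- and post-indexed forms. */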
10772 tcg_gen_addi_i32(addr, addr, offset - 4);
10773 store_reg(s, rn, addr);
10775 tcg_temp_free_i32(addr);
10777 } else if ((insn & (1 << 23)) == 0) {
10778 /* 0b1110_1000_010x_xxxx_xxxx_xxxx_xxxx_xxxx
10779 * - load/store exclusive word
10783 if (!(insn & (1 << 20)) &&
10784 arm_dc_feature(s, ARM_FEATURE_M) &&
10785 arm_dc_feature(s, ARM_FEATURE_V8)) {
10786 /* 0b1110_1000_0100_xxxx_1111_xxxx_xxxx_xxxx - TT (v8M only) */
10789 bool alt = insn & (1 << 7);
10790 TCGv_i32 addr, op, ttresp;
10792 if ((insn & 0x3f) || rd == 13 || rd == 15 || rn == 15) {
10793 /* we UNDEF for these UNPREDICTABLE cases */
10797 if (alt && !s->v8m_secure) {
10801 addr = load_reg(s, rn);
10802 op = tcg_const_i32(extract32(insn, 6, 2));
10803 ttresp = tcg_temp_new_i32();
10804 gen_helper_v7m_tt(ttresp, cpu_env, addr, op);
10805 tcg_temp_free_i32(addr);
10806 tcg_temp_free_i32(op);
10807 store_reg(s, rd, ttresp);
10812 addr = tcg_temp_local_new_i32();
10813 load_reg_var(s, addr, rn);
10814 tcg_gen_addi_i32(addr, addr, (insn & 0xff) << 2);
10815 if (insn & (1 << 20)) {
10816 gen_load_exclusive(s, rs, 15, addr, 2);
10818 gen_store_exclusive(s, rd, rs, 15, addr, 2);
10820 tcg_temp_free_i32(addr);
10821 } else if ((insn & (7 << 5)) == 0) {
10822 /* Table Branch. */
10824 addr = tcg_temp_new_i32();
10825 tcg_gen_movi_i32(addr, s->pc);
10827 addr = load_reg(s, rn);
10829 tmp = load_reg(s, rm);
10830 tcg_gen_add_i32(addr, addr, tmp);
10831 if (insn & (1 << 4)) {
10833 tcg_gen_add_i32(addr, addr, tmp);
10834 tcg_temp_free_i32(tmp);
10835 tmp = tcg_temp_new_i32();
10836 gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
10838 tcg_temp_free_i32(tmp);
10839 tmp = tcg_temp_new_i32();
10840 gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
10842 tcg_temp_free_i32(addr);
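/* The loaded table entry is a halfword count: scale it to bytes and add the architectural PC (this insn's address + 4, which is what s->pc holds here) to form the branch target. */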
10843 tcg_gen_shli_i32(tmp, tmp, 1);
10844 tcg_gen_addi_i32(tmp, tmp, s->pc);
10845 store_reg(s, 15, tmp);
10847 int op2 = (insn >> 6) & 0x3;
10848 op = (insn >> 4) & 0x3;
10853 /* Load/store exclusive byte/halfword/doubleword */
10860 /* Load-acquire/store-release */
10866 /* Load-acquire/store-release exclusive */
10870 addr = tcg_temp_local_new_i32();
10871 load_reg_var(s, addr, rn);
10873 if (insn & (1 << 20)) {
10874 tmp = tcg_temp_new_i32();
10877 gen_aa32_ld8u_iss(s, tmp, addr, get_mem_index(s),
10881 gen_aa32_ld16u_iss(s, tmp, addr, get_mem_index(s),
10885 gen_aa32_ld32u_iss(s, tmp, addr, get_mem_index(s),
10891 store_reg(s, rs, tmp);
10893 tmp = load_reg(s, rs);
10896 gen_aa32_st8_iss(s, tmp, addr, get_mem_index(s),
10900 gen_aa32_st16_iss(s, tmp, addr, get_mem_index(s),
10904 gen_aa32_st32_iss(s, tmp, addr, get_mem_index(s),
10910 tcg_temp_free_i32(tmp);
10912 } else if (insn & (1 << 20)) {
10913 gen_load_exclusive(s, rs, rd, addr, op);
10915 gen_store_exclusive(s, rm, rs, rd, addr, op);
10917 tcg_temp_free_i32(addr);
10920 /* Load/store multiple, RFE, SRS. */
10921 if (((insn >> 23) & 1) == ((insn >> 24) & 1)) {
10922 /* RFE, SRS: not available in user mode or on M profile */
10923 if (IS_USER(s) || arm_dc_feature(s, ARM_FEATURE_M)) {
10926 if (insn & (1 << 20)) {
10928 addr = load_reg(s, rn);
10929 if ((insn & (1 << 24)) == 0)
10930 tcg_gen_addi_i32(addr, addr, -8);
10931 /* Load PC into tmp and CPSR into tmp2. */
10932 tmp = tcg_temp_new_i32();
10933 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
10934 tcg_gen_addi_i32(addr, addr, 4);
10935 tmp2 = tcg_temp_new_i32();
10936 gen_aa32_ld32u(s, tmp2, addr, get_mem_index(s));
10937 if (insn & (1 << 21)) {
10938 /* Base writeback. */
10939 if (insn & (1 << 24)) {
10940 tcg_gen_addi_i32(addr, addr, 4);
10942 tcg_gen_addi_i32(addr, addr, -4);
10944 store_reg(s, rn, addr);
10946 tcg_temp_free_i32(addr);
10948 gen_rfe(s, tmp, tmp2);
10951 gen_srs(s, (insn & 0x1f), (insn & (1 << 24)) ? 1 : 2,
10955 int i, loaded_base = 0;
10956 TCGv_i32 loaded_var;
10957 bool wback = extract32(insn, 21, 1);
10958 /* Load/store multiple. */
10959 addr = load_reg(s, rn);
10961 for (i = 0; i < 16; i++) {
10962 if (insn & (1 << i))
10966 if (insn & (1 << 24)) {
10967 tcg_gen_addi_i32(addr, addr, -offset);
10970 if (s->v8m_stackcheck && rn == 13 && wback) {
10972 * If the writeback is incrementing SP rather than
10973 * decrementing it, and the initial SP is below the
10974 * stack limit but the final written-back SP would
10975 be above, then we must not perform any memory
10976 * accesses, but it is IMPDEF whether we generate
10977 * an exception. We choose to do so in this case.
10978 * At this point 'addr' is the lowest address, so
10979 * either the original SP (if incrementing) or our
10980 * final SP (if decrementing), so that's what we check.
10982 gen_helper_v8m_stackcheck(cpu_env, addr);
10986 for (i = 0; i < 16; i++) {
10987 if ((insn & (1 << i)) == 0)
10989 if (insn & (1 << 20)) {
10991 tmp = tcg_temp_new_i32();
10992 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
10994 gen_bx_excret(s, tmp);
10995 } else if (i == rn) {
10999 store_reg(s, i, tmp);
11003 tmp = load_reg(s, i);
11004 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
11005 tcg_temp_free_i32(tmp);
11007 tcg_gen_addi_i32(addr, addr, 4);
11010 store_reg(s, rn, loaded_var);
11013 /* Base register writeback. */
11014 if (insn & (1 << 24)) {
11015 tcg_gen_addi_i32(addr, addr, -offset);
11017 /* Fault if writeback register is in register list. */
11018 if (insn & (1 << rn))
11020 store_reg(s, rn, addr);
11022 tcg_temp_free_i32(addr);
11029 op = (insn >> 21) & 0xf;
11031 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
11034 /* Halfword pack. */
11035 tmp = load_reg(s, rn);
11036 tmp2 = load_reg(s, rm);
11037 shift = ((insn >> 10) & 0x1c) | ((insn >> 6) & 0x3);
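/* Bit 5 set means PKHTB (arithmetic shift of Rm, keep Rn's top half); clear means PKHBT (left shift of Rm, keep Rn's bottom half). */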
11038 if (insn & (1 << 5)) {
11042 tcg_gen_sari_i32(tmp2, tmp2, shift);
11043 tcg_gen_andi_i32(tmp, tmp, 0xffff0000);
11044 tcg_gen_ext16u_i32(tmp2, tmp2);
11048 tcg_gen_shli_i32(tmp2, tmp2, shift);
11049 tcg_gen_ext16u_i32(tmp, tmp);
11050 tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
11052 tcg_gen_or_i32(tmp, tmp, tmp2);
11053 tcg_temp_free_i32(tmp2);
11054 store_reg(s, rd, tmp);
11056 /* Data processing register constant shift. */
11058 tmp = tcg_temp_new_i32();
11059 tcg_gen_movi_i32(tmp, 0);
11061 tmp = load_reg(s, rn);
11063 tmp2 = load_reg(s, rm);
11065 shiftop = (insn >> 4) & 3;
11066 shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
11067 conds = (insn & (1 << 20)) != 0;
11068 logic_cc = (conds && thumb2_logic_op(op));
11069 gen_arm_shift_im(tmp2, shiftop, shift, logic_cc);
11070 if (gen_thumb2_data_op(s, op, conds, 0, tmp, tmp2))
11072 tcg_temp_free_i32(tmp2);
11074 ((op == 2 && rn == 15) ||
11075 (op == 8 && rn == 13) ||
11076 (op == 13 && rn == 13))) {
11077 /* MOV SP, ... or ADD SP, SP, ... or SUB SP, SP, ... */
11078 store_sp_checked(s, tmp);
11079 } else if (rd != 15) {
11080 store_reg(s, rd, tmp);
11082 tcg_temp_free_i32(tmp);
11086 case 13: /* Misc data processing. */
11087 op = ((insn >> 22) & 6) | ((insn >> 7) & 1);
11088 if (op < 4 && (insn & 0xf000) != 0xf000)
11091 case 0: /* Register controlled shift. */
11092 tmp = load_reg(s, rn);
11093 tmp2 = load_reg(s, rm);
11094 if ((insn & 0x70) != 0)
11097 * 0b1111_1010_0xxx_xxxx_1111_xxxx_0000_xxxx:
11098 * - MOV, MOVS (register-shifted register), flagsetting
11100 op = (insn >> 21) & 3;
11101 logic_cc = (insn & (1 << 20)) != 0;
11102 gen_arm_shift_reg(tmp, op, tmp2, logic_cc);
11105 store_reg(s, rd, tmp);
11107 case 1: /* Sign/zero extend. */
11108 op = (insn >> 20) & 7;
11110 case 0: /* SXTAH, SXTH */
11111 case 1: /* UXTAH, UXTH */
11112 case 4: /* SXTAB, SXTB */
11113 case 5: /* UXTAB, UXTB */
11115 case 2: /* SXTAB16, SXTB16 */
11116 case 3: /* UXTAB16, UXTB16 */
11117 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
11125 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
11129 tmp = load_reg(s, rm);
11130 shift = (insn >> 4) & 3;
11131 /* ??? In many cases it's not necessary to do a
11132 rotate; a shift is sufficient. */
11134 tcg_gen_rotri_i32(tmp, tmp, shift * 8);
11135 op = (insn >> 20) & 7;
11137 case 0: gen_sxth(tmp); break;
11138 case 1: gen_uxth(tmp); break;
11139 case 2: gen_sxtb16(tmp); break;
11140 case 3: gen_uxtb16(tmp); break;
11141 case 4: gen_sxtb(tmp); break;
11142 case 5: gen_uxtb(tmp); break;
11144 g_assert_not_reached();
11147 tmp2 = load_reg(s, rn);
11148 if ((op >> 1) == 1) {
11149 gen_add16(tmp, tmp2);
11151 tcg_gen_add_i32(tmp, tmp, tmp2);
11152 tcg_temp_free_i32(tmp2);
11155 store_reg(s, rd, tmp);
11157 case 2: /* SIMD add/subtract. */
11158 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
11161 op = (insn >> 20) & 7;
11162 shift = (insn >> 4) & 7;
11163 if ((op & 3) == 3 || (shift & 3) == 3)
11165 tmp = load_reg(s, rn);
11166 tmp2 = load_reg(s, rm);
11167 gen_thumb2_parallel_addsub(op, shift, tmp, tmp2);
11168 tcg_temp_free_i32(tmp2);
11169 store_reg(s, rd, tmp);
11171 case 3: /* Other data processing. */
11172 op = ((insn >> 17) & 0x38) | ((insn >> 4) & 7);
11174 /* Saturating add/subtract. */
11175 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
11178 tmp = load_reg(s, rn);
11179 tmp2 = load_reg(s, rm);
11181 gen_helper_double_saturate(tmp, cpu_env, tmp);
11183 gen_helper_sub_saturate(tmp, cpu_env, tmp2, tmp);
11185 gen_helper_add_saturate(tmp, cpu_env, tmp, tmp2);
11186 tcg_temp_free_i32(tmp2);
11189 case 0x0a: /* rbit */
11190 case 0x08: /* rev */
11191 case 0x09: /* rev16 */
11192 case 0x0b: /* revsh */
11193 case 0x18: /* clz */
11195 case 0x10: /* sel */
11196 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
11200 case 0x20: /* crc32/crc32c */
11206 if (!dc_isar_feature(aa32_crc32, s)) {
11213 tmp = load_reg(s, rn);
11215 case 0x0a: /* rbit */
11216 gen_helper_rbit(tmp, tmp);
11218 case 0x08: /* rev */
11219 tcg_gen_bswap32_i32(tmp, tmp);
11221 case 0x09: /* rev16 */
11224 case 0x0b: /* revsh */
11227 case 0x10: /* sel */
11228 tmp2 = load_reg(s, rm);
11229 tmp3 = tcg_temp_new_i32();
11230 tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUARMState, GE));
11231 gen_helper_sel_flags(tmp, tmp3, tmp, tmp2);
11232 tcg_temp_free_i32(tmp3);
11233 tcg_temp_free_i32(tmp2);
11235 case 0x18: /* clz */
11236 tcg_gen_clzi_i32(tmp, tmp, 32);
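/* The third argument makes CLZ of zero yield 32, as the architecture requires. */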
11246 uint32_t sz = op & 0x3;
11247 uint32_t c = op & 0x8;
11249 tmp2 = load_reg(s, rm);
11251 tcg_gen_andi_i32(tmp2, tmp2, 0xff);
11252 } else if (sz == 1) {
11253 tcg_gen_andi_i32(tmp2, tmp2, 0xffff);
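/* The helper's final argument is the number of bytes folded in per step: 1, 2 or 4 according to sz. */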
11255 tmp3 = tcg_const_i32(1 << sz);
11257 gen_helper_crc32c(tmp, tmp, tmp2, tmp3);
11259 gen_helper_crc32(tmp, tmp, tmp2, tmp3);
11261 tcg_temp_free_i32(tmp2);
11262 tcg_temp_free_i32(tmp3);
11266 g_assert_not_reached();
11269 store_reg(s, rd, tmp);
11271 case 4: case 5: /* 32-bit multiply. Sum of absolute differences. */
11272 switch ((insn >> 20) & 7) {
11273 case 0: /* 32 x 32 -> 32 */
11274 case 7: /* Unsigned sum of absolute differences. */
11276 case 1: /* 16 x 16 -> 32 */
11277 case 2: /* Dual multiply add. */
11278 case 3: /* 32 * 16 -> 32msb */
11279 case 4: /* Dual multiply subtract. */
11280 case 5: case 6: /* 32 * 32 -> 32msb (SMMUL, SMMLA, SMMLS) */
11281 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
11286 op = (insn >> 4) & 0xf;
11287 tmp = load_reg(s, rn);
11288 tmp2 = load_reg(s, rm);
11289 switch ((insn >> 20) & 7) {
11290 case 0: /* 32 x 32 -> 32 */
11291 tcg_gen_mul_i32(tmp, tmp, tmp2);
11292 tcg_temp_free_i32(tmp2);
11294 tmp2 = load_reg(s, rs);
11296 tcg_gen_sub_i32(tmp, tmp2, tmp);
11298 tcg_gen_add_i32(tmp, tmp, tmp2);
11299 tcg_temp_free_i32(tmp2);
11302 case 1: /* 16 x 16 -> 32 */
11303 gen_mulxy(tmp, tmp2, op & 2, op & 1);
11304 tcg_temp_free_i32(tmp2);
11306 tmp2 = load_reg(s, rs);
11307 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
11308 tcg_temp_free_i32(tmp2);
11311 case 2: /* Dual multiply add. */
11312 case 4: /* Dual multiply subtract. */
11314 gen_swap_half(tmp2);
11315 gen_smul_dual(tmp, tmp2);
11316 if (insn & (1 << 22)) {
11317 /* This subtraction cannot overflow. */
11318 tcg_gen_sub_i32(tmp, tmp, tmp2);
11320 /* This addition cannot overflow 32 bits;
11321 * however it may overflow considered as a signed
11322 * operation, in which case we must set the Q flag.
11324 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
11326 tcg_temp_free_i32(tmp2);
11329 tmp2 = load_reg(s, rs);
11330 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
11331 tcg_temp_free_i32(tmp2);
11334 case 3: /* 32 * 16 -> 32msb */
11336 tcg_gen_sari_i32(tmp2, tmp2, 16);
11339 tmp64 = gen_muls_i64_i32(tmp, tmp2);
11340 tcg_gen_shri_i64(tmp64, tmp64, 16);
11341 tmp = tcg_temp_new_i32();
11342 tcg_gen_extrl_i64_i32(tmp, tmp64);
11343 tcg_temp_free_i64(tmp64);
11346 tmp2 = load_reg(s, rs);
11347 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
11348 tcg_temp_free_i32(tmp2);
11351 case 5: case 6: /* 32 * 32 -> 32msb (SMMUL, SMMLA, SMMLS) */
11352 tmp64 = gen_muls_i64_i32(tmp, tmp2);
11354 tmp = load_reg(s, rs);
11355 if (insn & (1 << 20)) {
11356 tmp64 = gen_addq_msw(tmp64, tmp);
11358 tmp64 = gen_subq_msw(tmp64, tmp);
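/* Bit 4 selects the rounding ('R') form: add 0x80000000 before taking the high 32 bits of the product. */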
11361 if (insn & (1 << 4)) {
11362 tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u);
11364 tcg_gen_shri_i64(tmp64, tmp64, 32);
11365 tmp = tcg_temp_new_i32();
11366 tcg_gen_extrl_i64_i32(tmp, tmp64);
11367 tcg_temp_free_i64(tmp64);
11369 case 7: /* Unsigned sum of absolute differences. */
11370 gen_helper_usad8(tmp, tmp, tmp2);
11371 tcg_temp_free_i32(tmp2);
11373 tmp2 = load_reg(s, rs);
11374 tcg_gen_add_i32(tmp, tmp, tmp2);
11375 tcg_temp_free_i32(tmp2);
11379 store_reg(s, rd, tmp);
11381 case 6: case 7: /* 64-bit multiply, Divide. */
11382 op = ((insn >> 4) & 0xf) | ((insn >> 16) & 0x70);
11383 tmp = load_reg(s, rn);
11384 tmp2 = load_reg(s, rm);
11385 if ((op & 0x50) == 0x10) {
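/* SDIV/UDIV: the helpers return 0 for a zero divisor, the required result when divide-by-zero trapping is not enabled. */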
11387 if (!dc_isar_feature(thumb_div, s)) {
11391 gen_helper_udiv(tmp, tmp, tmp2);
11393 gen_helper_sdiv(tmp, tmp, tmp2);
11394 tcg_temp_free_i32(tmp2);
11395 store_reg(s, rd, tmp);
11396 } else if ((op & 0xe) == 0xc) {
11397 /* Dual multiply accumulate long. */
11398 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
11399 tcg_temp_free_i32(tmp);
11400 tcg_temp_free_i32(tmp2);
11404 gen_swap_half(tmp2);
11405 gen_smul_dual(tmp, tmp2);
11407 tcg_gen_sub_i32(tmp, tmp, tmp2);
11409 tcg_gen_add_i32(tmp, tmp, tmp2);
11411 tcg_temp_free_i32(tmp2);
11413 tmp64 = tcg_temp_new_i64();
11414 tcg_gen_ext_i32_i64(tmp64, tmp);
11415 tcg_temp_free_i32(tmp);
11416 gen_addq(s, tmp64, rs, rd);
11417 gen_storeq_reg(s, rs, rd, tmp64);
11418 tcg_temp_free_i64(tmp64);
11421 /* Unsigned 64-bit multiply */
11422 tmp64 = gen_mulu_i64_i32(tmp, tmp2);
11426 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
11427 tcg_temp_free_i32(tmp2);
11428 tcg_temp_free_i32(tmp);
11431 gen_mulxy(tmp, tmp2, op & 2, op & 1);
11432 tcg_temp_free_i32(tmp2);
11433 tmp64 = tcg_temp_new_i64();
11434 tcg_gen_ext_i32_i64(tmp64, tmp);
11435 tcg_temp_free_i32(tmp);
11437 /* Signed 64-bit multiply */
11438 tmp64 = gen_muls_i64_i32(tmp, tmp2);
11443 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
11444 tcg_temp_free_i64(tmp64);
11447 gen_addq_lo(s, tmp64, rs);
11448 gen_addq_lo(s, tmp64, rd);
11449 } else if (op & 0x40) {
11450 /* 64-bit accumulate. */
11451 gen_addq(s, tmp64, rs, rd);
11453 gen_storeq_reg(s, rs, rd, tmp64);
11454 tcg_temp_free_i64(tmp64);
11459 case 6: case 7: case 14: case 15:
11461 if (arm_dc_feature(s, ARM_FEATURE_M)) {
11462 /* We don't currently implement M profile FP support,
11463 * so this entire space should give a NOCP fault, with
11464 * the exception of the v8M VLLDM and VLSTM insns, which
11465 * must be NOPs in Secure state and UNDEF in Nonsecure state.
11467 if (arm_dc_feature(s, ARM_FEATURE_V8) &&
11468 (insn & 0xffa00f00) == 0xec200a00) {
11469 /* 0b1110_1100_0x1x_xxxx_xxxx_1010_xxxx_xxxx - VLLDM, VLSTM
11471 * We choose to UNDEF if the RAZ bits are non-zero.
11473 if (!s->v8m_secure || (insn & 0x0040f0ff)) {
11476 /* Just NOP since FP support is not implemented */
11479 /* All other insns: NOCP */
11480 gen_exception_insn(s, 4, EXCP_NOCP, syn_uncategorized(),
11481 default_exception_el(s));
11484 if ((insn & 0xfe000a00) == 0xfc000800
11485 && arm_dc_feature(s, ARM_FEATURE_V8)) {
11486 /* The Thumb2 and ARM encodings are identical. */
11487 if (disas_neon_insn_3same_ext(s, insn)) {
11490 } else if ((insn & 0xff000a00) == 0xfe000800
11491 && arm_dc_feature(s, ARM_FEATURE_V8)) {
11492 /* The Thumb2 and ARM encodings are identical. */
11493 if (disas_neon_insn_2reg_scalar_ext(s, insn)) {
11496 } else if (((insn >> 24) & 3) == 3) {
11497 /* Translate into the equivalent ARM encoding. */
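/* Thumb encodes these as 0xef../0xff..; moving the U bit from bit 28 down to bit 24 and then setting bit 28 gives the ARM 0xf2../0xf3.. Neon data-processing encoding. */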
11498 insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
11499 if (disas_neon_data_insn(s, insn)) {
11502 } else if (((insn >> 8) & 0xe) == 10) {
11503 if (disas_vfp_insn(s, insn)) {
11507 if (insn & (1 << 28))
11509 if (disas_coproc_insn(s, insn)) {
11514 case 8: case 9: case 10: case 11:
11515 if (insn & (1 << 15)) {
11516 /* Branches, misc control. */
11517 if (insn & 0x5000) {
11518 /* Unconditional branch. */
11519 /* signextend(hw1[10:0]) -> offset[:12]. */
11520 offset = ((int32_t)insn << 5) >> 9 & ~(int32_t)0xfff;
11521 /* hw1[10:0] -> offset[11:1]. */
11522 offset |= (insn & 0x7ff) << 1;
11523 /* (~hw2[13, 11] ^ offset[24]) -> offset[23,22]
11524 offset[24:22] already have the same value because of the
11525 sign extension above. */
11526 offset ^= ((~insn) & (1 << 13)) << 10;
11527 offset ^= ((~insn) & (1 << 11)) << 11;
11529 if (insn & (1 << 14)) {
11530 /* Branch and link. */
11531 tcg_gen_movi_i32(cpu_R[14], s->pc | 1);
11535 if (insn & (1 << 12)) {
11537 gen_jmp(s, offset);
11540 offset &= ~(uint32_t)2;
11541 /* thumb2 bx, no need to check */
11542 gen_bx_im(s, offset);
11544 } else if (((insn >> 23) & 7) == 7) {
11546 if (insn & (1 << 13))
11549 if (insn & (1 << 26)) {
11550 if (arm_dc_feature(s, ARM_FEATURE_M)) {
11553 if (!(insn & (1 << 20))) {
11554 /* Hypervisor call (v7) */
11555 int imm16 = extract32(insn, 16, 4) << 12
11556 | extract32(insn, 0, 12);
11563 /* Secure monitor call (v6+) */
11571 op = (insn >> 20) & 7;
11573 case 0: /* msr cpsr. */
11574 if (arm_dc_feature(s, ARM_FEATURE_M)) {
11575 tmp = load_reg(s, rn);
11576 /* the constant is the mask and SYSm fields */
11577 addr = tcg_const_i32(insn & 0xfff);
11578 gen_helper_v7m_msr(cpu_env, addr, tmp);
11579 tcg_temp_free_i32(addr);
11580 tcg_temp_free_i32(tmp);
11585 case 1: /* msr spsr. */
11586 if (arm_dc_feature(s, ARM_FEATURE_M)) {
11590 if (extract32(insn, 5, 1)) {
11592 int sysm = extract32(insn, 8, 4) |
11593 (extract32(insn, 4, 1) << 4);
11596 gen_msr_banked(s, r, sysm, rm);
11600 /* MSR (for PSRs) */
11601 tmp = load_reg(s, rn);
11603 msr_mask(s, (insn >> 8) & 0xf, op == 1),
11607 case 2: /* cps, nop-hint. */
11608 if (((insn >> 8) & 7) == 0) {
11609 gen_nop_hint(s, insn & 0xff);
11611 /* Implemented as NOP in user mode. */
11616 if (insn & (1 << 10)) {
11617 if (insn & (1 << 7))
11619 if (insn & (1 << 6))
11621 if (insn & (1 << 5))
11623 if (insn & (1 << 9))
11624 imm = CPSR_A | CPSR_I | CPSR_F;
11626 if (insn & (1 << 8)) {
11628 imm |= (insn & 0x1f);
11631 gen_set_psr_im(s, offset, 0, imm);
11634 case 3: /* Special control operations. */
11635 if (!arm_dc_feature(s, ARM_FEATURE_V7) &&
11636 !arm_dc_feature(s, ARM_FEATURE_M)) {
11639 op = (insn >> 4) & 0xf;
11641 case 2: /* clrex */
11646 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
11649 /* We need to break the TB after this insn
11650 * to execute self-modifying code correctly
11651 * and also to take any pending interrupts immediately. */
11654 gen_goto_tb(s, 0, s->pc & ~1);
11661 /* Trivial implementation equivalent to bx.
11662 * This instruction doesn't exist at all for M-profile.
11664 if (arm_dc_feature(s, ARM_FEATURE_M)) {
11667 tmp = load_reg(s, rn);
11670 case 5: /* Exception return. */
11674 if (rn != 14 || rd != 15) {
11677 if (s->current_el == 2) {
11678 /* ERET from Hyp uses ELR_Hyp, not LR */
11682 tmp = load_cpu_field(elr_el[2]);
11684 tmp = load_reg(s, rn);
11685 tcg_gen_subi_i32(tmp, tmp, insn & 0xff);
11687 gen_exception_return(s, tmp);
11690 if (extract32(insn, 5, 1) &&
11691 !arm_dc_feature(s, ARM_FEATURE_M)) {
11693 int sysm = extract32(insn, 16, 4) |
11694 (extract32(insn, 4, 1) << 4);
11696 gen_mrs_banked(s, 0, sysm, rd);
11700 if (extract32(insn, 16, 4) != 0xf) {
11703 if (!arm_dc_feature(s, ARM_FEATURE_M) &&
11704 extract32(insn, 0, 8) != 0) {
11709 tmp = tcg_temp_new_i32();
11710 if (arm_dc_feature(s, ARM_FEATURE_M)) {
11711 addr = tcg_const_i32(insn & 0xff);
11712 gen_helper_v7m_mrs(tmp, cpu_env, addr);
11713 tcg_temp_free_i32(addr);
11715 gen_helper_cpsr_read(tmp, cpu_env);
11717 store_reg(s, rd, tmp);
11720 if (extract32(insn, 5, 1) &&
11721 !arm_dc_feature(s, ARM_FEATURE_M)) {
11723 int sysm = extract32(insn, 16, 4) |
11724 (extract32(insn, 4, 1) << 4);
11726 gen_mrs_banked(s, 1, sysm, rd);
11731 /* Not accessible in user mode. */
11732 if (IS_USER(s) || arm_dc_feature(s, ARM_FEATURE_M)) {
11736 if (extract32(insn, 16, 4) != 0xf ||
11737 extract32(insn, 0, 8) != 0) {
11741 tmp = load_cpu_field(spsr);
11742 store_reg(s, rd, tmp);
11747 /* Conditional branch. */
11748 op = (insn >> 22) & 0xf;
11749 /* Generate a conditional jump to next instruction. */
11750 arm_skip_unless(s, op);
11752 /* offset[11:1] = insn[10:0] */
11753 offset = (insn & 0x7ff) << 1;
11754 /* offset[17:12] = insn[21:16]. */
11755 offset |= (insn & 0x003f0000) >> 4;
11756 /* offset[31:20] = insn[26]. */
11757 offset |= ((int32_t)((insn << 5) & 0x80000000)) >> 11;
11758 /* offset[18] = insn[13]. */
11759 offset |= (insn & (1 << 13)) << 5;
11760 /* offset[19] = insn[11]. */
11761 offset |= (insn & (1 << 11)) << 8;
11763 /* jump to the offset */
11764 gen_jmp(s, s->pc + offset);
11768 * 0b1111_0xxx_xxxx_0xxx_xxxx_xxxx
11769 * - Data-processing (modified immediate, plain binary immediate)
11771 if (insn & (1 << 25)) {
11773 * 0b1111_0x1x_xxxx_0xxx_xxxx_xxxx
11774 * - Data-processing (plain binary immediate)
11776 if (insn & (1 << 24)) {
11777 if (insn & (1 << 20))
11779 /* Bitfield/Saturate. */
11780 op = (insn >> 21) & 7;
11782 shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
11784 tmp = tcg_temp_new_i32();
11785 tcg_gen_movi_i32(tmp, 0);
11787 tmp = load_reg(s, rn);
11790 case 2: /* Signed bitfield extract. */
11792 if (shift + imm > 32)
11795 tcg_gen_sextract_i32(tmp, tmp, shift, imm);
11798 case 6: /* Unsigned bitfield extract. */
11800 if (shift + imm > 32)
11803 tcg_gen_extract_i32(tmp, tmp, shift, imm);
11806 case 3: /* Bitfield insert/clear. */
11809 imm = imm + 1 - shift;
11811 tmp2 = load_reg(s, rd);
11812 tcg_gen_deposit_i32(tmp, tmp2, tmp, shift, imm);
11813 tcg_temp_free_i32(tmp2);
11818 default: /* Saturate. */
11821 tcg_gen_sari_i32(tmp, tmp, shift);
11823 tcg_gen_shli_i32(tmp, tmp, shift);
11825 tmp2 = tcg_const_i32(imm);
11828 if ((op & 1) && shift == 0) {
11829 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
11830 tcg_temp_free_i32(tmp);
11831 tcg_temp_free_i32(tmp2);
11834 gen_helper_usat16(tmp, cpu_env, tmp, tmp2);
11836 gen_helper_usat(tmp, cpu_env, tmp, tmp2);
11840 if ((op & 1) && shift == 0) {
11841 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
11842 tcg_temp_free_i32(tmp);
11843 tcg_temp_free_i32(tmp2);
11846 gen_helper_ssat16(tmp, cpu_env, tmp, tmp2);
11848 gen_helper_ssat(tmp, cpu_env, tmp, tmp2);
11851 tcg_temp_free_i32(tmp2);
11854 store_reg(s, rd, tmp);
11856 imm = ((insn & 0x04000000) >> 15)
11857 | ((insn & 0x7000) >> 4) | (insn & 0xff);
11858 if (insn & (1 << 22)) {
11859 /* 16-bit immediate. */
11860 imm |= (insn >> 4) & 0xf000;
11861 if (insn & (1 << 23)) {
11863 tmp = load_reg(s, rd);
11864 tcg_gen_ext16u_i32(tmp, tmp);
11865 tcg_gen_ori_i32(tmp, tmp, imm << 16);
11868 tmp = tcg_temp_new_i32();
11869 tcg_gen_movi_i32(tmp, imm);
11871 store_reg(s, rd, tmp);
11873 /* Add/sub 12-bit immediate. */
11875 offset = s->pc & ~(uint32_t)3;
11876 if (insn & (1 << 23))
11880 tmp = tcg_temp_new_i32();
11881 tcg_gen_movi_i32(tmp, offset);
11882 store_reg(s, rd, tmp);
11884 tmp = load_reg(s, rn);
11885 if (insn & (1 << 23))
11886 tcg_gen_subi_i32(tmp, tmp, imm);
11888 tcg_gen_addi_i32(tmp, tmp, imm);
11889 if (rn == 13 && rd == 13) {
11890 /* ADD SP, SP, imm or SUB SP, SP, imm */
11891 store_sp_checked(s, tmp);
11893 store_reg(s, rd, tmp);
11900 * 0b1111_0x0x_xxxx_0xxx_xxxx_xxxx
11901 * - Data-processing (modified immediate)
11903 int shifter_out = 0;
11904 /* modified 12-bit immediate. */
11905 shift = ((insn & 0x04000000) >> 23) | ((insn & 0x7000) >> 12);
11906 imm = (insn & 0xff);
11909 /* Nothing to do. */
11911 case 1: /* 00XY00XY */
11914 case 2: /* XY00XY00 */
11918 case 3: /* XYXYXYXY */
11922 default: /* Rotated constant. */
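/* The constant is imm8 with bit 7 forced to 1, rotated right by the 5-bit count i:imm3:imm8<7>; since the value fits in 8 bits, the rotate is implemented as a left shift by (32 - shift). */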
11923 shift = (shift << 1) | (imm >> 7);
11925 imm = imm << (32 - shift);
11929 tmp2 = tcg_temp_new_i32();
11930 tcg_gen_movi_i32(tmp2, imm);
11931 rn = (insn >> 16) & 0xf;
11933 tmp = tcg_temp_new_i32();
11934 tcg_gen_movi_i32(tmp, 0);
11936 tmp = load_reg(s, rn);
11938 op = (insn >> 21) & 0xf;
11939 if (gen_thumb2_data_op(s, op, (insn & (1 << 20)) != 0,
11940 shifter_out, tmp, tmp2))
11942 tcg_temp_free_i32(tmp2);
11943 rd = (insn >> 8) & 0xf;
11944 if (rd == 13 && rn == 13
11945 && (op == 8 || op == 13)) {
11946 /* ADD(S) SP, SP, imm or SUB(S) SP, SP, imm */
11947 store_sp_checked(s, tmp);
11948 } else if (rd != 15) {
11949 store_reg(s, rd, tmp);
11951 tcg_temp_free_i32(tmp);
11956 case 12: /* Load/store single data item. */
11963 if ((insn & 0x01100000) == 0x01000000) {
11964 if (disas_neon_ls_insn(s, insn)) {
11969 op = ((insn >> 21) & 3) | ((insn >> 22) & 4);
11971 if (!(insn & (1 << 20))) {
11975 /* Byte or halfword load space with dest == r15 : memory hints.
11976 * Catch them early so we don't emit pointless addressing code.
11977 * This space is a mix of:
11978 * PLD/PLDW/PLI, which we implement as NOPs (note that unlike
11979 * the ARM encodings, PLDW space doesn't UNDEF for non-v7MP cores)
11981 * unallocated hints, which must be treated as NOPs
11982 * UNPREDICTABLE space, which we NOP or UNDEF depending on
11983 * which is easiest for the decoding logic
11984 * Some space which must UNDEF
11986 int op1 = (insn >> 23) & 3;
11987 int op2 = (insn >> 6) & 0x3f;
11992 /* UNPREDICTABLE, unallocated hint or
11993 * PLD/PLDW/PLI (literal)
11998 return; /* PLD/PLDW/PLI or unallocated hint */
12000 if ((op2 == 0) || ((op2 & 0x3c) == 0x30)) {
12001 return; /* PLD/PLDW/PLI or unallocated hint */
12003 /* UNDEF space, or an UNPREDICTABLE */
12007 memidx = get_mem_index(s);
12009 addr = tcg_temp_new_i32();
12011 /* s->pc has already been incremented by 4. */
12012 imm = s->pc & 0xfffffffc;
12013 if (insn & (1 << 23))
12014 imm += insn & 0xfff;
12016 imm -= insn & 0xfff;
12017 tcg_gen_movi_i32(addr, imm);
12019 addr = load_reg(s, rn);
12020 if (insn & (1 << 23)) {
12021 /* Positive offset. */
12022 imm = insn & 0xfff;
12023 tcg_gen_addi_i32(addr, addr, imm);
12026 switch ((insn >> 8) & 0xf) {
12027 case 0x0: /* Shifted Register. */
12028 shift = (insn >> 4) & 0xf;
12030 tcg_temp_free_i32(addr);
12033 tmp = load_reg(s, rm);
12035 tcg_gen_shli_i32(tmp, tmp, shift);
12036 tcg_gen_add_i32(addr, addr, tmp);
12037 tcg_temp_free_i32(tmp);
12039 case 0xc: /* Negative offset. */
12040 tcg_gen_addi_i32(addr, addr, -imm);
12042 case 0xe: /* User privilege. */
12043 tcg_gen_addi_i32(addr, addr, imm);
12044 memidx = get_a32_user_mem_index(s);
12046 case 0x9: /* Post-decrement. */
12048 /* Fall through. */
12049 case 0xb: /* Post-increment. */
12053 case 0xd: /* Pre-decrement. */
12055 /* Fall through. */
12056 case 0xf: /* Pre-increment. */
12060 tcg_temp_free_i32(addr);
12066 issinfo = writeback ? ISSInvalid : rs;
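/* The instruction syndrome cannot describe writeback forms, so no ISS information is reported for them. */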
12068 if (s->v8m_stackcheck && rn == 13 && writeback) {
12070 * Stackcheck. Here we know 'addr' is the current SP;
12071 * if imm is +ve we're moving SP up, else down. It is
12072 * UNKNOWN whether the limit check triggers when SP starts
12073 below the limit and ends up above it; we choose to trigger the check in that case.
12075 if ((int32_t)imm < 0) {
12076 TCGv_i32 newsp = tcg_temp_new_i32();
12078 tcg_gen_addi_i32(newsp, addr, imm);
12079 gen_helper_v8m_stackcheck(cpu_env, newsp);
12080 tcg_temp_free_i32(newsp);
12082 gen_helper_v8m_stackcheck(cpu_env, addr);
12086 if (writeback && !postinc) {
12087 tcg_gen_addi_i32(addr, addr, imm);
12090 if (insn & (1 << 20)) {
12092 tmp = tcg_temp_new_i32();
12095 gen_aa32_ld8u_iss(s, tmp, addr, memidx, issinfo);
12098 gen_aa32_ld8s_iss(s, tmp, addr, memidx, issinfo);
12101 gen_aa32_ld16u_iss(s, tmp, addr, memidx, issinfo);
12104 gen_aa32_ld16s_iss(s, tmp, addr, memidx, issinfo);
12107 gen_aa32_ld32u_iss(s, tmp, addr, memidx, issinfo);
12110 tcg_temp_free_i32(tmp);
12111 tcg_temp_free_i32(addr);
12115 gen_bx_excret(s, tmp);
12117 store_reg(s, rs, tmp);
12121 tmp = load_reg(s, rs);
12124 gen_aa32_st8_iss(s, tmp, addr, memidx, issinfo);
12127 gen_aa32_st16_iss(s, tmp, addr, memidx, issinfo);
12130 gen_aa32_st32_iss(s, tmp, addr, memidx, issinfo);
12133 tcg_temp_free_i32(tmp);
12134 tcg_temp_free_i32(addr);
12137 tcg_temp_free_i32(tmp);
12140 tcg_gen_addi_i32(addr, addr, imm);
12142 store_reg(s, rn, addr);
12144 tcg_temp_free_i32(addr);
12153 gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
12154 default_exception_el(s));
12157 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
12159 uint32_t val, op, rm, rn, rd, shift, cond;
12166 switch (insn >> 12) {
12170 op = (insn >> 11) & 3;
12173 * 0b0001_1xxx_xxxx_xxxx
12174 * - Add, subtract (three low registers)
12175 * - Add, subtract (two low registers and immediate)
12177 rn = (insn >> 3) & 7;
12178 tmp = load_reg(s, rn);
12179 if (insn & (1 << 10)) {
12181 tmp2 = tcg_temp_new_i32();
12182 tcg_gen_movi_i32(tmp2, (insn >> 6) & 7);
12185 rm = (insn >> 6) & 7;
12186 tmp2 = load_reg(s, rm);
12188 if (insn & (1 << 9)) {
12189 if (s->condexec_mask)
12190 tcg_gen_sub_i32(tmp, tmp, tmp2);
12192 gen_sub_CC(tmp, tmp, tmp2);
12194 if (s->condexec_mask)
12195 tcg_gen_add_i32(tmp, tmp, tmp2);
12197 gen_add_CC(tmp, tmp, tmp2);
12199 tcg_temp_free_i32(tmp2);
12200 store_reg(s, rd, tmp);
12202 /* shift immediate */
12203 rm = (insn >> 3) & 7;
12204 shift = (insn >> 6) & 0x1f;
12205 tmp = load_reg(s, rm);
12206 gen_arm_shift_im(tmp, op, shift, s->condexec_mask == 0);
12207 if (!s->condexec_mask)
12209 store_reg(s, rd, tmp);
12214 * 0b001x_xxxx_xxxx_xxxx
12215 * - Add, subtract, compare, move (one low register and immediate)
12217 op = (insn >> 11) & 3;
12218 rd = (insn >> 8) & 0x7;
12219 if (op == 0) { /* mov */
12220 tmp = tcg_temp_new_i32();
12221 tcg_gen_movi_i32(tmp, insn & 0xff);
12222 if (!s->condexec_mask)
12224 store_reg(s, rd, tmp);
12226 tmp = load_reg(s, rd);
12227 tmp2 = tcg_temp_new_i32();
12228 tcg_gen_movi_i32(tmp2, insn & 0xff);
12231 gen_sub_CC(tmp, tmp, tmp2);
12232 tcg_temp_free_i32(tmp);
12233 tcg_temp_free_i32(tmp2);
12236 if (s->condexec_mask)
12237 tcg_gen_add_i32(tmp, tmp, tmp2);
12239 gen_add_CC(tmp, tmp, tmp2);
12240 tcg_temp_free_i32(tmp2);
12241 store_reg(s, rd, tmp);
12244 if (s->condexec_mask)
12245 tcg_gen_sub_i32(tmp, tmp, tmp2);
12247 gen_sub_CC(tmp, tmp, tmp2);
12248 tcg_temp_free_i32(tmp2);
12249 store_reg(s, rd, tmp);
12255 if (insn & (1 << 11)) {
12256 rd = (insn >> 8) & 7;
12257 /* load pc-relative. Bit 1 of PC is ignored. */
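/* s->pc is this insn's address + 2, so s->pc + 2 is the architectural PC (address + 4) before word alignment. */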
12258 val = s->pc + 2 + ((insn & 0xff) * 4);
12259 val &= ~(uint32_t)2;
12260 addr = tcg_temp_new_i32();
12261 tcg_gen_movi_i32(addr, val);
12262 tmp = tcg_temp_new_i32();
12263 gen_aa32_ld32u_iss(s, tmp, addr, get_mem_index(s),
12265 tcg_temp_free_i32(addr);
12266 store_reg(s, rd, tmp);
12269 if (insn & (1 << 10)) {
12270 /* 0b0100_01xx_xxxx_xxxx
12271 * - data processing extended, branch and exchange
12273 rd = (insn & 7) | ((insn >> 4) & 8);
12274 rm = (insn >> 3) & 0xf;
12275 op = (insn >> 8) & 3;
12278 tmp = load_reg(s, rd);
12279 tmp2 = load_reg(s, rm);
12280 tcg_gen_add_i32(tmp, tmp, tmp2);
12281 tcg_temp_free_i32(tmp2);
12283 /* ADD SP, SP, reg */
12284 store_sp_checked(s, tmp);
12286 store_reg(s, rd, tmp);
12290 tmp = load_reg(s, rd);
12291 tmp2 = load_reg(s, rm);
12292 gen_sub_CC(tmp, tmp, tmp2);
12293 tcg_temp_free_i32(tmp2);
12294 tcg_temp_free_i32(tmp);
12296 case 2: /* mov/cpy */
12297 tmp = load_reg(s, rm);
12300 store_sp_checked(s, tmp);
12302 store_reg(s, rd, tmp);
12307 /* 0b0100_0111_xxxx_xxxx
12308 * - branch [and link] exchange thumb register
12310 bool link = insn & (1 << 7);
12319 /* BXNS/BLXNS: only exists for v8M with the
12320 * security extensions, and always UNDEF if NonSecure.
12321 * We don't implement these in the user-only mode
12322 * either (in theory you can use them from Secure User
12323 * mode but they are too tied in to system emulation.)
12325 if (!s->v8m_secure || IS_USER_ONLY) {
12336 tmp = load_reg(s, rm);
12338 val = (uint32_t)s->pc | 1;
12339 tmp2 = tcg_temp_new_i32();
12340 tcg_gen_movi_i32(tmp2, val);
12341 store_reg(s, 14, tmp2);
12344 /* Only BX works as exception-return, not BLX */
12345 gen_bx_excret(s, tmp);
12354 * 0b0100_00xx_xxxx_xxxx
12355 * - Data-processing (two low registers)
12358 rm = (insn >> 3) & 7;
12359 op = (insn >> 6) & 0xf;
12360 if (op == 2 || op == 3 || op == 4 || op == 7) {
12361 /* the shift/rotate ops want the operands backwards */
12370 if (op == 9) { /* neg */
12371 tmp = tcg_temp_new_i32();
12372 tcg_gen_movi_i32(tmp, 0);
12373 } else if (op != 0xf) { /* mvn doesn't read its first operand */
12374 tmp = load_reg(s, rd);
12379 tmp2 = load_reg(s, rm);
12381 case 0x0: /* and */
12382 tcg_gen_and_i32(tmp, tmp, tmp2);
12383 if (!s->condexec_mask)
12386 case 0x1: /* eor */
12387 tcg_gen_xor_i32(tmp, tmp, tmp2);
12388 if (!s->condexec_mask)
12391 case 0x2: /* lsl */
12392 if (s->condexec_mask) {
12393 gen_shl(tmp2, tmp2, tmp);
12395 gen_helper_shl_cc(tmp2, cpu_env, tmp2, tmp);
12396 gen_logic_CC(tmp2);
12399 case 0x3: /* lsr */
12400 if (s->condexec_mask) {
12401 gen_shr(tmp2, tmp2, tmp);
12403 gen_helper_shr_cc(tmp2, cpu_env, tmp2, tmp);
12404 gen_logic_CC(tmp2);
12407 case 0x4: /* asr */
12408 if (s->condexec_mask) {
12409 gen_sar(tmp2, tmp2, tmp);
12411 gen_helper_sar_cc(tmp2, cpu_env, tmp2, tmp);
12412 gen_logic_CC(tmp2);
12415 case 0x5: /* adc */
12416 if (s->condexec_mask) {
12417 gen_adc(tmp, tmp2);
12419 gen_adc_CC(tmp, tmp, tmp2);
12422 case 0x6: /* sbc */
12423 if (s->condexec_mask) {
12424 gen_sub_carry(tmp, tmp, tmp2);
12426 gen_sbc_CC(tmp, tmp, tmp2);
12429 case 0x7: /* ror */
12430 if (s->condexec_mask) {
12431 tcg_gen_andi_i32(tmp, tmp, 0x1f);
12432 tcg_gen_rotr_i32(tmp2, tmp2, tmp);
12434 gen_helper_ror_cc(tmp2, cpu_env, tmp2, tmp);
12435 gen_logic_CC(tmp2);
12438 case 0x8: /* tst */
12439 tcg_gen_and_i32(tmp, tmp, tmp2);
12443 case 0x9: /* neg */
12444 if (s->condexec_mask)
12445 tcg_gen_neg_i32(tmp, tmp2);
12447 gen_sub_CC(tmp, tmp, tmp2);
12449 case 0xa: /* cmp */
12450 gen_sub_CC(tmp, tmp, tmp2);
12453 case 0xb: /* cmn */
12454 gen_add_CC(tmp, tmp, tmp2);
12457 case 0xc: /* orr */
12458 tcg_gen_or_i32(tmp, tmp, tmp2);
12459 if (!s->condexec_mask)
12462 case 0xd: /* mul */
12463 tcg_gen_mul_i32(tmp, tmp, tmp2);
12464 if (!s->condexec_mask)
12467 case 0xe: /* bic */
12468 tcg_gen_andc_i32(tmp, tmp, tmp2);
12469 if (!s->condexec_mask)
12472 case 0xf: /* mvn */
12473 tcg_gen_not_i32(tmp2, tmp2);
12474 if (!s->condexec_mask)
12475 gen_logic_CC(tmp2);
12482 store_reg(s, rm, tmp2);
12484 tcg_temp_free_i32(tmp);
12486 store_reg(s, rd, tmp);
12487 tcg_temp_free_i32(tmp2);
12490 tcg_temp_free_i32(tmp);
12491 tcg_temp_free_i32(tmp2);
12496 /* load/store register offset. */
12498 rn = (insn >> 3) & 7;
12499 rm = (insn >> 6) & 7;
12500 op = (insn >> 9) & 7;
12501 addr = load_reg(s, rn);
12502 tmp = load_reg(s, rm);
12503 tcg_gen_add_i32(addr, addr, tmp);
12504 tcg_temp_free_i32(tmp);
12506 if (op < 3) { /* store */
12507 tmp = load_reg(s, rd);
12509 tmp = tcg_temp_new_i32();
12514 gen_aa32_st32_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
12517 gen_aa32_st16_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
12520 gen_aa32_st8_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
12522 case 3: /* ldrsb */
12523 gen_aa32_ld8s_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
12526 gen_aa32_ld32u_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
12529 gen_aa32_ld16u_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
12532 gen_aa32_ld8u_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
12534 case 7: /* ldrsh */
12535 gen_aa32_ld16s_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
12538 if (op >= 3) { /* load */
12539 store_reg(s, rd, tmp);
12541 tcg_temp_free_i32(tmp);
12543 tcg_temp_free_i32(addr);
12547 /* load/store word immediate offset */
12549 rn = (insn >> 3) & 7;
12550 addr = load_reg(s, rn);
12551 val = (insn >> 4) & 0x7c;
12552 tcg_gen_addi_i32(addr, addr, val);
12554 if (insn & (1 << 11)) {
12556 tmp = tcg_temp_new_i32();
12557 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
12558 store_reg(s, rd, tmp);
12561 tmp = load_reg(s, rd);
12562 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
12563 tcg_temp_free_i32(tmp);
12565 tcg_temp_free_i32(addr);
12569 /* load/store byte immediate offset */
12571 rn = (insn >> 3) & 7;
12572 addr = load_reg(s, rn);
12573 val = (insn >> 6) & 0x1f;
12574 tcg_gen_addi_i32(addr, addr, val);
12576 if (insn & (1 << 11)) {
12578 tmp = tcg_temp_new_i32();
12579 gen_aa32_ld8u_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
12580 store_reg(s, rd, tmp);
12583 tmp = load_reg(s, rd);
12584 gen_aa32_st8_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
12585 tcg_temp_free_i32(tmp);
12587 tcg_temp_free_i32(addr);
12591 /* load/store halfword immediate offset */
12593 rn = (insn >> 3) & 7;
12594 addr = load_reg(s, rn);
12595 val = (insn >> 5) & 0x3e;
12596 tcg_gen_addi_i32(addr, addr, val);
12598 if (insn & (1 << 11)) {
12600 tmp = tcg_temp_new_i32();
12601 gen_aa32_ld16u_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
12602 store_reg(s, rd, tmp);
12605 tmp = load_reg(s, rd);
12606 gen_aa32_st16_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
12607 tcg_temp_free_i32(tmp);
12609 tcg_temp_free_i32(addr);
12613 /* load/store from stack */
12614 rd = (insn >> 8) & 7;
12615 addr = load_reg(s, 13);
12616 val = (insn & 0xff) * 4;
12617 tcg_gen_addi_i32(addr, addr, val);
12619 if (insn & (1 << 11)) {
12621 tmp = tcg_temp_new_i32();
12622 gen_aa32_ld32u_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
12623 store_reg(s, rd, tmp);
12626 tmp = load_reg(s, rd);
12627 gen_aa32_st32_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
12628 tcg_temp_free_i32(tmp);
12630 tcg_temp_free_i32(addr);
12635 * 0b1010_xxxx_xxxx_xxxx
12636 * - Add PC/SP (immediate)
12638 rd = (insn >> 8) & 7;
12639 if (insn & (1 << 11)) {
12641 tmp = load_reg(s, 13);
12643 /* PC. bit 1 is ignored. */
12644 tmp = tcg_temp_new_i32();
12645 tcg_gen_movi_i32(tmp, (s->pc + 2) & ~(uint32_t)2);
12647 val = (insn & 0xff) * 4;
12648 tcg_gen_addi_i32(tmp, tmp, val);
12649 store_reg(s, rd, tmp);
12654 op = (insn >> 8) & 0xf;
12658 * 0b1011_0000_xxxx_xxxx
12659 * - ADD (SP plus immediate)
12660 * - SUB (SP minus immediate)
12662 tmp = load_reg(s, 13);
12663 val = (insn & 0x7f) * 4;
12664 if (insn & (1 << 7))
12665 val = -(int32_t)val;
12666 tcg_gen_addi_i32(tmp, tmp, val);
12667 store_sp_checked(s, tmp);
12670 case 2: /* sign/zero extend. */
12673 rm = (insn >> 3) & 7;
12674 tmp = load_reg(s, rm);
12675 switch ((insn >> 6) & 3) {
12676 case 0: gen_sxth(tmp); break;
12677 case 1: gen_sxtb(tmp); break;
12678 case 2: gen_uxth(tmp); break;
12679 case 3: gen_uxtb(tmp); break;
12681 store_reg(s, rd, tmp);
12683 case 4: case 5: case 0xc: case 0xd:
12685 * 0b1011_x10x_xxxx_xxxx
12688 addr = load_reg(s, 13);
12689 if (insn & (1 << 8))
12693 for (i = 0; i < 8; i++) {
12694 if (insn & (1 << i))
12697 if ((insn & (1 << 11)) == 0) {
12698 tcg_gen_addi_i32(addr, addr, -offset);
12701 if (s->v8m_stackcheck) {
12703 * Here 'addr' is the lower of "old SP" and "new SP";
12704 * if this is a pop that starts below the limit and ends
12705 * above it, it is UNKNOWN whether the limit check triggers;
12706 * we choose to trigger.
12708 gen_helper_v8m_stackcheck(cpu_env, addr);
12711 for (i = 0; i < 8; i++) {
12712 if (insn & (1 << i)) {
12713 if (insn & (1 << 11)) {
12715 tmp = tcg_temp_new_i32();
12716 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
12717 store_reg(s, i, tmp);
12720 tmp = load_reg(s, i);
12721 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
12722 tcg_temp_free_i32(tmp);
12724 /* advance to the next address. */
12725 tcg_gen_addi_i32(addr, addr, 4);
12729 if (insn & (1 << 8)) {
12730 if (insn & (1 << 11)) {
12732 tmp = tcg_temp_new_i32();
12733 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
12734 /* don't set the pc until the rest of the instruction has completed */
12738 tmp = load_reg(s, 14);
12739 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
12740 tcg_temp_free_i32(tmp);
12742 tcg_gen_addi_i32(addr, addr, 4);
12744 if ((insn & (1 << 11)) == 0) {
12745 tcg_gen_addi_i32(addr, addr, -offset);
12747 /* write back the new stack pointer */
12748 store_reg(s, 13, addr);
12749 /* set the new PC value */
12750 if ((insn & 0x0900) == 0x0900) {
12751 store_reg_from_load(s, 15, tmp);
12755 case 1: case 3: case 9: case 11: /* czb */
12757 tmp = load_reg(s, rm);
12758 arm_gen_condlabel(s);
12759 if (insn & (1 << 11))
12760 tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, s->condlabel);
12762 tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, s->condlabel);
12763 tcg_temp_free_i32(tmp);
12764 offset = ((insn & 0xf8) >> 2) | (insn & 0x200) >> 3;
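/* offset is i:imm5 already scaled to bytes; the branch target is the architectural PC (s->pc + 2, i.e. insn address + 4) plus this offset. */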
12765 val = (uint32_t)s->pc + 2;
12770 case 15: /* IT, nop-hint. */
12771 if ((insn & 0xf) == 0) {
12772 gen_nop_hint(s, (insn >> 4) & 0xf);
12776 s->condexec_cond = (insn >> 4) & 0xe;
12777 s->condexec_mask = insn & 0x1f;
12778 /* No actual code generated for this insn, just setup state. */
12781 case 0xe: /* bkpt */
12783 int imm8 = extract32(insn, 0, 8);
12785 gen_exception_bkpt_insn(s, 2, syn_aa32_bkpt(imm8, true));
12789 case 0xa: /* rev, and hlt */
12791 int op1 = extract32(insn, 6, 2);
12795 int imm6 = extract32(insn, 0, 6);
12801 /* Otherwise this is rev */
12803 rn = (insn >> 3) & 0x7;
12805 tmp = load_reg(s, rn);
12807 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
12808 case 1: gen_rev16(tmp); break;
12809 case 3: gen_revsh(tmp); break;
12811 g_assert_not_reached();
12813 store_reg(s, rd, tmp);
12818 switch ((insn >> 5) & 7) {
12822 if (((insn >> 3) & 1) != !!(s->be_data == MO_BE)) {
12823 gen_helper_setend(cpu_env);
12824 s->base.is_jmp = DISAS_UPDATE;
12833 if (arm_dc_feature(s, ARM_FEATURE_M)) {
12834 tmp = tcg_const_i32((insn & (1 << 4)) != 0);
12837 addr = tcg_const_i32(19);
12838 gen_helper_v7m_msr(cpu_env, addr, tmp);
12839 tcg_temp_free_i32(addr);
12843 addr = tcg_const_i32(16);
12844 gen_helper_v7m_msr(cpu_env, addr, tmp);
12845 tcg_temp_free_i32(addr);
12847 tcg_temp_free_i32(tmp);
12850 if (insn & (1 << 4)) {
12851 shift = CPSR_A | CPSR_I | CPSR_F;
12855 gen_set_psr_im(s, ((insn & 7) << 6), 0, shift);
12870 /* load/store multiple */
12871 TCGv_i32 loaded_var = NULL;
12872 rn = (insn >> 8) & 0x7;
12873 addr = load_reg(s, rn);
12874 for (i = 0; i < 8; i++) {
12875 if (insn & (1 << i)) {
12876 if (insn & (1 << 11)) {
12878 tmp = tcg_temp_new_i32();
12879 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
12883 store_reg(s, i, tmp);
12887 tmp = load_reg(s, i);
12888 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
12889 tcg_temp_free_i32(tmp);
12891 /* advance to the next address */
12892 tcg_gen_addi_i32(addr, addr, 4);
12895 if ((insn & (1 << rn)) == 0) {
12896 /* base reg not in list: base register writeback */
12897 store_reg(s, rn, addr);
12899 /* base reg in list: if load, complete it now */
12900 if (insn & (1 << 11)) {
12901 store_reg(s, rn, loaded_var);
12903 tcg_temp_free_i32(addr);
12908 /* conditional branch or swi */
12909 cond = (insn >> 8) & 0xf;
12915 gen_set_pc_im(s, s->pc);
12916 s->svc_imm = extract32(insn, 0, 8);
12917 s->base.is_jmp = DISAS_SWI;
12920 /* generate a conditional jump to next instruction */
12921 arm_skip_unless(s, cond);
12923 /* jump to the offset */
12924 val = (uint32_t)s->pc + 2;
12925 offset = ((int32_t)insn << 24) >> 24;
12926 val += offset << 1;
12931 if (insn & (1 << 11)) {
12932 /* thumb_insn_is_16bit() ensures we can't get here for
12933 * a Thumb2 CPU, so this must be a thumb1 split BL/BLX:
12934 * 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF)
12936 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
12938 offset = ((insn & 0x7ff) << 1);
12939 tmp = load_reg(s, 14);
12940 tcg_gen_addi_i32(tmp, tmp, offset);
12941 tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
12943 tmp2 = tcg_temp_new_i32();
12944 tcg_gen_movi_i32(tmp2, s->pc | 1);
12945 store_reg(s, 14, tmp2);
12949 /* unconditional branch */
12950 val = (uint32_t)s->pc;
12951 offset = ((int32_t)insn << 21) >> 21;
12952 val += (offset << 1) + 2;
12957 /* thumb_insn_is_16bit() ensures we can't get here for
12958 * a Thumb2 CPU, so this must be a thumb1 split BL/BLX.
12960 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
12962 if (insn & (1 << 11)) {
12963 /* 0b1111_1xxx_xxxx_xxxx : BL suffix */
12964 offset = ((insn & 0x7ff) << 1) | 1;
12965 tmp = load_reg(s, 14);
12966 tcg_gen_addi_i32(tmp, tmp, offset);
12968 tmp2 = tcg_temp_new_i32();
12969 tcg_gen_movi_i32(tmp2, s->pc | 1);
12970 store_reg(s, 14, tmp2);
12973 /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix */
12974 uint32_t uoffset = ((int32_t)insn << 21) >> 9;
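/* The prefix stores sign_extend(imm11) << 12 plus the architectural PC into LR; the BL/BLX suffix half then adds its own imm11 << 1 and writes the real return address back to LR. */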
12976 tcg_gen_movi_i32(cpu_R[14], s->pc + 2 + uoffset);
12983 gen_exception_insn(s, 2, EXCP_UDEF, syn_uncategorized(),
12984 default_exception_el(s));
12987 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
12989 /* Return true if the insn at dc->pc might cross a page boundary.
12990 * (False positives are OK, false negatives are not.)
12991 * We know this is a Thumb insn, and our caller ensures we are
12992 * only called if dc->pc is less than 4 bytes from the page
12993 * boundary, so we cross the page if the first 16 bits indicate
12994 * that this is a 32 bit insn.
12996 uint16_t insn = arm_lduw_code(env, s->pc, s->sctlr_b);
12998 return !thumb_insn_is_16bit(s, insn);
13001 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
13003 DisasContext *dc = container_of(dcbase, DisasContext, base);
13004 CPUARMState *env = cs->env_ptr;
13005 ARMCPU *cpu = arm_env_get_cpu(env);
13007 dc->isar = &cpu->isar;
13008 dc->pc = dc->base.pc_first;
13012 /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
13013 * there is no secure EL1, so we route exceptions to EL3.
13015 dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
13016 !arm_el_is_aa64(env, 3);
13017 dc->thumb = ARM_TBFLAG_THUMB(dc->base.tb->flags);
13018 dc->sctlr_b = ARM_TBFLAG_SCTLR_B(dc->base.tb->flags);
13019 dc->be_data = ARM_TBFLAG_BE_DATA(dc->base.tb->flags) ? MO_BE : MO_LE;
13020 dc->condexec_mask = (ARM_TBFLAG_CONDEXEC(dc->base.tb->flags) & 0xf) << 1;
13021 dc->condexec_cond = ARM_TBFLAG_CONDEXEC(dc->base.tb->flags) >> 4;
13022 dc->mmu_idx = core_to_arm_mmu_idx(env, ARM_TBFLAG_MMUIDX(dc->base.tb->flags));
13023 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
13024 #if !defined(CONFIG_USER_ONLY)
13025 dc->user = (dc->current_el == 0);
13027 dc->ns = ARM_TBFLAG_NS(dc->base.tb->flags);
13028 dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(dc->base.tb->flags);
13029 dc->vfp_enabled = ARM_TBFLAG_VFPEN(dc->base.tb->flags);
13030 dc->vec_len = ARM_TBFLAG_VECLEN(dc->base.tb->flags);
13031 dc->vec_stride = ARM_TBFLAG_VECSTRIDE(dc->base.tb->flags);
13032 dc->c15_cpar = ARM_TBFLAG_XSCALE_CPAR(dc->base.tb->flags);
13033 dc->v7m_handler_mode = ARM_TBFLAG_HANDLER(dc->base.tb->flags);
13034 dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
13035 regime_is_secure(env, dc->mmu_idx);
13036 dc->v8m_stackcheck = ARM_TBFLAG_STACKCHECK(dc->base.tb->flags);
13037 dc->cp_regs = cpu->cp_regs;
13038 dc->features = env->features;
13040 /* Single step state. The code-generation logic here is:
13042 * generate code with no special handling for single-stepping (except
13043 * that anything that can make us go to SS_ACTIVE == 1 must end the TB;
13044 * this happens anyway because those changes are all system register or
13046 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
13047 * emit code for one insn
13048 * emit code to clear PSTATE.SS
13049 * emit code to generate software step exception for completed step
13050 * end TB (as usual for having generated an exception)
13051 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
13052 * emit code to generate a software step exception
13055 dc->ss_active = ARM_TBFLAG_SS_ACTIVE(dc->base.tb->flags);
13056 dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(dc->base.tb->flags);
13057 dc->is_ldex = false;
13058 dc->ss_same_el = false; /* Can't be true since EL_d must be AArch64 */
13060 dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
13062 /* If architectural single step active, limit to 1. */
13063 if (is_singlestepping(dc)) {
13064 dc->base.max_insns = 1;
13067 /* ARM is a fixed-length ISA. Bound the number of insns to execute
13068 to those left on the page. */
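/* -(pc_first | TARGET_PAGE_MASK) is the number of bytes left on the current page; dividing by 4 converts that to a count of A32 instructions. */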
13070 int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
13071 dc->base.max_insns = MIN(dc->base.max_insns, bound);
13074 cpu_F0s = tcg_temp_new_i32();
13075 cpu_F1s = tcg_temp_new_i32();
13076 cpu_F0d = tcg_temp_new_i64();
13077 cpu_F1d = tcg_temp_new_i64();
13080 /* FIXME: cpu_M0 can probably be the same as cpu_V0. */
13081 cpu_M0 = tcg_temp_new_i64();
13084 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
13086 DisasContext *dc = container_of(dcbase, DisasContext, base);
    /* A note on handling of the condexec (IT) bits:
     *
     * We want to avoid the overhead of having to write the updated condexec
     * bits back to the CPUARMState for every instruction in an IT block. So:
     * (1) if the condexec bits are not already zero then we write
     * zero back into the CPUARMState now. This avoids complications trying
     * to do it at the end of the block. (For example if we don't do this
     * it's hard to identify whether we can safely skip writing condexec
     * at the end of the TB, which we definitely want to do for the case
     * where a TB doesn't do anything with the IT state at all.)
     * (2) if we are going to leave the TB then we call gen_set_condexec()
     * which will write the correct value into CPUARMState if zero is wrong.
     * This is done both for leaving the TB at the end, and for leaving
     * it because of an exception we know will happen, which is done in
     * gen_exception_insn(). The latter is necessary because we need to
     * leave the TB with the PC/IT state just prior to execution of the
     * instruction which caused the exception.
     * (3) if we leave the TB unexpectedly (eg a data abort on a load)
     * then the CPUARMState will be wrong and we need to reset it.
     * This is handled in the same way as restoration of the
     * PC in these situations; we save the value of the condexec bits
     * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
     * then uses this to restore them after an exception.
     *
     * Note that there are no instructions which can read the condexec
     * bits, and none which can write non-static values to them, so
     * we don't need to care about whether CPUARMState is correct in the
     * middle of a TB.
     */
    /* Reset the conditional execution bits immediately. This avoids
       complications trying to do it at the end of the block.  */
    if (dc->condexec_mask || dc->condexec_cond) {
        TCGv_i32 tmp = tcg_temp_new_i32();
        tcg_gen_movi_i32(tmp, 0);
        store_cpu_field(tmp, condexec_bits);
    }
}
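
/* Record the per-insn start state: the PC, the IT (condexec) bits, and a
 * zero placeholder for the syndrome word.  restore_state_to_opc() below
 * reads these values back if an exception is taken mid-TB.
 */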
static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    tcg_gen_insn_start(dc->pc,
                       (dc->condexec_cond << 4) | (dc->condexec_mask >> 1),
                       0);
    dc->insn_start = tcg_last_op();
}
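
/* Handle a breakpoint found at dc->pc.  Returns true because the
 * breakpoint is always consumed here, either by the check_breakpoints
 * helper (CPU breakpoints) or by raising EXCP_DEBUG (other breakpoints,
 * e.g. those set via the gdbstub).
 */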
static bool arm_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
                                    const CPUBreakpoint *bp)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (bp->flags & BP_CPU) {
        gen_set_condexec(dc);
        gen_set_pc_im(dc, dc->pc);
        gen_helper_check_breakpoints(cpu_env);
        /* End the TB early; it's likely not going to be executed */
        dc->base.is_jmp = DISAS_TOO_MANY;
    } else {
        gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
        /* The address covered by the breakpoint must be
           included in [tb->pc, tb->pc + tb->size) in order
           for it to be properly cleared -- thus we
           increment the PC here so that the logic setting
           tb->size below does the right thing.  */
        /* TODO: Advance PC by correct instruction length to
         * avoid disassembler error messages */
        dc->pc += 2;
        dc->base.is_jmp = DISAS_NORETURN;
    }

    return true;
}
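
/* Checks shared by the ARM and Thumb translate_insn hooks.  Returns true
 * if the instruction has already been fully handled here (user-mode kernel
 * trap page, or a pending software-step exception) and must not be
 * translated.
 */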
static bool arm_pre_translate_insn(DisasContext *dc)
{
#ifdef CONFIG_USER_ONLY
    /* Intercept jump to the magic kernel page.  */
    if (dc->pc >= 0xffff0000) {
        /* We always get here via a jump, so know we are not in a
           conditional execution block.  */
        gen_exception_internal(EXCP_KERNEL_TRAP);
        dc->base.is_jmp = DISAS_NORETURN;
        return true;
    }
#endif

    if (dc->ss_active && !dc->pstate_ss) {
        /* Singlestep state is Active-pending.
         * If we're in this state at the start of a TB then either
         *  a) we just took an exception to an EL which is being debugged
         *     and this is the first insn in the exception handler
         *  b) debug exceptions were masked and we just unmasked them
         *     without changing EL (eg by clearing PSTATE.D)
         * In either case we're going to take a swstep exception in the
         * "did not step an insn" case, and so the syndrome ISV and EX
         * bits should be zero.
         */
        assert(dc->base.num_insns == 1);
        gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
                      default_exception_el(dc));
        dc->base.is_jmp = DISAS_NORETURN;
        return true;
    }

    return false;
}
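
/* Cleanup shared by the ARM and Thumb translate_insn hooks: resolve any
 * still-pending condition-failed label and advance base.pc_next past the
 * instruction just translated.
 */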
static void arm_post_translate_insn(DisasContext *dc)
{
    if (dc->condjmp && !dc->base.is_jmp) {
        gen_set_label(dc->condlabel);
        dc->condjmp = 0;
    }
    dc->base.pc_next = dc->pc;
    translator_loop_temp_check(&dc->base);
}
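
/* Translate one A32 instruction (always 4 bytes).  */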
static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu->env_ptr;
    unsigned int insn;

    if (arm_pre_translate_insn(dc)) {
        return;
    }

    insn = arm_ldl_code(env, dc->pc, dc->sctlr_b);
    dc->insn = insn;
    dc->pc += 4;
    disas_arm_insn(dc, insn);
    arm_post_translate_insn(dc);

    /* ARM is a fixed-length ISA.  We performed the cross-page check
       in init_disas_context by adjusting max_insns.  */
}
static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
{
    /* Return true if this Thumb insn is always unconditional,
     * even inside an IT block. This is true of only a very few
     * instructions: BKPT, HLT, and SG.
     *
     * A larger class of instructions are UNPREDICTABLE if used
     * inside an IT block; we do not need to detect those here, because
     * what we do by default (perform the cc check and update the IT
     * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
     * choice for those situations.
     *
     * insn is either a 16-bit or a 32-bit instruction; the two are
     * distinguishable because for the 16-bit case the top 16 bits
     * are zeroes, and that isn't a valid 32-bit encoding.
     */
    if ((insn & 0xffffff00) == 0xbe00) {
        /* BKPT */
        return true;
    }

    if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
        !arm_dc_feature(s, ARM_FEATURE_M)) {
        /* HLT: v8A only. This is unconditional even when it is going to
         * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
         * For v7 cores this was a plain old undefined encoding and so
         * honours its cc check. (We might be using the encoding as
         * a semihosting trap, but we don't change the cc check behaviour
         * on that account, because a debugger connected to a real v7A
         * core and emulating semihosting traps by catching the UNDEF
         * exception would also only see cases where the cc check passed.
         * No guest code should be trying to do a HLT semihosting trap
         * in an IT block anyway.
         */
        return true;
    }

    if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
        arm_dc_feature(s, ARM_FEATURE_M)) {
        /* SG: v8M only */
        return true;
    }

    return false;
}
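
/* Translate one T32 instruction (16 or 32 bits), applying the IT-block
 * condition check and advancing the IT state machine afterwards.
 */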
static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);
    CPUARMState *env = cpu->env_ptr;
    uint32_t insn;
    bool is_16bit;

    if (arm_pre_translate_insn(dc)) {
        return;
    }

    insn = arm_lduw_code(env, dc->pc, dc->sctlr_b);
    is_16bit = thumb_insn_is_16bit(dc, insn);
    dc->pc += 2;
    if (!is_16bit) {
        uint32_t insn2 = arm_lduw_code(env, dc->pc, dc->sctlr_b);
        insn = insn << 16 | insn2;
        dc->pc += 2;
    }
    dc->insn = insn;

    if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
        uint32_t cond = dc->condexec_cond;
        if (cond != 0x0e) {     /* Skip conditional when condition is AL. */
            arm_skip_unless(dc, cond);
        }
    }

    if (is_16bit) {
        disas_thumb_insn(dc, insn);
    } else {
        disas_thumb2_insn(dc, insn);
    }

    /* Advance the Thumb condexec condition.  */
    if (dc->condexec_mask) {
        dc->condexec_cond = ((dc->condexec_cond & 0xe) |
                             ((dc->condexec_mask >> 4) & 1));
        dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
        if (dc->condexec_mask == 0) {
            dc->condexec_cond = 0;
        }
    }

    arm_post_translate_insn(dc);
    /* Thumb is a variable-length ISA.  Stop translation when the next insn
     * will touch a new page.  This ensures that prefetch aborts occur at
     * the right place.
     *
     * We want to stop the TB if the next insn starts in a new page,
     * or if it spans between this page and the next. This means that
     * if we're looking at the last halfword in the page we need to
     * see if it's a 16-bit Thumb insn (which will fit in this TB)
     * or a 32-bit Thumb insn (which won't).
     * This is to avoid generating a silly TB with a single 16-bit insn
     * in it at the end of this page (which would execute correctly
     * but isn't very efficient).
     */
    if (dc->base.is_jmp == DISAS_NEXT
        && (dc->pc - dc->page_start >= TARGET_PAGE_SIZE
            || (dc->pc - dc->page_start >= TARGET_PAGE_SIZE - 3
                && insn_crosses_page(env, dc)))) {
        dc->base.is_jmp = DISAS_TOO_MANY;
    }
}
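
/* Emit the end-of-TB code: write back any pending condexec/PC state and
 * generate the appropriate exit (goto_tb, helper call, exception or plain
 * exit_tb) for the reason translation stopped.
 */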
static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    if (tb_cflags(dc->base.tb) & CF_LAST_IO && dc->condjmp) {
        /* FIXME: This can theoretically happen with self-modifying code. */
        cpu_abort(cpu, "IO on conditional branch instruction");
    }

    /* At this stage dc->condjmp will only be set when the skipped
       instruction was a conditional branch or trap, and the PC has
       already been written.  */
    gen_set_condexec(dc);
    if (dc->base.is_jmp == DISAS_BX_EXCRET) {
        /* Exception return branches need some special case code at the
         * end of the TB, which is complex enough that it has to
         * handle the single-step vs not and the condition-failed
         * insn codepath itself.
         */
        gen_bx_excret_final_code(dc);
    } else if (unlikely(is_singlestepping(dc))) {
        /* Unconditional and "condition passed" instruction codepath. */
        switch (dc->base.is_jmp) {
        case DISAS_SWI:
            gen_ss_advance(dc);
            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
                          default_exception_el(dc));
            break;
        case DISAS_HVC:
            gen_ss_advance(dc);
            gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
            break;
        case DISAS_SMC:
            gen_ss_advance(dc);
            gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
            break;
        case DISAS_NEXT:
        case DISAS_TOO_MANY:
        case DISAS_UPDATE:
            gen_set_pc_im(dc, dc->pc);
            /* fall through */
        default:
            /* FIXME: Single stepping a WFI insn will not halt the CPU. */
            gen_singlestep_exception(dc);
            break;
        case DISAS_NORETURN:
            break;
        }
    } else {
        /* While branches must always occur at the end of an IT block,
           there are a few other things that can cause us to terminate
           the TB in the middle of an IT block:
            - Exception generating instructions (bkpt, swi, undefined).
            - Page boundaries.
            - Hardware watchpoints.
           Hardware breakpoints have already been handled and skip this code.
         */
        switch (dc->base.is_jmp) {
        case DISAS_NEXT:
        case DISAS_TOO_MANY:
            gen_goto_tb(dc, 1, dc->pc);
            break;
        case DISAS_UPDATE:
            gen_set_pc_im(dc, dc->pc);
            /* fall through */
        default:
            /* indicate that the hash table must be used to find the next TB */
            tcg_gen_exit_tb(NULL, 0);
            break;
        case DISAS_NORETURN:
            /* nothing more to generate */
            break;
        case DISAS_WFI:
        {
            TCGv_i32 tmp = tcg_const_i32((dc->thumb &&
                                          !(dc->insn & (1U << 31))) ? 2 : 4);

            gen_helper_wfi(cpu_env, tmp);
            tcg_temp_free_i32(tmp);
            /* The helper doesn't necessarily throw an exception, but we
             * must go back to the main loop to check for interrupts anyway.
             */
            tcg_gen_exit_tb(NULL, 0);
            break;
        }
        case DISAS_WFE:
            gen_helper_wfe(cpu_env);
            break;
        case DISAS_YIELD:
            gen_helper_yield(cpu_env);
            break;
        case DISAS_SWI:
            gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
                          default_exception_el(dc));
            break;
        case DISAS_HVC:
            gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
            break;
        case DISAS_SMC:
            gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
            break;
        }
    }
    if (dc->condjmp) {
        /* "Condition failed" instruction codepath for the branch/trap insn */
        gen_set_label(dc->condlabel);
        gen_set_condexec(dc);
        if (unlikely(is_singlestepping(dc))) {
            gen_set_pc_im(dc, dc->pc);
            gen_singlestep_exception(dc);
        } else {
            gen_goto_tb(dc, 1, dc->pc);
        }
    }

    /* Functions above can change dc->pc, so re-align dc->base.pc_next */
    dc->base.pc_next = dc->pc;
}
static void arm_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
    log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
}
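
/* Hooks handed to the generic translator_loop(); the ARM and Thumb
 * front ends differ only in their translate_insn callback.
 */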
static const TranslatorOps arm_translator_ops = {
    .init_disas_context = arm_tr_init_disas_context,
    .tb_start           = arm_tr_tb_start,
    .insn_start         = arm_tr_insn_start,
    .breakpoint_check   = arm_tr_breakpoint_check,
    .translate_insn     = arm_tr_translate_insn,
    .tb_stop            = arm_tr_tb_stop,
    .disas_log          = arm_tr_disas_log,
};
static const TranslatorOps thumb_translator_ops = {
    .init_disas_context = arm_tr_init_disas_context,
    .tb_start           = arm_tr_tb_start,
    .insn_start         = arm_tr_insn_start,
    .breakpoint_check   = arm_tr_breakpoint_check,
    .translate_insn     = thumb_tr_translate_insn,
    .tb_stop            = arm_tr_tb_stop,
    .disas_log          = arm_tr_disas_log,
};
/* generate intermediate code for basic block 'tb'.  */
void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb)
{
    DisasContext dc;
    const TranslatorOps *ops = &arm_translator_ops;

    if (ARM_TBFLAG_THUMB(tb->flags)) {
        ops = &thumb_translator_ops;
    }
#ifdef TARGET_AARCH64
    if (ARM_TBFLAG_AARCH64_STATE(tb->flags)) {
        ops = &aarch64_translator_ops;
    }
#endif

    translator_loop(ops, &dc.base, cpu, tb);
}
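
/* Dump the AArch32 register state; AArch64 state is delegated to
 * aarch64_cpu_dump_state().
 */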
void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
                        int flags)
{
    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;
    int i;

    if (is_a64(env)) {
        aarch64_cpu_dump_state(cs, f, cpu_fprintf, flags);
        return;
    }

    for (i = 0; i < 16; i++) {
        cpu_fprintf(f, "R%02d=%08x", i, env->regs[i]);
        if ((i % 4) == 3) {
            cpu_fprintf(f, "\n");
        } else {
            cpu_fprintf(f, " ");
        }
    }

    if (arm_feature(env, ARM_FEATURE_M)) {
        uint32_t xpsr = xpsr_read(env);
        const char *mode;
        const char *ns_status = "";

        if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
            ns_status = env->v7m.secure ? "S " : "NS ";
        }

        if (xpsr & XPSR_EXCP) {
            mode = "handler";
        } else {
            if (env->v7m.control[env->v7m.secure] & R_V7M_CONTROL_NPRIV_MASK) {
                mode = "unpriv-thread";
            } else {
                mode = "priv-thread";
            }
        }

        cpu_fprintf(f, "XPSR=%08x %c%c%c%c %c %s%s\n",
                    xpsr,
                    xpsr & XPSR_N ? 'N' : '-',
                    xpsr & XPSR_Z ? 'Z' : '-',
                    xpsr & XPSR_C ? 'C' : '-',
                    xpsr & XPSR_V ? 'V' : '-',
                    xpsr & XPSR_T ? 'T' : 'A',
                    ns_status,
                    mode);
    } else {
        uint32_t psr = cpsr_read(env);
        const char *ns_status = "";

        if (arm_feature(env, ARM_FEATURE_EL3) &&
            (psr & CPSR_M) != ARM_CPU_MODE_MON) {
            ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
        }

        cpu_fprintf(f, "PSR=%08x %c%c%c%c %c %s%s%d\n",
                    psr,
                    psr & CPSR_N ? 'N' : '-',
                    psr & CPSR_Z ? 'Z' : '-',
                    psr & CPSR_C ? 'C' : '-',
                    psr & CPSR_V ? 'V' : '-',
                    psr & CPSR_T ? 'T' : 'A',
                    ns_status,
                    aarch32_mode_name(psr), (psr & 0x10) ? 32 : 26);
    }

    if (flags & CPU_DUMP_FPU) {
        int numvfpregs = 0;
        if (arm_feature(env, ARM_FEATURE_VFP)) {
            numvfpregs += 16;
        }
        if (arm_feature(env, ARM_FEATURE_VFP3)) {
            numvfpregs += 16;
        }
        for (i = 0; i < numvfpregs; i++) {
            uint64_t v = *aa32_vfp_dreg(env, i);
            cpu_fprintf(f, "s%02d=%08x s%02d=%08x d%02d=%016" PRIx64 "\n",
                        i * 2, (uint32_t)v,
                        i * 2 + 1, (uint32_t)(v >> 32),
                        v);
        }
        cpu_fprintf(f, "FPSCR: %08x\n", (int)env->vfp.xregs[ARM_VFP_FPSCR]);
    }
}
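
/* Rebuild the PC, IT bits and exception syndrome from the values recorded
 * at each insn start, after an exception has been taken mid-TB.
 */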
void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
                          target_ulong *data)
{
    if (is_a64(env)) {
        env->pc = data[0];
        env->condexec_bits = 0;
        env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
    } else {
        env->regs[15] = data[0];
        env->condexec_bits = data[1];
        env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
    }
}