/*
 *  ARM translation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *  Copyright (c) 2005-2007 CodeSourcery
 *  Copyright (c) 2007 OpenedHand, Ltd.
 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
21 #include "qemu/osdep.h"
24 #include "internals.h"
25 #include "disas/disas.h"
26 #include "exec/exec-all.h"
28 #include "tcg-op-gvec.h"
30 #include "qemu/bitops.h"
32 #include "exec/semihost.h"
34 #include "exec/helper-proto.h"
35 #include "exec/helper-gen.h"
37 #include "trace-tcg.h"
41 #define ENABLE_ARCH_4T arm_dc_feature(s, ARM_FEATURE_V4T)
42 #define ENABLE_ARCH_5 arm_dc_feature(s, ARM_FEATURE_V5)
43 /* currently all emulated v5 cores are also v5TE, so don't bother */
44 #define ENABLE_ARCH_5TE arm_dc_feature(s, ARM_FEATURE_V5)
45 #define ENABLE_ARCH_5J arm_dc_feature(s, ARM_FEATURE_JAZELLE)
46 #define ENABLE_ARCH_6 arm_dc_feature(s, ARM_FEATURE_V6)
47 #define ENABLE_ARCH_6K arm_dc_feature(s, ARM_FEATURE_V6K)
48 #define ENABLE_ARCH_6T2 arm_dc_feature(s, ARM_FEATURE_THUMB2)
49 #define ENABLE_ARCH_7 arm_dc_feature(s, ARM_FEATURE_V7)
50 #define ENABLE_ARCH_8 arm_dc_feature(s, ARM_FEATURE_V8)
52 #define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
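/* Illustrative usage only: a decode path for a v6T2-only encoding would
 * execute ARCH(6T2); before emitting any code, so that on cores without
 * Thumb-2 the instruction falls through to the illegal_op handling in the
 * decoder functions later in this file.
 */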
54 #include "translate.h"
56 #if defined(CONFIG_USER_ONLY)
#define IS_USER(s) 1
#else
#define IS_USER(s) (s->user)
#endif
62 /* We reuse the same 64-bit temporaries for efficiency. */
63 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
64 static TCGv_i32 cpu_R[16];
65 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
66 TCGv_i64 cpu_exclusive_addr;
67 TCGv_i64 cpu_exclusive_val;
69 /* FIXME: These should be removed. */
70 static TCGv_i32 cpu_F0s, cpu_F1s;
71 static TCGv_i64 cpu_F0d, cpu_F1d;
73 #include "exec/gen-icount.h"
75 static const char *regnames[] =
76 { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
77 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
79 /* Function prototypes for gen_ functions calling Neon helpers. */
typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
                                 TCGv_i32, TCGv_i32);
83 /* initialize TCG globals. */
84 void arm_translate_init(void)
88 for (i = 0; i < 16; i++) {
89 cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
                                      offsetof(CPUARMState, regs[i]),
                                      regnames[i]);
    }
93 cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
94 cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
95 cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
96 cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
98 cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
99 offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
100 cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
101 offsetof(CPUARMState, exclusive_val), "exclusive_val");
103 a64_translate_init();
106 /* Flags for the disas_set_da_iss info argument:
 * lower bits hold the Rt register number, higher bits are flags.
 */
typedef enum ISSInfo {
    ISSNone = 0,
    ISSRegMask = 0x1f,
112 ISSInvalid = (1 << 5),
113 ISSIsAcqRel = (1 << 6),
114 ISSIsWrite = (1 << 7),
    ISSIs16Bit = (1 << 8),
} ISSInfo;
118 /* Save the syndrome information for a Data Abort */
119 static void disas_set_da_iss(DisasContext *s, TCGMemOp memop, ISSInfo issinfo)
122 int sas = memop & MO_SIZE;
123 bool sse = memop & MO_SIGN;
124 bool is_acqrel = issinfo & ISSIsAcqRel;
125 bool is_write = issinfo & ISSIsWrite;
126 bool is_16bit = issinfo & ISSIs16Bit;
127 int srt = issinfo & ISSRegMask;
129 if (issinfo & ISSInvalid) {
130 /* Some callsites want to conditionally provide ISS info,
         * eg "only if this was not a writeback"
         */
        return;
    }
137 /* For AArch32, insns where the src/dest is R15 never generate
     * ISS information. Catching that here saves checking at all
     * the call sites.
     */
    if (srt == 15) {
        return;
    }
144 syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
145 0, 0, 0, is_write, 0, is_16bit);
146 disas_set_insn_syndrome(s, syn);
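/* Worked example (illustrative): a 16-bit Thumb LDRH into r3 would be
 * reported here with srt = 3, sas = MO_16, sse = false, is_write = false
 * and is_16bit = true, all of which are folded into the syndrome above.
 */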
149 static inline int get_a32_user_mem_index(DisasContext *s)
    /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
     * insns:
     *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
     *  otherwise, access as if at PL0.
     */
156 switch (s->mmu_idx) {
157 case ARMMMUIdx_S1E2: /* this one is UNPREDICTABLE */
158 case ARMMMUIdx_S12NSE0:
159 case ARMMMUIdx_S12NSE1:
160 return arm_to_core_mmu_idx(ARMMMUIdx_S12NSE0);
162 case ARMMMUIdx_S1SE0:
163 case ARMMMUIdx_S1SE1:
164 return arm_to_core_mmu_idx(ARMMMUIdx_S1SE0);
165 case ARMMMUIdx_MUser:
166 case ARMMMUIdx_MPriv:
167 return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
168 case ARMMMUIdx_MUserNegPri:
169 case ARMMMUIdx_MPrivNegPri:
170 return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
171 case ARMMMUIdx_MSUser:
172 case ARMMMUIdx_MSPriv:
173 return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
174 case ARMMMUIdx_MSUserNegPri:
175 case ARMMMUIdx_MSPrivNegPri:
176 return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
179 g_assert_not_reached();
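/* Callers such as the LDRT/STRT/LDRBT/STRBT "unprivileged" load/store
 * forms pass the index returned here to the gen_aa32_ld/st helpers below,
 * so those accesses are translated with PL0 permissions regardless of the
 * current exception level.
 */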
183 static inline TCGv_i32 load_cpu_offset(int offset)
185 TCGv_i32 tmp = tcg_temp_new_i32();
186 tcg_gen_ld_i32(tmp, cpu_env, offset);
190 #define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))
192 static inline void store_cpu_offset(TCGv_i32 var, int offset)
194 tcg_gen_st_i32(var, cpu_env, offset);
195 tcg_temp_free_i32(var);
198 #define store_cpu_field(var, name) \
199 store_cpu_offset(var, offsetof(CPUARMState, name))
201 /* Set a variable to the value of a CPU register. */
202 static void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
    if (reg == 15) {
        uint32_t addr;
        /* normally, since we updated PC, we need only to add one insn */
        if (s->thumb)
            addr = (long)s->pc + 2;
        else
            addr = (long)s->pc + 4;
        tcg_gen_movi_i32(var, addr);
    } else {
        tcg_gen_mov_i32(var, cpu_R[reg]);
    }
217 /* Create a new temporary and set it to the value of a CPU register. */
218 static inline TCGv_i32 load_reg(DisasContext *s, int reg)
220 TCGv_i32 tmp = tcg_temp_new_i32();
221 load_reg_var(s, tmp, reg);
225 /* Set a CPU register. The source must be a temporary and will be
227 static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
230 /* In Thumb mode, we must ignore bit 0.
231 * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
232 * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
     * We choose to ignore [1:0] in ARM mode for all architecture versions.
     */
    if (reg == 15) {
        tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
        s->base.is_jmp = DISAS_JUMP;
    }
238 tcg_gen_mov_i32(cpu_R[reg], var);
239 tcg_temp_free_i32(var);
242 /* Value extensions. */
243 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
244 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
245 #define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
246 #define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
248 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
249 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
252 static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
254 TCGv_i32 tmp_mask = tcg_const_i32(mask);
255 gen_helper_cpsr_write(cpu_env, var, tmp_mask);
256 tcg_temp_free_i32(tmp_mask);
258 /* Set NZCV flags from the high 4 bits of var. */
259 #define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
261 static void gen_exception_internal(int excp)
263 TCGv_i32 tcg_excp = tcg_const_i32(excp);
265 assert(excp_is_internal(excp));
266 gen_helper_exception_internal(cpu_env, tcg_excp);
267 tcg_temp_free_i32(tcg_excp);
270 static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
272 TCGv_i32 tcg_excp = tcg_const_i32(excp);
273 TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
274 TCGv_i32 tcg_el = tcg_const_i32(target_el);
276 gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
279 tcg_temp_free_i32(tcg_el);
280 tcg_temp_free_i32(tcg_syn);
281 tcg_temp_free_i32(tcg_excp);
284 static void gen_ss_advance(DisasContext *s)
286 /* If the singlestep state is Active-not-pending, advance to
291 gen_helper_clear_pstate_ss(cpu_env);
295 static void gen_step_complete_exception(DisasContext *s)
297 /* We just completed step of an insn. Move from Active-not-pending
298 * to Active-pending, and then also take the swstep exception.
299 * This corresponds to making the (IMPDEF) choice to prioritize
300 * swstep exceptions over asynchronous exceptions taken to an exception
301 * level where debug is disabled. This choice has the advantage that
302 * we do not need to maintain internal state corresponding to the
303 * ISV/EX syndrome bits between completion of the step and generation
304 * of the exception, and our syndrome information is always correct.
307 gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
308 default_exception_el(s));
309 s->base.is_jmp = DISAS_NORETURN;
312 static void gen_singlestep_exception(DisasContext *s)
314 /* Generate the right kind of exception for singlestep, which is
315 * either the architectural singlestep or EXCP_DEBUG for QEMU's
     * gdb singlestepping.
     */
    if (s->ss_active) {
        gen_step_complete_exception(s);
    } else {
        gen_exception_internal(EXCP_DEBUG);
    }
325 static inline bool is_singlestepping(DisasContext *s)
327 /* Return true if we are singlestepping either because of
328 * architectural singlestep or QEMU gdbstub singlestep. This does
329 * not include the command line '-singlestep' mode which is rather
330 * misnamed as it only means "one instruction per TB" and doesn't
331 * affect the code we generate.
333 return s->base.singlestep_enabled || s->ss_active;
336 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
338 TCGv_i32 tmp1 = tcg_temp_new_i32();
339 TCGv_i32 tmp2 = tcg_temp_new_i32();
340 tcg_gen_ext16s_i32(tmp1, a);
341 tcg_gen_ext16s_i32(tmp2, b);
342 tcg_gen_mul_i32(tmp1, tmp1, tmp2);
343 tcg_temp_free_i32(tmp2);
344 tcg_gen_sari_i32(a, a, 16);
345 tcg_gen_sari_i32(b, b, 16);
346 tcg_gen_mul_i32(b, b, a);
347 tcg_gen_mov_i32(a, tmp1);
348 tcg_temp_free_i32(tmp1);
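/* Worked example (illustrative): with a = 0x00020003 and b = 0x00040005,
 * this leaves a = 3 * 5 = 15 (product of the low halfwords) and
 * b = 2 * 4 = 8 (product of the high halfwords).
 */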
351 /* Byteswap each halfword. */
352 static void gen_rev16(TCGv_i32 var)
354 TCGv_i32 tmp = tcg_temp_new_i32();
355 TCGv_i32 mask = tcg_const_i32(0x00ff00ff);
356 tcg_gen_shri_i32(tmp, var, 8);
357 tcg_gen_and_i32(tmp, tmp, mask);
358 tcg_gen_and_i32(var, var, mask);
359 tcg_gen_shli_i32(var, var, 8);
360 tcg_gen_or_i32(var, var, tmp);
361 tcg_temp_free_i32(mask);
362 tcg_temp_free_i32(tmp);
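/* Worked example (illustrative): 0x11223344 becomes 0x22114433, i.e. the
 * bytes within each halfword are swapped but the halfwords stay in place.
 */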
365 /* Byteswap low halfword and sign extend. */
366 static void gen_revsh(TCGv_i32 var)
368 tcg_gen_ext16u_i32(var, var);
369 tcg_gen_bswap16_i32(var, var);
370 tcg_gen_ext16s_i32(var, var);
373 /* Return (b << 32) + a. Mark inputs as dead */
374 static TCGv_i64 gen_addq_msw(TCGv_i64 a, TCGv_i32 b)
376 TCGv_i64 tmp64 = tcg_temp_new_i64();
378 tcg_gen_extu_i32_i64(tmp64, b);
379 tcg_temp_free_i32(b);
380 tcg_gen_shli_i64(tmp64, tmp64, 32);
381 tcg_gen_add_i64(a, tmp64, a);
383 tcg_temp_free_i64(tmp64);
387 /* Return (b << 32) - a. Mark inputs as dead. */
388 static TCGv_i64 gen_subq_msw(TCGv_i64 a, TCGv_i32 b)
390 TCGv_i64 tmp64 = tcg_temp_new_i64();
392 tcg_gen_extu_i32_i64(tmp64, b);
393 tcg_temp_free_i32(b);
394 tcg_gen_shli_i64(tmp64, tmp64, 32);
395 tcg_gen_sub_i64(a, tmp64, a);
397 tcg_temp_free_i64(tmp64);
401 /* 32x32->64 multiply. Marks inputs as dead. */
402 static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
404 TCGv_i32 lo = tcg_temp_new_i32();
405 TCGv_i32 hi = tcg_temp_new_i32();
408 tcg_gen_mulu2_i32(lo, hi, a, b);
409 tcg_temp_free_i32(a);
410 tcg_temp_free_i32(b);
412 ret = tcg_temp_new_i64();
413 tcg_gen_concat_i32_i64(ret, lo, hi);
414 tcg_temp_free_i32(lo);
415 tcg_temp_free_i32(hi);
420 static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
422 TCGv_i32 lo = tcg_temp_new_i32();
423 TCGv_i32 hi = tcg_temp_new_i32();
426 tcg_gen_muls2_i32(lo, hi, a, b);
427 tcg_temp_free_i32(a);
428 tcg_temp_free_i32(b);
430 ret = tcg_temp_new_i64();
431 tcg_gen_concat_i32_i64(ret, lo, hi);
432 tcg_temp_free_i32(lo);
433 tcg_temp_free_i32(hi);
438 /* Swap low and high halfwords. */
439 static void gen_swap_half(TCGv_i32 var)
441 TCGv_i32 tmp = tcg_temp_new_i32();
442 tcg_gen_shri_i32(tmp, var, 16);
443 tcg_gen_shli_i32(var, var, 16);
444 tcg_gen_or_i32(var, var, tmp);
445 tcg_temp_free_i32(tmp);
/* Dual 16-bit add.  Result placed in t0 and t1 is marked as dead.
    tmp = (t0 ^ t1) & 0x8000;
    t0 &= ~0x8000;
    t1 &= ~0x8000;
    t0 = (t0 + t1) ^ tmp;
 */
455 static void gen_add16(TCGv_i32 t0, TCGv_i32 t1)
457 TCGv_i32 tmp = tcg_temp_new_i32();
458 tcg_gen_xor_i32(tmp, t0, t1);
459 tcg_gen_andi_i32(tmp, tmp, 0x8000);
460 tcg_gen_andi_i32(t0, t0, ~0x8000);
461 tcg_gen_andi_i32(t1, t1, ~0x8000);
462 tcg_gen_add_i32(t0, t0, t1);
463 tcg_gen_xor_i32(t0, t0, tmp);
464 tcg_temp_free_i32(tmp);
465 tcg_temp_free_i32(t1);
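/* Worked example (illustrative): t0 = 0x00018003, t1 = 0x00020004 gives
 * tmp = 0x8000; clearing bit 15 of both operands makes the plain add
 * produce 0x00030007, and XORing tmp back in yields 0x00038007, i.e. the
 * two independent 16-bit sums 0x0003 and 0x8007 with no carry between
 * the halfwords.
 */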
468 /* Set CF to the top bit of var. */
469 static void gen_set_CF_bit31(TCGv_i32 var)
471 tcg_gen_shri_i32(cpu_CF, var, 31);
474 /* Set N and Z flags from var. */
475 static inline void gen_logic_CC(TCGv_i32 var)
477 tcg_gen_mov_i32(cpu_NF, var);
478 tcg_gen_mov_i32(cpu_ZF, var);
482 static void gen_adc(TCGv_i32 t0, TCGv_i32 t1)
484 tcg_gen_add_i32(t0, t0, t1);
485 tcg_gen_add_i32(t0, t0, cpu_CF);
488 /* dest = T0 + T1 + CF. */
489 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
491 tcg_gen_add_i32(dest, t0, t1);
492 tcg_gen_add_i32(dest, dest, cpu_CF);
495 /* dest = T0 - T1 + CF - 1. */
496 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
498 tcg_gen_sub_i32(dest, t0, t1);
499 tcg_gen_add_i32(dest, dest, cpu_CF);
500 tcg_gen_subi_i32(dest, dest, 1);
503 /* dest = T0 + T1. Compute C, N, V and Z flags */
504 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
506 TCGv_i32 tmp = tcg_temp_new_i32();
507 tcg_gen_movi_i32(tmp, 0);
508 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
509 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
510 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
511 tcg_gen_xor_i32(tmp, t0, t1);
512 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
513 tcg_temp_free_i32(tmp);
514 tcg_gen_mov_i32(dest, cpu_NF);
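/* The overflow flag above follows the usual identity for addition:
 * VF(bit 31) = (result ^ t0) & ~(t0 ^ t1), i.e. overflow occurs exactly
 * when the operands have the same sign and the result's sign differs.
 */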
517 /* dest = T0 + T1 + CF. Compute C, N, V and Z flags */
518 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
520 TCGv_i32 tmp = tcg_temp_new_i32();
521 if (TCG_TARGET_HAS_add2_i32) {
522 tcg_gen_movi_i32(tmp, 0);
523 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
524 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
526 TCGv_i64 q0 = tcg_temp_new_i64();
527 TCGv_i64 q1 = tcg_temp_new_i64();
528 tcg_gen_extu_i32_i64(q0, t0);
529 tcg_gen_extu_i32_i64(q1, t1);
530 tcg_gen_add_i64(q0, q0, q1);
531 tcg_gen_extu_i32_i64(q1, cpu_CF);
532 tcg_gen_add_i64(q0, q0, q1);
533 tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
534 tcg_temp_free_i64(q0);
535 tcg_temp_free_i64(q1);
537 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
538 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
539 tcg_gen_xor_i32(tmp, t0, t1);
540 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
541 tcg_temp_free_i32(tmp);
542 tcg_gen_mov_i32(dest, cpu_NF);
545 /* dest = T0 - T1. Compute C, N, V and Z flags */
546 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
549 tcg_gen_sub_i32(cpu_NF, t0, t1);
550 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
551 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
552 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
553 tmp = tcg_temp_new_i32();
554 tcg_gen_xor_i32(tmp, t0, t1);
555 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
556 tcg_temp_free_i32(tmp);
557 tcg_gen_mov_i32(dest, cpu_NF);
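/* Note that CF here uses the ARM "NOT borrow" convention: TCG_COND_GEU
 * sets CF to 1 exactly when t0 >= t1 as unsigned values, i.e. when the
 * subtraction does not borrow.
 */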
560 /* dest = T0 + ~T1 + CF. Compute C, N, V and Z flags */
561 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
563 TCGv_i32 tmp = tcg_temp_new_i32();
564 tcg_gen_not_i32(tmp, t1);
565 gen_adc_CC(dest, t0, tmp);
566 tcg_temp_free_i32(tmp);
569 #define GEN_SHIFT(name) \
570 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) \
572 TCGv_i32 tmp1, tmp2, tmp3; \
573 tmp1 = tcg_temp_new_i32(); \
574 tcg_gen_andi_i32(tmp1, t1, 0xff); \
575 tmp2 = tcg_const_i32(0); \
576 tmp3 = tcg_const_i32(0x1f); \
577 tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0); \
578 tcg_temp_free_i32(tmp3); \
579 tcg_gen_andi_i32(tmp1, tmp1, 0x1f); \
    tcg_gen_##name##_i32(dest, tmp2, tmp1); \
    tcg_temp_free_i32(tmp2); \
    tcg_temp_free_i32(tmp1); \
}
GEN_SHIFT(shl)
GEN_SHIFT(shr)
#undef GEN_SHIFT
588 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
591 tmp1 = tcg_temp_new_i32();
592 tcg_gen_andi_i32(tmp1, t1, 0xff);
593 tmp2 = tcg_const_i32(0x1f);
594 tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
595 tcg_temp_free_i32(tmp2);
596 tcg_gen_sar_i32(dest, t0, tmp1);
597 tcg_temp_free_i32(tmp1);
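/* Unlike gen_shl/gen_shr above, which force the result to zero once the
 * shift amount reaches 32, gen_sar clamps the amount to 31 so the result
 * is filled with copies of the sign bit, matching ASR register-shift
 * semantics.
 */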
600 static void tcg_gen_abs_i32(TCGv_i32 dest, TCGv_i32 src)
602 TCGv_i32 c0 = tcg_const_i32(0);
603 TCGv_i32 tmp = tcg_temp_new_i32();
604 tcg_gen_neg_i32(tmp, src);
605 tcg_gen_movcond_i32(TCG_COND_GT, dest, src, c0, src, tmp);
606 tcg_temp_free_i32(c0);
607 tcg_temp_free_i32(tmp);
610 static void shifter_out_im(TCGv_i32 var, int shift)
    if (shift == 0) {
        tcg_gen_andi_i32(cpu_CF, var, 1);
    } else {
        tcg_gen_shri_i32(cpu_CF, var, shift);
        if (shift != 31) {
            tcg_gen_andi_i32(cpu_CF, cpu_CF, 1);
        }
    }
622 /* Shift by immediate. Includes special handling for shift == 0. */
623 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
624 int shift, int flags)
630 shifter_out_im(var, 32 - shift);
631 tcg_gen_shli_i32(var, var, shift);
637 tcg_gen_shri_i32(cpu_CF, var, 31);
639 tcg_gen_movi_i32(var, 0);
642 shifter_out_im(var, shift - 1);
643 tcg_gen_shri_i32(var, var, shift);
650 shifter_out_im(var, shift - 1);
653 tcg_gen_sari_i32(var, var, shift);
655 case 3: /* ROR/RRX */
658 shifter_out_im(var, shift - 1);
659 tcg_gen_rotri_i32(var, var, shift); break;
661 TCGv_i32 tmp = tcg_temp_new_i32();
662 tcg_gen_shli_i32(tmp, cpu_CF, 31);
664 shifter_out_im(var, 0);
665 tcg_gen_shri_i32(var, var, 1);
666 tcg_gen_or_i32(var, var, tmp);
667 tcg_temp_free_i32(tmp);
672 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
673 TCGv_i32 shift, int flags)
677 case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
678 case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
679 case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
680 case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
685 gen_shl(var, var, shift);
688 gen_shr(var, var, shift);
691 gen_sar(var, var, shift);
693 case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
694 tcg_gen_rotr_i32(var, var, shift); break;
697 tcg_temp_free_i32(shift);
#define PAS_OP(pfx) \
    switch (op2) { \
702 case 0: gen_pas_helper(glue(pfx,add16)); break; \
703 case 1: gen_pas_helper(glue(pfx,addsubx)); break; \
704 case 2: gen_pas_helper(glue(pfx,subaddx)); break; \
705 case 3: gen_pas_helper(glue(pfx,sub16)); break; \
706 case 4: gen_pas_helper(glue(pfx,add8)); break; \
    case 7: gen_pas_helper(glue(pfx,sub8)); break; \
    }
709 static void gen_arm_parallel_addsub(int op1, int op2, TCGv_i32 a, TCGv_i32 b)
714 #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
716 tmp = tcg_temp_new_ptr();
717 tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
719 tcg_temp_free_ptr(tmp);
722 tmp = tcg_temp_new_ptr();
723 tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
725 tcg_temp_free_ptr(tmp);
727 #undef gen_pas_helper
728 #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
741 #undef gen_pas_helper
746 /* For unknown reasons Arm and Thumb-2 use arbitrarily different encodings. */
#define PAS_OP(pfx) \
    switch (op1) { \
749 case 0: gen_pas_helper(glue(pfx,add8)); break; \
750 case 1: gen_pas_helper(glue(pfx,add16)); break; \
751 case 2: gen_pas_helper(glue(pfx,addsubx)); break; \
752 case 4: gen_pas_helper(glue(pfx,sub8)); break; \
753 case 5: gen_pas_helper(glue(pfx,sub16)); break; \
    case 6: gen_pas_helper(glue(pfx,subaddx)); break; \
    }
756 static void gen_thumb2_parallel_addsub(int op1, int op2, TCGv_i32 a, TCGv_i32 b)
761 #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
763 tmp = tcg_temp_new_ptr();
764 tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
766 tcg_temp_free_ptr(tmp);
769 tmp = tcg_temp_new_ptr();
770 tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
772 tcg_temp_free_ptr(tmp);
774 #undef gen_pas_helper
775 #define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
788 #undef gen_pas_helper
794 * Generate a conditional based on ARM condition code cc.
 * This is common between ARM and AArch64 targets.
 */
797 void arm_test_cc(DisasCompare *cmp, int cc)
828 case 8: /* hi: C && !Z */
829 case 9: /* ls: !C || Z -> !(C && !Z) */
831 value = tcg_temp_new_i32();
833 /* CF is 1 for C, so -CF is an all-bits-set mask for C;
834 ZF is non-zero for !Z; so AND the two subexpressions. */
835 tcg_gen_neg_i32(value, cpu_CF);
836 tcg_gen_and_i32(value, value, cpu_ZF);
839 case 10: /* ge: N == V -> N ^ V == 0 */
840 case 11: /* lt: N != V -> N ^ V != 0 */
841 /* Since we're only interested in the sign bit, == 0 is >= 0. */
843 value = tcg_temp_new_i32();
845 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
848 case 12: /* gt: !Z && N == V */
849 case 13: /* le: Z || N != V */
851 value = tcg_temp_new_i32();
853 /* (N == V) is equal to the sign bit of ~(NF ^ VF). Propagate
854 * the sign bit then AND with ZF to yield the result. */
855 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
856 tcg_gen_sari_i32(value, value, 31);
857 tcg_gen_andc_i32(value, cpu_ZF, value);
860 case 14: /* always */
861 case 15: /* always */
862 /* Use the ALWAYS condition, which will fold early.
863 * It doesn't matter what we use for the value. */
864 cond = TCG_COND_ALWAYS;
869 fprintf(stderr, "Bad condition code 0x%x\n", cc);
874 cond = tcg_invert_cond(cond);
880 cmp->value_global = global;
883 void arm_free_cc(DisasCompare *cmp)
885 if (!cmp->value_global) {
886 tcg_temp_free_i32(cmp->value);
890 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
892 tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
895 void arm_gen_test_cc(int cc, TCGLabel *label)
898 arm_test_cc(&cmp, cc);
899 arm_jump_cc(&cmp, label);
903 static const uint8_t table_logic_cc[16] = {
922 static inline void gen_set_condexec(DisasContext *s)
924 if (s->condexec_mask) {
925 uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
926 TCGv_i32 tmp = tcg_temp_new_i32();
927 tcg_gen_movi_i32(tmp, val);
928 store_cpu_field(tmp, condexec_bits);
932 static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
934 tcg_gen_movi_i32(cpu_R[15], val);
937 /* Set PC and Thumb state from an immediate address. */
938 static inline void gen_bx_im(DisasContext *s, uint32_t addr)
942 s->base.is_jmp = DISAS_JUMP;
943 if (s->thumb != (addr & 1)) {
944 tmp = tcg_temp_new_i32();
945 tcg_gen_movi_i32(tmp, addr & 1);
946 tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUARMState, thumb));
947 tcg_temp_free_i32(tmp);
949 tcg_gen_movi_i32(cpu_R[15], addr & ~1);
952 /* Set PC and Thumb state from var. var is marked as dead. */
953 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
955 s->base.is_jmp = DISAS_JUMP;
956 tcg_gen_andi_i32(cpu_R[15], var, ~1);
957 tcg_gen_andi_i32(var, var, 1);
958 store_cpu_field(var, thumb);
961 /* Set PC and Thumb state from var. var is marked as dead.
962 * For M-profile CPUs, include logic to detect exception-return
963 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
964 * and BX reg, and no others, and happens only for code in Handler mode.
966 static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
968 /* Generate the same code here as for a simple bx, but flag via
969 * s->base.is_jmp that we need to do the rest of the work later.
972 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
973 (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
974 s->base.is_jmp = DISAS_BX_EXCRET;
978 static inline void gen_bx_excret_final_code(DisasContext *s)
980 /* Generate the code to finish possible exception return and end the TB */
981 TCGLabel *excret_label = gen_new_label();
984 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
985 /* Covers FNC_RETURN and EXC_RETURN magic */
986 min_magic = FNC_RETURN_MIN_MAGIC;
988 /* EXC_RETURN magic only */
989 min_magic = EXC_RETURN_MIN_MAGIC;
992 /* Is the new PC value in the magic range indicating exception return? */
993 tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label);
994 /* No: end the TB as we would for a DISAS_JMP */
995 if (is_singlestepping(s)) {
996 gen_singlestep_exception(s);
1000 gen_set_label(excret_label);
1001 /* Yes: this is an exception return.
1002 * At this point in runtime env->regs[15] and env->thumb will hold
1003 * the exception-return magic number, which do_v7m_exception_exit()
1004 * will read. Nothing else will be able to see those values because
1005 * the cpu-exec main loop guarantees that we will always go straight
1006 * from raising the exception to the exception-handling code.
1008 * gen_ss_advance(s) does nothing on M profile currently but
1009 * calling it is conceptually the right thing as we have executed
1010 * this instruction (compare SWI, HVC, SMC handling).
1013 gen_exception_internal(EXCP_EXCEPTION_EXIT);
1016 static inline void gen_bxns(DisasContext *s, int rm)
1018 TCGv_i32 var = load_reg(s, rm);
1020 /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
1021 * we need to sync state before calling it, but:
1022 * - we don't need to do gen_set_pc_im() because the bxns helper will
1023 * always set the PC itself
1024 * - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
1025 * unless it's outside an IT block or the last insn in an IT block,
1026 * so we know that condexec == 0 (already set at the top of the TB)
1027 * is correct in the non-UNPREDICTABLE cases, and we can choose
1028 * "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
1030 gen_helper_v7m_bxns(cpu_env, var);
1031 tcg_temp_free_i32(var);
1032 s->base.is_jmp = DISAS_EXIT;
1035 static inline void gen_blxns(DisasContext *s, int rm)
1037 TCGv_i32 var = load_reg(s, rm);
1039 /* We don't need to sync condexec state, for the same reason as bxns.
1040 * We do however need to set the PC, because the blxns helper reads it.
1041 * The blxns helper may throw an exception.
1043 gen_set_pc_im(s, s->pc);
1044 gen_helper_v7m_blxns(cpu_env, var);
1045 tcg_temp_free_i32(var);
1046 s->base.is_jmp = DISAS_EXIT;
1049 /* Variant of store_reg which uses branch&exchange logic when storing
1050 to r15 in ARM architecture v7 and above. The source must be a temporary
1051 and will be marked as dead. */
1052 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
1054 if (reg == 15 && ENABLE_ARCH_7) {
1057 store_reg(s, reg, var);
1061 /* Variant of store_reg which uses branch&exchange logic when storing
1062 * to r15 in ARM architecture v5T and above. This is used for storing
1063 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
1064 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
1065 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
1067 if (reg == 15 && ENABLE_ARCH_5) {
1068 gen_bx_excret(s, var);
1070 store_reg(s, reg, var);
1074 #ifdef CONFIG_USER_ONLY
1075 #define IS_USER_ONLY 1
1077 #define IS_USER_ONLY 0
1080 /* Abstractions of "generate code to do a guest load/store for
1081 * AArch32", where a vaddr is always 32 bits (and is zero
1082 * extended if we're a 64 bit core) and data is also
1083 * 32 bits unless specifically doing a 64 bit access.
1084 * These functions work like tcg_gen_qemu_{ld,st}* except
1085 * that the address argument is TCGv_i32 rather than TCGv.
1088 static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, TCGMemOp op)
1090 TCGv addr = tcg_temp_new();
1091 tcg_gen_extu_i32_tl(addr, a32);
1093 /* Not needed for user-mode BE32, where we use MO_BE instead. */
1094 if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
        tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
    }
    return addr;
}
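/* The XOR above models legacy BE32 byte ordering on top of a little-endian
 * view of memory: within a naturally aligned word, a byte access has its
 * address XORed with 3 and a halfword access with 2, while word-sized and
 * larger accesses are left unchanged.
 */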
1100 static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
1101 int index, TCGMemOp opc)
1103 TCGv addr = gen_aa32_addr(s, a32, opc);
1104 tcg_gen_qemu_ld_i32(val, addr, index, opc);
1105 tcg_temp_free(addr);
1108 static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
1109 int index, TCGMemOp opc)
1111 TCGv addr = gen_aa32_addr(s, a32, opc);
1112 tcg_gen_qemu_st_i32(val, addr, index, opc);
1113 tcg_temp_free(addr);
1116 #define DO_GEN_LD(SUFF, OPC) \
1117 static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
1118 TCGv_i32 a32, int index) \
1120 gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data); \
1122 static inline void gen_aa32_ld##SUFF##_iss(DisasContext *s, \
1124 TCGv_i32 a32, int index, \
1127 gen_aa32_ld##SUFF(s, val, a32, index); \
1128 disas_set_da_iss(s, OPC, issinfo); \
1131 #define DO_GEN_ST(SUFF, OPC) \
1132 static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
1133 TCGv_i32 a32, int index) \
1135 gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data); \
1137 static inline void gen_aa32_st##SUFF##_iss(DisasContext *s, \
1139 TCGv_i32 a32, int index, \
1142 gen_aa32_st##SUFF(s, val, a32, index); \
1143 disas_set_da_iss(s, OPC, issinfo | ISSIsWrite); \
1146 static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val)
1148 /* Not needed for user-mode BE32, where we use MO_BE instead. */
1149 if (!IS_USER_ONLY && s->sctlr_b) {
1150 tcg_gen_rotri_i64(val, val, 32);
1154 static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1155 int index, TCGMemOp opc)
1157 TCGv addr = gen_aa32_addr(s, a32, opc);
1158 tcg_gen_qemu_ld_i64(val, addr, index, opc);
1159 gen_aa32_frob64(s, val);
1160 tcg_temp_free(addr);
1163 static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
1164 TCGv_i32 a32, int index)
1166 gen_aa32_ld_i64(s, val, a32, index, MO_Q | s->be_data);
1169 static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1170 int index, TCGMemOp opc)
1172 TCGv addr = gen_aa32_addr(s, a32, opc);
1174 /* Not needed for user-mode BE32, where we use MO_BE instead. */
1175 if (!IS_USER_ONLY && s->sctlr_b) {
1176 TCGv_i64 tmp = tcg_temp_new_i64();
1177 tcg_gen_rotri_i64(tmp, val, 32);
1178 tcg_gen_qemu_st_i64(tmp, addr, index, opc);
1179 tcg_temp_free_i64(tmp);
1181 tcg_gen_qemu_st_i64(val, addr, index, opc);
1183 tcg_temp_free(addr);
1186 static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
1187 TCGv_i32 a32, int index)
1189 gen_aa32_st_i64(s, val, a32, index, MO_Q | s->be_data);
1192 DO_GEN_LD(8s, MO_SB)
1193 DO_GEN_LD(8u, MO_UB)
1194 DO_GEN_LD(16s, MO_SW)
1195 DO_GEN_LD(16u, MO_UW)
1196 DO_GEN_LD(32u, MO_UL)
1198 DO_GEN_ST(16, MO_UW)
1199 DO_GEN_ST(32, MO_UL)
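/* As an illustration, DO_GEN_LD(16u, MO_UW) above expands into a
 * gen_aa32_ld16u() helper that loads a zero-extended 16-bit value with the
 * endianness selected by s->be_data, plus a gen_aa32_ld16u_iss() variant
 * that additionally records ISS syndrome information for the access.
 */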
1201 static inline void gen_hvc(DisasContext *s, int imm16)
1203 /* The pre HVC helper handles cases when HVC gets trapped
1204 * as an undefined insn by runtime configuration (ie before
1205 * the insn really executes).
1207 gen_set_pc_im(s, s->pc - 4);
1208 gen_helper_pre_hvc(cpu_env);
1209 /* Otherwise we will treat this as a real exception which
1210 * happens after execution of the insn. (The distinction matters
1211 * for the PC value reported to the exception handler and also
1212 * for single stepping.)
1215 gen_set_pc_im(s, s->pc);
1216 s->base.is_jmp = DISAS_HVC;
1219 static inline void gen_smc(DisasContext *s)
1221 /* As with HVC, we may take an exception either before or after
1222 * the insn executes.
1226 gen_set_pc_im(s, s->pc - 4);
1227 tmp = tcg_const_i32(syn_aa32_smc());
1228 gen_helper_pre_smc(cpu_env, tmp);
1229 tcg_temp_free_i32(tmp);
1230 gen_set_pc_im(s, s->pc);
1231 s->base.is_jmp = DISAS_SMC;
1234 static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
1236 gen_set_condexec(s);
1237 gen_set_pc_im(s, s->pc - offset);
1238 gen_exception_internal(excp);
1239 s->base.is_jmp = DISAS_NORETURN;
1242 static void gen_exception_insn(DisasContext *s, int offset, int excp,
1243 int syn, uint32_t target_el)
1245 gen_set_condexec(s);
1246 gen_set_pc_im(s, s->pc - offset);
1247 gen_exception(excp, syn, target_el);
1248 s->base.is_jmp = DISAS_NORETURN;
1251 static void gen_exception_bkpt_insn(DisasContext *s, int offset, uint32_t syn)
1255 gen_set_condexec(s);
1256 gen_set_pc_im(s, s->pc - offset);
1257 tcg_syn = tcg_const_i32(syn);
1258 gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
1259 tcg_temp_free_i32(tcg_syn);
1260 s->base.is_jmp = DISAS_NORETURN;
1263 /* Force a TB lookup after an instruction that changes the CPU state. */
1264 static inline void gen_lookup_tb(DisasContext *s)
1266 tcg_gen_movi_i32(cpu_R[15], s->pc & ~1);
1267 s->base.is_jmp = DISAS_EXIT;
1270 static inline void gen_hlt(DisasContext *s, int imm)
1272 /* HLT. This has two purposes.
1273 * Architecturally, it is an external halting debug instruction.
1274 * Since QEMU doesn't implement external debug, we treat this as
 * the architecture requires when halting debug is disabled: it will UNDEF.
1276 * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1277 * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1278 * must trigger semihosting even for ARMv7 and earlier, where
1279 * HLT was an undefined encoding.
1280 * In system mode, we don't allow userspace access to
1281 * semihosting, to provide some semblance of security
1282 * (and for consistency with our 32-bit semihosting).
1284 if (semihosting_enabled() &&
1285 #ifndef CONFIG_USER_ONLY
        s->current_el != 0 &&
#endif
        (imm == (s->thumb ? 0x3c : 0xf000))) {
        gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
        return;
    }
1293 gen_exception_insn(s, s->thumb ? 2 : 4, EXCP_UDEF, syn_uncategorized(),
1294 default_exception_el(s));
1297 static inline void gen_add_data_offset(DisasContext *s, unsigned int insn,
1300 int val, rm, shift, shiftop;
1303 if (!(insn & (1 << 25))) {
1306 if (!(insn & (1 << 23)))
1309 tcg_gen_addi_i32(var, var, val);
1311 /* shift/register */
1313 shift = (insn >> 7) & 0x1f;
1314 shiftop = (insn >> 5) & 3;
1315 offset = load_reg(s, rm);
1316 gen_arm_shift_im(offset, shiftop, shift, 0);
1317 if (!(insn & (1 << 23)))
1318 tcg_gen_sub_i32(var, var, offset);
1320 tcg_gen_add_i32(var, var, offset);
1321 tcg_temp_free_i32(offset);
1325 static inline void gen_add_datah_offset(DisasContext *s, unsigned int insn,
1326 int extra, TCGv_i32 var)
1331 if (insn & (1 << 22)) {
1333 val = (insn & 0xf) | ((insn >> 4) & 0xf0);
1334 if (!(insn & (1 << 23)))
1338 tcg_gen_addi_i32(var, var, val);
1342 tcg_gen_addi_i32(var, var, extra);
1344 offset = load_reg(s, rm);
1345 if (!(insn & (1 << 23)))
1346 tcg_gen_sub_i32(var, var, offset);
1348 tcg_gen_add_i32(var, var, offset);
1349 tcg_temp_free_i32(offset);
1353 static TCGv_ptr get_fpstatus_ptr(int neon)
1355 TCGv_ptr statusptr = tcg_temp_new_ptr();
1358 offset = offsetof(CPUARMState, vfp.standard_fp_status);
1360 offset = offsetof(CPUARMState, vfp.fp_status);
1362 tcg_gen_addi_ptr(statusptr, cpu_env, offset);
1366 #define VFP_OP2(name) \
1367 static inline void gen_vfp_##name(int dp) \
1369 TCGv_ptr fpst = get_fpstatus_ptr(0); \
1371 gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, cpu_F1d, fpst); \
1373 gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, cpu_F1s, fpst); \
1375 tcg_temp_free_ptr(fpst); \
1385 static inline void gen_vfp_F1_mul(int dp)
1387 /* Like gen_vfp_mul() but put result in F1 */
1388 TCGv_ptr fpst = get_fpstatus_ptr(0);
1390 gen_helper_vfp_muld(cpu_F1d, cpu_F0d, cpu_F1d, fpst);
1392 gen_helper_vfp_muls(cpu_F1s, cpu_F0s, cpu_F1s, fpst);
1394 tcg_temp_free_ptr(fpst);
1397 static inline void gen_vfp_F1_neg(int dp)
1399 /* Like gen_vfp_neg() but put result in F1 */
1401 gen_helper_vfp_negd(cpu_F1d, cpu_F0d);
1403 gen_helper_vfp_negs(cpu_F1s, cpu_F0s);
1407 static inline void gen_vfp_abs(int dp)
1410 gen_helper_vfp_absd(cpu_F0d, cpu_F0d);
1412 gen_helper_vfp_abss(cpu_F0s, cpu_F0s);
1415 static inline void gen_vfp_neg(int dp)
1418 gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
1420 gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
1423 static inline void gen_vfp_sqrt(int dp)
1426 gen_helper_vfp_sqrtd(cpu_F0d, cpu_F0d, cpu_env);
1428 gen_helper_vfp_sqrts(cpu_F0s, cpu_F0s, cpu_env);
1431 static inline void gen_vfp_cmp(int dp)
1434 gen_helper_vfp_cmpd(cpu_F0d, cpu_F1d, cpu_env);
1436 gen_helper_vfp_cmps(cpu_F0s, cpu_F1s, cpu_env);
1439 static inline void gen_vfp_cmpe(int dp)
1442 gen_helper_vfp_cmped(cpu_F0d, cpu_F1d, cpu_env);
1444 gen_helper_vfp_cmpes(cpu_F0s, cpu_F1s, cpu_env);
1447 static inline void gen_vfp_F1_ld0(int dp)
1450 tcg_gen_movi_i64(cpu_F1d, 0);
1452 tcg_gen_movi_i32(cpu_F1s, 0);
1455 #define VFP_GEN_ITOF(name) \
1456 static inline void gen_vfp_##name(int dp, int neon) \
1458 TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
1460 gen_helper_vfp_##name##d(cpu_F0d, cpu_F0s, statusptr); \
1462 gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
1464 tcg_temp_free_ptr(statusptr); \
1471 #define VFP_GEN_FTOI(name) \
1472 static inline void gen_vfp_##name(int dp, int neon) \
1474 TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
1476 gen_helper_vfp_##name##d(cpu_F0s, cpu_F0d, statusptr); \
1478 gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
1480 tcg_temp_free_ptr(statusptr); \
1489 #define VFP_GEN_FIX(name, round) \
1490 static inline void gen_vfp_##name(int dp, int shift, int neon) \
1492 TCGv_i32 tmp_shift = tcg_const_i32(shift); \
1493 TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
1495 gen_helper_vfp_##name##d##round(cpu_F0d, cpu_F0d, tmp_shift, \
1498 gen_helper_vfp_##name##s##round(cpu_F0s, cpu_F0s, tmp_shift, \
1501 tcg_temp_free_i32(tmp_shift); \
1502 tcg_temp_free_ptr(statusptr); \
1504 VFP_GEN_FIX(tosh, _round_to_zero)
1505 VFP_GEN_FIX(tosl, _round_to_zero)
1506 VFP_GEN_FIX(touh, _round_to_zero)
1507 VFP_GEN_FIX(toul, _round_to_zero)
1514 static inline void gen_vfp_ld(DisasContext *s, int dp, TCGv_i32 addr)
1517 gen_aa32_ld64(s, cpu_F0d, addr, get_mem_index(s));
1519 gen_aa32_ld32u(s, cpu_F0s, addr, get_mem_index(s));
1523 static inline void gen_vfp_st(DisasContext *s, int dp, TCGv_i32 addr)
1526 gen_aa32_st64(s, cpu_F0d, addr, get_mem_index(s));
1528 gen_aa32_st32(s, cpu_F0s, addr, get_mem_index(s));
1532 static inline long vfp_reg_offset(bool dp, unsigned reg)
1535 return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1537 long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
1539 ofs += offsetof(CPU_DoubleU, l.upper);
1541 ofs += offsetof(CPU_DoubleU, l.lower);
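/* For example, D3 lives at zregs[1].d[1], while S5 (odd, so l.upper) is
 * one 32-bit half of zregs[1].d[0]; CPU_DoubleU hides the host-endianness
 * dependence of where that half actually sits in memory.
 */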
/* Return the offset of a 32-bit piece of a NEON register.
   zero is the least significant end of the register.  */
static inline long
neon_reg_offset (int reg, int n)
{
    int sreg;
    sreg = reg * 2 + n;
    return vfp_reg_offset(0, sreg);
}
1557 static TCGv_i32 neon_load_reg(int reg, int pass)
1559 TCGv_i32 tmp = tcg_temp_new_i32();
1560 tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
1564 static void neon_store_reg(int reg, int pass, TCGv_i32 var)
1566 tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
1567 tcg_temp_free_i32(var);
1570 static inline void neon_load_reg64(TCGv_i64 var, int reg)
1572 tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
1575 static inline void neon_store_reg64(TCGv_i64 var, int reg)
1577 tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
1580 static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
1582 TCGv_ptr ret = tcg_temp_new_ptr();
1583 tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
1587 #define tcg_gen_ld_f32 tcg_gen_ld_i32
1588 #define tcg_gen_ld_f64 tcg_gen_ld_i64
1589 #define tcg_gen_st_f32 tcg_gen_st_i32
1590 #define tcg_gen_st_f64 tcg_gen_st_i64
1592 static inline void gen_mov_F0_vreg(int dp, int reg)
1595 tcg_gen_ld_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
1597 tcg_gen_ld_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
1600 static inline void gen_mov_F1_vreg(int dp, int reg)
1603 tcg_gen_ld_f64(cpu_F1d, cpu_env, vfp_reg_offset(dp, reg));
1605 tcg_gen_ld_f32(cpu_F1s, cpu_env, vfp_reg_offset(dp, reg));
1608 static inline void gen_mov_vreg_F0(int dp, int reg)
1611 tcg_gen_st_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
1613 tcg_gen_st_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
1616 #define ARM_CP_RW_BIT (1 << 20)
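/* Bit 20 of a coprocessor instruction is its load/"read" direction bit:
 * it is set for transfers from the coprocessor to ARM registers (e.g.
 * TMRC/TMRRC and the WLDR forms below) and clear for the corresponding
 * writes/stores (TMCR/TMCRR, WSTR).
 */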
1618 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1620 tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1623 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1625 tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1628 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1630 TCGv_i32 var = tcg_temp_new_i32();
1631 tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1635 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1637 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1638 tcg_temp_free_i32(var);
1641 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1643 iwmmxt_store_reg(cpu_M0, rn);
1646 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1648 iwmmxt_load_reg(cpu_M0, rn);
1651 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1653 iwmmxt_load_reg(cpu_V1, rn);
1654 tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1657 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1659 iwmmxt_load_reg(cpu_V1, rn);
1660 tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1663 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1665 iwmmxt_load_reg(cpu_V1, rn);
1666 tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1669 #define IWMMXT_OP(name) \
1670 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1672 iwmmxt_load_reg(cpu_V1, rn); \
1673 gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1676 #define IWMMXT_OP_ENV(name) \
1677 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1679 iwmmxt_load_reg(cpu_V1, rn); \
1680 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1683 #define IWMMXT_OP_ENV_SIZE(name) \
1684 IWMMXT_OP_ENV(name##b) \
1685 IWMMXT_OP_ENV(name##w) \
1686 IWMMXT_OP_ENV(name##l)
1688 #define IWMMXT_OP_ENV1(name) \
1689 static inline void gen_op_iwmmxt_##name##_M0(void) \
1691 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1705 IWMMXT_OP_ENV_SIZE(unpackl)
1706 IWMMXT_OP_ENV_SIZE(unpackh)
1708 IWMMXT_OP_ENV1(unpacklub)
1709 IWMMXT_OP_ENV1(unpackluw)
1710 IWMMXT_OP_ENV1(unpacklul)
1711 IWMMXT_OP_ENV1(unpackhub)
1712 IWMMXT_OP_ENV1(unpackhuw)
1713 IWMMXT_OP_ENV1(unpackhul)
1714 IWMMXT_OP_ENV1(unpacklsb)
1715 IWMMXT_OP_ENV1(unpacklsw)
1716 IWMMXT_OP_ENV1(unpacklsl)
1717 IWMMXT_OP_ENV1(unpackhsb)
1718 IWMMXT_OP_ENV1(unpackhsw)
1719 IWMMXT_OP_ENV1(unpackhsl)
1721 IWMMXT_OP_ENV_SIZE(cmpeq)
1722 IWMMXT_OP_ENV_SIZE(cmpgtu)
1723 IWMMXT_OP_ENV_SIZE(cmpgts)
1725 IWMMXT_OP_ENV_SIZE(mins)
1726 IWMMXT_OP_ENV_SIZE(minu)
1727 IWMMXT_OP_ENV_SIZE(maxs)
1728 IWMMXT_OP_ENV_SIZE(maxu)
1730 IWMMXT_OP_ENV_SIZE(subn)
1731 IWMMXT_OP_ENV_SIZE(addn)
1732 IWMMXT_OP_ENV_SIZE(subu)
1733 IWMMXT_OP_ENV_SIZE(addu)
1734 IWMMXT_OP_ENV_SIZE(subs)
1735 IWMMXT_OP_ENV_SIZE(adds)
1737 IWMMXT_OP_ENV(avgb0)
1738 IWMMXT_OP_ENV(avgb1)
1739 IWMMXT_OP_ENV(avgw0)
1740 IWMMXT_OP_ENV(avgw1)
1742 IWMMXT_OP_ENV(packuw)
1743 IWMMXT_OP_ENV(packul)
1744 IWMMXT_OP_ENV(packuq)
1745 IWMMXT_OP_ENV(packsw)
1746 IWMMXT_OP_ENV(packsl)
1747 IWMMXT_OP_ENV(packsq)
1749 static void gen_op_iwmmxt_set_mup(void)
1752 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1753 tcg_gen_ori_i32(tmp, tmp, 2);
1754 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1757 static void gen_op_iwmmxt_set_cup(void)
1760 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1761 tcg_gen_ori_i32(tmp, tmp, 1);
1762 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1765 static void gen_op_iwmmxt_setpsr_nz(void)
1767 TCGv_i32 tmp = tcg_temp_new_i32();
1768 gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1769 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1772 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1774 iwmmxt_load_reg(cpu_V1, rn);
1775 tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1776 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1779 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1786 rd = (insn >> 16) & 0xf;
1787 tmp = load_reg(s, rd);
1789 offset = (insn & 0xff) << ((insn >> 7) & 2);
1790 if (insn & (1 << 24)) {
1792 if (insn & (1 << 23))
1793 tcg_gen_addi_i32(tmp, tmp, offset);
1795 tcg_gen_addi_i32(tmp, tmp, -offset);
1796 tcg_gen_mov_i32(dest, tmp);
1797 if (insn & (1 << 21))
1798 store_reg(s, rd, tmp);
1800 tcg_temp_free_i32(tmp);
1801 } else if (insn & (1 << 21)) {
1803 tcg_gen_mov_i32(dest, tmp);
1804 if (insn & (1 << 23))
1805 tcg_gen_addi_i32(tmp, tmp, offset);
1807 tcg_gen_addi_i32(tmp, tmp, -offset);
1808 store_reg(s, rd, tmp);
1809 } else if (!(insn & (1 << 23)))
1814 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1816 int rd = (insn >> 0) & 0xf;
1819 if (insn & (1 << 8)) {
1820 if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1823 tmp = iwmmxt_load_creg(rd);
1826 tmp = tcg_temp_new_i32();
1827 iwmmxt_load_reg(cpu_V0, rd);
1828 tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1830 tcg_gen_andi_i32(tmp, tmp, mask);
1831 tcg_gen_mov_i32(dest, tmp);
1832 tcg_temp_free_i32(tmp);
1836 /* Disassemble an iwMMXt instruction. Returns nonzero if an error occurred
   (i.e. an undefined instruction).  */
1838 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1841 int rdhi, rdlo, rd0, rd1, i;
1843 TCGv_i32 tmp, tmp2, tmp3;
1845 if ((insn & 0x0e000e00) == 0x0c000000) {
1846 if ((insn & 0x0fe00ff0) == 0x0c400000) {
1848 rdlo = (insn >> 12) & 0xf;
1849 rdhi = (insn >> 16) & 0xf;
1850 if (insn & ARM_CP_RW_BIT) { /* TMRRC */
1851 iwmmxt_load_reg(cpu_V0, wrd);
1852 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1853 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
1854 tcg_gen_extrl_i64_i32(cpu_R[rdhi], cpu_V0);
1855 } else { /* TMCRR */
1856 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1857 iwmmxt_store_reg(cpu_V0, wrd);
1858 gen_op_iwmmxt_set_mup();
1863 wrd = (insn >> 12) & 0xf;
1864 addr = tcg_temp_new_i32();
1865 if (gen_iwmmxt_address(s, insn, addr)) {
1866 tcg_temp_free_i32(addr);
1869 if (insn & ARM_CP_RW_BIT) {
1870 if ((insn >> 28) == 0xf) { /* WLDRW wCx */
1871 tmp = tcg_temp_new_i32();
1872 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1873 iwmmxt_store_creg(wrd, tmp);
1876 if (insn & (1 << 8)) {
1877 if (insn & (1 << 22)) { /* WLDRD */
1878 gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1880 } else { /* WLDRW wRd */
1881 tmp = tcg_temp_new_i32();
1882 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1885 tmp = tcg_temp_new_i32();
1886 if (insn & (1 << 22)) { /* WLDRH */
1887 gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1888 } else { /* WLDRB */
1889 gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1893 tcg_gen_extu_i32_i64(cpu_M0, tmp);
1894 tcg_temp_free_i32(tmp);
1896 gen_op_iwmmxt_movq_wRn_M0(wrd);
1899 if ((insn >> 28) == 0xf) { /* WSTRW wCx */
1900 tmp = iwmmxt_load_creg(wrd);
1901 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1903 gen_op_iwmmxt_movq_M0_wRn(wrd);
1904 tmp = tcg_temp_new_i32();
1905 if (insn & (1 << 8)) {
1906 if (insn & (1 << 22)) { /* WSTRD */
1907 gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1908 } else { /* WSTRW wRd */
1909 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1910 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1913 if (insn & (1 << 22)) { /* WSTRH */
1914 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1915 gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1916 } else { /* WSTRB */
1917 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1918 gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1922 tcg_temp_free_i32(tmp);
1924 tcg_temp_free_i32(addr);
1928 if ((insn & 0x0f000000) != 0x0e000000)
1931 switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1932 case 0x000: /* WOR */
1933 wrd = (insn >> 12) & 0xf;
1934 rd0 = (insn >> 0) & 0xf;
1935 rd1 = (insn >> 16) & 0xf;
1936 gen_op_iwmmxt_movq_M0_wRn(rd0);
1937 gen_op_iwmmxt_orq_M0_wRn(rd1);
1938 gen_op_iwmmxt_setpsr_nz();
1939 gen_op_iwmmxt_movq_wRn_M0(wrd);
1940 gen_op_iwmmxt_set_mup();
1941 gen_op_iwmmxt_set_cup();
1943 case 0x011: /* TMCR */
1946 rd = (insn >> 12) & 0xf;
1947 wrd = (insn >> 16) & 0xf;
1949 case ARM_IWMMXT_wCID:
1950 case ARM_IWMMXT_wCASF:
1952 case ARM_IWMMXT_wCon:
1953 gen_op_iwmmxt_set_cup();
1955 case ARM_IWMMXT_wCSSF:
1956 tmp = iwmmxt_load_creg(wrd);
1957 tmp2 = load_reg(s, rd);
1958 tcg_gen_andc_i32(tmp, tmp, tmp2);
1959 tcg_temp_free_i32(tmp2);
1960 iwmmxt_store_creg(wrd, tmp);
1962 case ARM_IWMMXT_wCGR0:
1963 case ARM_IWMMXT_wCGR1:
1964 case ARM_IWMMXT_wCGR2:
1965 case ARM_IWMMXT_wCGR3:
1966 gen_op_iwmmxt_set_cup();
1967 tmp = load_reg(s, rd);
1968 iwmmxt_store_creg(wrd, tmp);
1974 case 0x100: /* WXOR */
1975 wrd = (insn >> 12) & 0xf;
1976 rd0 = (insn >> 0) & 0xf;
1977 rd1 = (insn >> 16) & 0xf;
1978 gen_op_iwmmxt_movq_M0_wRn(rd0);
1979 gen_op_iwmmxt_xorq_M0_wRn(rd1);
1980 gen_op_iwmmxt_setpsr_nz();
1981 gen_op_iwmmxt_movq_wRn_M0(wrd);
1982 gen_op_iwmmxt_set_mup();
1983 gen_op_iwmmxt_set_cup();
1985 case 0x111: /* TMRC */
1988 rd = (insn >> 12) & 0xf;
1989 wrd = (insn >> 16) & 0xf;
1990 tmp = iwmmxt_load_creg(wrd);
1991 store_reg(s, rd, tmp);
1993 case 0x300: /* WANDN */
1994 wrd = (insn >> 12) & 0xf;
1995 rd0 = (insn >> 0) & 0xf;
1996 rd1 = (insn >> 16) & 0xf;
1997 gen_op_iwmmxt_movq_M0_wRn(rd0);
1998 tcg_gen_neg_i64(cpu_M0, cpu_M0);
1999 gen_op_iwmmxt_andq_M0_wRn(rd1);
2000 gen_op_iwmmxt_setpsr_nz();
2001 gen_op_iwmmxt_movq_wRn_M0(wrd);
2002 gen_op_iwmmxt_set_mup();
2003 gen_op_iwmmxt_set_cup();
2005 case 0x200: /* WAND */
2006 wrd = (insn >> 12) & 0xf;
2007 rd0 = (insn >> 0) & 0xf;
2008 rd1 = (insn >> 16) & 0xf;
2009 gen_op_iwmmxt_movq_M0_wRn(rd0);
2010 gen_op_iwmmxt_andq_M0_wRn(rd1);
2011 gen_op_iwmmxt_setpsr_nz();
2012 gen_op_iwmmxt_movq_wRn_M0(wrd);
2013 gen_op_iwmmxt_set_mup();
2014 gen_op_iwmmxt_set_cup();
2016 case 0x810: case 0xa10: /* WMADD */
2017 wrd = (insn >> 12) & 0xf;
2018 rd0 = (insn >> 0) & 0xf;
2019 rd1 = (insn >> 16) & 0xf;
2020 gen_op_iwmmxt_movq_M0_wRn(rd0);
2021 if (insn & (1 << 21))
2022 gen_op_iwmmxt_maddsq_M0_wRn(rd1);
2024 gen_op_iwmmxt_madduq_M0_wRn(rd1);
2025 gen_op_iwmmxt_movq_wRn_M0(wrd);
2026 gen_op_iwmmxt_set_mup();
2028 case 0x10e: case 0x50e: case 0x90e: case 0xd0e: /* WUNPCKIL */
2029 wrd = (insn >> 12) & 0xf;
2030 rd0 = (insn >> 16) & 0xf;
2031 rd1 = (insn >> 0) & 0xf;
2032 gen_op_iwmmxt_movq_M0_wRn(rd0);
2033 switch ((insn >> 22) & 3) {
2035 gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
2038 gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
2041 gen_op_iwmmxt_unpackll_M0_wRn(rd1);
2046 gen_op_iwmmxt_movq_wRn_M0(wrd);
2047 gen_op_iwmmxt_set_mup();
2048 gen_op_iwmmxt_set_cup();
2050 case 0x10c: case 0x50c: case 0x90c: case 0xd0c: /* WUNPCKIH */
2051 wrd = (insn >> 12) & 0xf;
2052 rd0 = (insn >> 16) & 0xf;
2053 rd1 = (insn >> 0) & 0xf;
2054 gen_op_iwmmxt_movq_M0_wRn(rd0);
2055 switch ((insn >> 22) & 3) {
2057 gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
2060 gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
2063 gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
2068 gen_op_iwmmxt_movq_wRn_M0(wrd);
2069 gen_op_iwmmxt_set_mup();
2070 gen_op_iwmmxt_set_cup();
2072 case 0x012: case 0x112: case 0x412: case 0x512: /* WSAD */
2073 wrd = (insn >> 12) & 0xf;
2074 rd0 = (insn >> 16) & 0xf;
2075 rd1 = (insn >> 0) & 0xf;
2076 gen_op_iwmmxt_movq_M0_wRn(rd0);
2077 if (insn & (1 << 22))
2078 gen_op_iwmmxt_sadw_M0_wRn(rd1);
2080 gen_op_iwmmxt_sadb_M0_wRn(rd1);
2081 if (!(insn & (1 << 20)))
2082 gen_op_iwmmxt_addl_M0_wRn(wrd);
2083 gen_op_iwmmxt_movq_wRn_M0(wrd);
2084 gen_op_iwmmxt_set_mup();
2086 case 0x010: case 0x110: case 0x210: case 0x310: /* WMUL */
2087 wrd = (insn >> 12) & 0xf;
2088 rd0 = (insn >> 16) & 0xf;
2089 rd1 = (insn >> 0) & 0xf;
2090 gen_op_iwmmxt_movq_M0_wRn(rd0);
2091 if (insn & (1 << 21)) {
2092 if (insn & (1 << 20))
2093 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
2095 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
2097 if (insn & (1 << 20))
2098 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
2100 gen_op_iwmmxt_mululw_M0_wRn(rd1);
2102 gen_op_iwmmxt_movq_wRn_M0(wrd);
2103 gen_op_iwmmxt_set_mup();
2105 case 0x410: case 0x510: case 0x610: case 0x710: /* WMAC */
2106 wrd = (insn >> 12) & 0xf;
2107 rd0 = (insn >> 16) & 0xf;
2108 rd1 = (insn >> 0) & 0xf;
2109 gen_op_iwmmxt_movq_M0_wRn(rd0);
2110 if (insn & (1 << 21))
2111 gen_op_iwmmxt_macsw_M0_wRn(rd1);
2113 gen_op_iwmmxt_macuw_M0_wRn(rd1);
2114 if (!(insn & (1 << 20))) {
2115 iwmmxt_load_reg(cpu_V1, wrd);
2116 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
2118 gen_op_iwmmxt_movq_wRn_M0(wrd);
2119 gen_op_iwmmxt_set_mup();
2121 case 0x006: case 0x406: case 0x806: case 0xc06: /* WCMPEQ */
2122 wrd = (insn >> 12) & 0xf;
2123 rd0 = (insn >> 16) & 0xf;
2124 rd1 = (insn >> 0) & 0xf;
2125 gen_op_iwmmxt_movq_M0_wRn(rd0);
2126 switch ((insn >> 22) & 3) {
2128 gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
2131 gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
2134 gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
2139 gen_op_iwmmxt_movq_wRn_M0(wrd);
2140 gen_op_iwmmxt_set_mup();
2141 gen_op_iwmmxt_set_cup();
2143 case 0x800: case 0x900: case 0xc00: case 0xd00: /* WAVG2 */
2144 wrd = (insn >> 12) & 0xf;
2145 rd0 = (insn >> 16) & 0xf;
2146 rd1 = (insn >> 0) & 0xf;
2147 gen_op_iwmmxt_movq_M0_wRn(rd0);
2148 if (insn & (1 << 22)) {
2149 if (insn & (1 << 20))
2150 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
2152 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
2154 if (insn & (1 << 20))
2155 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
2157 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
2159 gen_op_iwmmxt_movq_wRn_M0(wrd);
2160 gen_op_iwmmxt_set_mup();
2161 gen_op_iwmmxt_set_cup();
2163 case 0x802: case 0x902: case 0xa02: case 0xb02: /* WALIGNR */
2164 wrd = (insn >> 12) & 0xf;
2165 rd0 = (insn >> 16) & 0xf;
2166 rd1 = (insn >> 0) & 0xf;
2167 gen_op_iwmmxt_movq_M0_wRn(rd0);
2168 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
2169 tcg_gen_andi_i32(tmp, tmp, 7);
2170 iwmmxt_load_reg(cpu_V1, rd1);
2171 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2172 tcg_temp_free_i32(tmp);
2173 gen_op_iwmmxt_movq_wRn_M0(wrd);
2174 gen_op_iwmmxt_set_mup();
2176 case 0x601: case 0x605: case 0x609: case 0x60d: /* TINSR */
2177 if (((insn >> 6) & 3) == 3)
2179 rd = (insn >> 12) & 0xf;
2180 wrd = (insn >> 16) & 0xf;
2181 tmp = load_reg(s, rd);
2182 gen_op_iwmmxt_movq_M0_wRn(wrd);
2183 switch ((insn >> 6) & 3) {
2185 tmp2 = tcg_const_i32(0xff);
2186 tmp3 = tcg_const_i32((insn & 7) << 3);
2189 tmp2 = tcg_const_i32(0xffff);
2190 tmp3 = tcg_const_i32((insn & 3) << 4);
2193 tmp2 = tcg_const_i32(0xffffffff);
2194 tmp3 = tcg_const_i32((insn & 1) << 5);
2200 gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
2201 tcg_temp_free_i32(tmp3);
2202 tcg_temp_free_i32(tmp2);
2203 tcg_temp_free_i32(tmp);
2204 gen_op_iwmmxt_movq_wRn_M0(wrd);
2205 gen_op_iwmmxt_set_mup();
2207 case 0x107: case 0x507: case 0x907: case 0xd07: /* TEXTRM */
2208 rd = (insn >> 12) & 0xf;
2209 wrd = (insn >> 16) & 0xf;
2210 if (rd == 15 || ((insn >> 22) & 3) == 3)
2212 gen_op_iwmmxt_movq_M0_wRn(wrd);
2213 tmp = tcg_temp_new_i32();
2214 switch ((insn >> 22) & 3) {
2216 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
2217 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
2219 tcg_gen_ext8s_i32(tmp, tmp);
2221 tcg_gen_andi_i32(tmp, tmp, 0xff);
2225 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
2226 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
2228 tcg_gen_ext16s_i32(tmp, tmp);
2230 tcg_gen_andi_i32(tmp, tmp, 0xffff);
2234 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
2235 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
2238 store_reg(s, rd, tmp);
2240 case 0x117: case 0x517: case 0x917: case 0xd17: /* TEXTRC */
2241 if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2243 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2244 switch ((insn >> 22) & 3) {
2246 tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
2249 tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
2252 tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
2255 tcg_gen_shli_i32(tmp, tmp, 28);
2257 tcg_temp_free_i32(tmp);
2259 case 0x401: case 0x405: case 0x409: case 0x40d: /* TBCST */
2260 if (((insn >> 6) & 3) == 3)
2262 rd = (insn >> 12) & 0xf;
2263 wrd = (insn >> 16) & 0xf;
2264 tmp = load_reg(s, rd);
2265 switch ((insn >> 6) & 3) {
2267 gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
2270 gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
2273 gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
2276 tcg_temp_free_i32(tmp);
2277 gen_op_iwmmxt_movq_wRn_M0(wrd);
2278 gen_op_iwmmxt_set_mup();
2280 case 0x113: case 0x513: case 0x913: case 0xd13: /* TANDC */
2281 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2283 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2284 tmp2 = tcg_temp_new_i32();
2285 tcg_gen_mov_i32(tmp2, tmp);
2286 switch ((insn >> 22) & 3) {
2288 for (i = 0; i < 7; i ++) {
2289 tcg_gen_shli_i32(tmp2, tmp2, 4);
2290 tcg_gen_and_i32(tmp, tmp, tmp2);
2294 for (i = 0; i < 3; i ++) {
2295 tcg_gen_shli_i32(tmp2, tmp2, 8);
2296 tcg_gen_and_i32(tmp, tmp, tmp2);
2300 tcg_gen_shli_i32(tmp2, tmp2, 16);
2301 tcg_gen_and_i32(tmp, tmp, tmp2);
2305 tcg_temp_free_i32(tmp2);
2306 tcg_temp_free_i32(tmp);
2308 case 0x01c: case 0x41c: case 0x81c: case 0xc1c: /* WACC */
2309 wrd = (insn >> 12) & 0xf;
2310 rd0 = (insn >> 16) & 0xf;
2311 gen_op_iwmmxt_movq_M0_wRn(rd0);
2312 switch ((insn >> 22) & 3) {
2314 gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
2317 gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
2320 gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2325 gen_op_iwmmxt_movq_wRn_M0(wrd);
2326 gen_op_iwmmxt_set_mup();
2328 case 0x115: case 0x515: case 0x915: case 0xd15: /* TORC */
2329 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2331 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2332 tmp2 = tcg_temp_new_i32();
2333 tcg_gen_mov_i32(tmp2, tmp);
2334 switch ((insn >> 22) & 3) {
2336 for (i = 0; i < 7; i ++) {
2337 tcg_gen_shli_i32(tmp2, tmp2, 4);
2338 tcg_gen_or_i32(tmp, tmp, tmp2);
2342 for (i = 0; i < 3; i ++) {
2343 tcg_gen_shli_i32(tmp2, tmp2, 8);
2344 tcg_gen_or_i32(tmp, tmp, tmp2);
2348 tcg_gen_shli_i32(tmp2, tmp2, 16);
2349 tcg_gen_or_i32(tmp, tmp, tmp2);
2353 tcg_temp_free_i32(tmp2);
2354 tcg_temp_free_i32(tmp);
2356 case 0x103: case 0x503: case 0x903: case 0xd03: /* TMOVMSK */
2357 rd = (insn >> 12) & 0xf;
2358 rd0 = (insn >> 16) & 0xf;
2359 if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2361 gen_op_iwmmxt_movq_M0_wRn(rd0);
2362 tmp = tcg_temp_new_i32();
2363 switch ((insn >> 22) & 3) {
2365 gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2368 gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2371 gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2374 store_reg(s, rd, tmp);
2376 case 0x106: case 0x306: case 0x506: case 0x706: /* WCMPGT */
2377 case 0x906: case 0xb06: case 0xd06: case 0xf06:
2378 wrd = (insn >> 12) & 0xf;
2379 rd0 = (insn >> 16) & 0xf;
2380 rd1 = (insn >> 0) & 0xf;
2381 gen_op_iwmmxt_movq_M0_wRn(rd0);
2382 switch ((insn >> 22) & 3) {
2384 if (insn & (1 << 21))
2385 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2387 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2390 if (insn & (1 << 21))
2391 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2393 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2396 if (insn & (1 << 21))
2397 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2399 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2404 gen_op_iwmmxt_movq_wRn_M0(wrd);
2405 gen_op_iwmmxt_set_mup();
2406 gen_op_iwmmxt_set_cup();
2408 case 0x00e: case 0x20e: case 0x40e: case 0x60e: /* WUNPCKEL */
2409 case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2410 wrd = (insn >> 12) & 0xf;
2411 rd0 = (insn >> 16) & 0xf;
2412 gen_op_iwmmxt_movq_M0_wRn(rd0);
2413 switch ((insn >> 22) & 3) {
2415 if (insn & (1 << 21))
2416 gen_op_iwmmxt_unpacklsb_M0();
2418 gen_op_iwmmxt_unpacklub_M0();
2421 if (insn & (1 << 21))
2422 gen_op_iwmmxt_unpacklsw_M0();
2424 gen_op_iwmmxt_unpackluw_M0();
2427 if (insn & (1 << 21))
2428 gen_op_iwmmxt_unpacklsl_M0();
2430 gen_op_iwmmxt_unpacklul_M0();
2435 gen_op_iwmmxt_movq_wRn_M0(wrd);
2436 gen_op_iwmmxt_set_mup();
2437 gen_op_iwmmxt_set_cup();
2439 case 0x00c: case 0x20c: case 0x40c: case 0x60c: /* WUNPCKEH */
2440 case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2441 wrd = (insn >> 12) & 0xf;
2442 rd0 = (insn >> 16) & 0xf;
2443 gen_op_iwmmxt_movq_M0_wRn(rd0);
2444 switch ((insn >> 22) & 3) {
2446 if (insn & (1 << 21))
2447 gen_op_iwmmxt_unpackhsb_M0();
2449 gen_op_iwmmxt_unpackhub_M0();
2452 if (insn & (1 << 21))
2453 gen_op_iwmmxt_unpackhsw_M0();
2455 gen_op_iwmmxt_unpackhuw_M0();
2458 if (insn & (1 << 21))
2459 gen_op_iwmmxt_unpackhsl_M0();
2461 gen_op_iwmmxt_unpackhul_M0();
2466 gen_op_iwmmxt_movq_wRn_M0(wrd);
2467 gen_op_iwmmxt_set_mup();
2468 gen_op_iwmmxt_set_cup();
2470 case 0x204: case 0x604: case 0xa04: case 0xe04: /* WSRL */
2471 case 0x214: case 0x614: case 0xa14: case 0xe14:
2472 if (((insn >> 22) & 3) == 0)
2474 wrd = (insn >> 12) & 0xf;
2475 rd0 = (insn >> 16) & 0xf;
2476 gen_op_iwmmxt_movq_M0_wRn(rd0);
2477 tmp = tcg_temp_new_i32();
2478 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2479 tcg_temp_free_i32(tmp);
2482 switch ((insn >> 22) & 3) {
2484 gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2487 gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2490 gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2493 tcg_temp_free_i32(tmp);
2494 gen_op_iwmmxt_movq_wRn_M0(wrd);
2495 gen_op_iwmmxt_set_mup();
2496 gen_op_iwmmxt_set_cup();
2498 case 0x004: case 0x404: case 0x804: case 0xc04: /* WSRA */
2499 case 0x014: case 0x414: case 0x814: case 0xc14:
2500 if (((insn >> 22) & 3) == 0)
2502 wrd = (insn >> 12) & 0xf;
2503 rd0 = (insn >> 16) & 0xf;
2504 gen_op_iwmmxt_movq_M0_wRn(rd0);
2505 tmp = tcg_temp_new_i32();
2506 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2507 tcg_temp_free_i32(tmp);
2510 switch ((insn >> 22) & 3) {
2512 gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2515 gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2518 gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2521 tcg_temp_free_i32(tmp);
2522 gen_op_iwmmxt_movq_wRn_M0(wrd);
2523 gen_op_iwmmxt_set_mup();
2524 gen_op_iwmmxt_set_cup();
2526 case 0x104: case 0x504: case 0x904: case 0xd04: /* WSLL */
2527 case 0x114: case 0x514: case 0x914: case 0xd14:
2528 if (((insn >> 22) & 3) == 0)
2530 wrd = (insn >> 12) & 0xf;
2531 rd0 = (insn >> 16) & 0xf;
2532 gen_op_iwmmxt_movq_M0_wRn(rd0);
2533 tmp = tcg_temp_new_i32();
2534 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2535 tcg_temp_free_i32(tmp);
2538 switch ((insn >> 22) & 3) {
2540 gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2543 gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2546 gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2549 tcg_temp_free_i32(tmp);
2550 gen_op_iwmmxt_movq_wRn_M0(wrd);
2551 gen_op_iwmmxt_set_mup();
2552 gen_op_iwmmxt_set_cup();
2554 case 0x304: case 0x704: case 0xb04: case 0xf04: /* WROR */
2555 case 0x314: case 0x714: case 0xb14: case 0xf14:
2556 if (((insn >> 22) & 3) == 0)
2558 wrd = (insn >> 12) & 0xf;
2559 rd0 = (insn >> 16) & 0xf;
2560 gen_op_iwmmxt_movq_M0_wRn(rd0);
2561 tmp = tcg_temp_new_i32();
2562 switch ((insn >> 22) & 3) {
2564 if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2565 tcg_temp_free_i32(tmp);
2568 gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2571 if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2572 tcg_temp_free_i32(tmp);
2575 gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2578 if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2579 tcg_temp_free_i32(tmp);
2582 gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2585 tcg_temp_free_i32(tmp);
2586 gen_op_iwmmxt_movq_wRn_M0(wrd);
2587 gen_op_iwmmxt_set_mup();
2588 gen_op_iwmmxt_set_cup();
2590 case 0x116: case 0x316: case 0x516: case 0x716: /* WMIN */
2591 case 0x916: case 0xb16: case 0xd16: case 0xf16:
2592 wrd = (insn >> 12) & 0xf;
2593 rd0 = (insn >> 16) & 0xf;
2594 rd1 = (insn >> 0) & 0xf;
2595 gen_op_iwmmxt_movq_M0_wRn(rd0);
2596 switch ((insn >> 22) & 3) {
2598 if (insn & (1 << 21))
2599 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2601 gen_op_iwmmxt_minub_M0_wRn(rd1);
2604 if (insn & (1 << 21))
2605 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2607 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2610 if (insn & (1 << 21))
2611 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2613 gen_op_iwmmxt_minul_M0_wRn(rd1);
2618 gen_op_iwmmxt_movq_wRn_M0(wrd);
2619 gen_op_iwmmxt_set_mup();
2621 case 0x016: case 0x216: case 0x416: case 0x616: /* WMAX */
2622 case 0x816: case 0xa16: case 0xc16: case 0xe16:
2623 wrd = (insn >> 12) & 0xf;
2624 rd0 = (insn >> 16) & 0xf;
2625 rd1 = (insn >> 0) & 0xf;
2626 gen_op_iwmmxt_movq_M0_wRn(rd0);
2627 switch ((insn >> 22) & 3) {
2629 if (insn & (1 << 21))
2630 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2632 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2635 if (insn & (1 << 21))
2636 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2638 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2641 if (insn & (1 << 21))
2642 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2644 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2649 gen_op_iwmmxt_movq_wRn_M0(wrd);
2650 gen_op_iwmmxt_set_mup();
2652 case 0x002: case 0x102: case 0x202: case 0x302: /* WALIGNI */
2653 case 0x402: case 0x502: case 0x602: case 0x702:
2654 wrd = (insn >> 12) & 0xf;
2655 rd0 = (insn >> 16) & 0xf;
2656 rd1 = (insn >> 0) & 0xf;
2657 gen_op_iwmmxt_movq_M0_wRn(rd0);
2658 tmp = tcg_const_i32((insn >> 20) & 3);
2659 iwmmxt_load_reg(cpu_V1, rd1);
2660 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2661 tcg_temp_free_i32(tmp);
2662 gen_op_iwmmxt_movq_wRn_M0(wrd);
2663 gen_op_iwmmxt_set_mup();
2665 case 0x01a: case 0x11a: case 0x21a: case 0x31a: /* WSUB */
2666 case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2667 case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2668 case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2669 wrd = (insn >> 12) & 0xf;
2670 rd0 = (insn >> 16) & 0xf;
2671 rd1 = (insn >> 0) & 0xf;
2672 gen_op_iwmmxt_movq_M0_wRn(rd0);
2673 switch ((insn >> 20) & 0xf) {
2675 gen_op_iwmmxt_subnb_M0_wRn(rd1);
2678 gen_op_iwmmxt_subub_M0_wRn(rd1);
2681 gen_op_iwmmxt_subsb_M0_wRn(rd1);
2684 gen_op_iwmmxt_subnw_M0_wRn(rd1);
2687 gen_op_iwmmxt_subuw_M0_wRn(rd1);
2690 gen_op_iwmmxt_subsw_M0_wRn(rd1);
2693 gen_op_iwmmxt_subnl_M0_wRn(rd1);
2696 gen_op_iwmmxt_subul_M0_wRn(rd1);
2699 gen_op_iwmmxt_subsl_M0_wRn(rd1);
2704 gen_op_iwmmxt_movq_wRn_M0(wrd);
2705 gen_op_iwmmxt_set_mup();
2706 gen_op_iwmmxt_set_cup();
2708 case 0x01e: case 0x11e: case 0x21e: case 0x31e: /* WSHUFH */
2709 case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2710 case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2711 case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2712 wrd = (insn >> 12) & 0xf;
2713 rd0 = (insn >> 16) & 0xf;
2714 gen_op_iwmmxt_movq_M0_wRn(rd0);
2715 tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2716 gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2717 tcg_temp_free_i32(tmp);
2718 gen_op_iwmmxt_movq_wRn_M0(wrd);
2719 gen_op_iwmmxt_set_mup();
2720 gen_op_iwmmxt_set_cup();
2722 case 0x018: case 0x118: case 0x218: case 0x318: /* WADD */
2723 case 0x418: case 0x518: case 0x618: case 0x718:
2724 case 0x818: case 0x918: case 0xa18: case 0xb18:
2725 case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2726 wrd = (insn >> 12) & 0xf;
2727 rd0 = (insn >> 16) & 0xf;
2728 rd1 = (insn >> 0) & 0xf;
2729 gen_op_iwmmxt_movq_M0_wRn(rd0);
2730 switch ((insn >> 20) & 0xf) {
2732 gen_op_iwmmxt_addnb_M0_wRn(rd1);
2735 gen_op_iwmmxt_addub_M0_wRn(rd1);
2738 gen_op_iwmmxt_addsb_M0_wRn(rd1);
2741 gen_op_iwmmxt_addnw_M0_wRn(rd1);
2744 gen_op_iwmmxt_adduw_M0_wRn(rd1);
2747 gen_op_iwmmxt_addsw_M0_wRn(rd1);
2750 gen_op_iwmmxt_addnl_M0_wRn(rd1);
2753 gen_op_iwmmxt_addul_M0_wRn(rd1);
2756 gen_op_iwmmxt_addsl_M0_wRn(rd1);
2761 gen_op_iwmmxt_movq_wRn_M0(wrd);
2762 gen_op_iwmmxt_set_mup();
2763 gen_op_iwmmxt_set_cup();
2765 case 0x008: case 0x108: case 0x208: case 0x308: /* WPACK */
2766 case 0x408: case 0x508: case 0x608: case 0x708:
2767 case 0x808: case 0x908: case 0xa08: case 0xb08:
2768 case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2769 if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2771 wrd = (insn >> 12) & 0xf;
2772 rd0 = (insn >> 16) & 0xf;
2773 rd1 = (insn >> 0) & 0xf;
2774 gen_op_iwmmxt_movq_M0_wRn(rd0);
2775 switch ((insn >> 22) & 3) {
2777 if (insn & (1 << 21))
2778 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2780 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2783 if (insn & (1 << 21))
2784 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2786 gen_op_iwmmxt_packul_M0_wRn(rd1);
2789 if (insn & (1 << 21))
2790 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2792 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2795 gen_op_iwmmxt_movq_wRn_M0(wrd);
2796 gen_op_iwmmxt_set_mup();
2797 gen_op_iwmmxt_set_cup();
2799 case 0x201: case 0x203: case 0x205: case 0x207:
2800 case 0x209: case 0x20b: case 0x20d: case 0x20f:
2801 case 0x211: case 0x213: case 0x215: case 0x217:
2802 case 0x219: case 0x21b: case 0x21d: case 0x21f:
2803 wrd = (insn >> 5) & 0xf;
2804 rd0 = (insn >> 12) & 0xf;
2805 rd1 = (insn >> 0) & 0xf;
2806 if (rd0 == 0xf || rd1 == 0xf)
2808 gen_op_iwmmxt_movq_M0_wRn(wrd);
2809 tmp = load_reg(s, rd0);
2810 tmp2 = load_reg(s, rd1);
2811 switch ((insn >> 16) & 0xf) {
2812 case 0x0: /* TMIA */
2813 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2815 case 0x8: /* TMIAPH */
2816 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2818 case 0xc: case 0xd: case 0xe: case 0xf: /* TMIAxy */
2819 if (insn & (1 << 16))
2820 tcg_gen_shri_i32(tmp, tmp, 16);
2821 if (insn & (1 << 17))
2822 tcg_gen_shri_i32(tmp2, tmp2, 16);
2823 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2826 tcg_temp_free_i32(tmp2);
2827 tcg_temp_free_i32(tmp);
2830 tcg_temp_free_i32(tmp2);
2831 tcg_temp_free_i32(tmp);
2832 gen_op_iwmmxt_movq_wRn_M0(wrd);
2833 gen_op_iwmmxt_set_mup();
2842 /* Disassemble an XScale DSP instruction. Returns nonzero if an error occurred
2843 (i.e. an undefined instruction). */
2844 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2846 int acc, rd0, rd1, rdhi, rdlo;
2849 if ((insn & 0x0ff00f10) == 0x0e200010) {
2850 /* Multiply with Internal Accumulate Format */
2851 rd0 = (insn >> 12) & 0xf;
2853 acc = (insn >> 5) & 7;
2858 tmp = load_reg(s, rd0);
2859 tmp2 = load_reg(s, rd1);
2860 switch ((insn >> 16) & 0xf) {
2862 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2864 case 0x8: /* MIAPH */
2865 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2867 case 0xc: /* MIABB */
2868 case 0xd: /* MIABT */
2869 case 0xe: /* MIATB */
2870 case 0xf: /* MIATT */
2871 if (insn & (1 << 16))
2872 tcg_gen_shri_i32(tmp, tmp, 16);
2873 if (insn & (1 << 17))
2874 tcg_gen_shri_i32(tmp2, tmp2, 16);
2875 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2880 tcg_temp_free_i32(tmp2);
2881 tcg_temp_free_i32(tmp);
2883 gen_op_iwmmxt_movq_wRn_M0(acc);
2887 if ((insn & 0x0fe00ff8) == 0x0c400000) {
2888 /* Internal Accumulator Access Format */
2889 rdhi = (insn >> 16) & 0xf;
2890 rdlo = (insn >> 12) & 0xf;
2896 if (insn & ARM_CP_RW_BIT) { /* MRA */
2897 iwmmxt_load_reg(cpu_V0, acc);
2898 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2899 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
2900 tcg_gen_extrl_i64_i32(cpu_R[rdhi], cpu_V0);
2901 tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2903 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2904 iwmmxt_store_reg(cpu_V0, acc);
2912 #define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
2913 #define VFP_SREG(insn, bigbit, smallbit) \
2914 ((VFP_REG_SHR(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1))
2915 #define VFP_DREG(reg, insn, bigbit, smallbit) do { \
2916 if (arm_dc_feature(s, ARM_FEATURE_VFP3)) { \
2917 reg = (((insn) >> (bigbit)) & 0x0f) \
2918 | (((insn) >> ((smallbit) - 4)) & 0x10); \
2920 if (insn & (1 << (smallbit))) \
2922 reg = ((insn) >> (bigbit)) & 0x0f; \
2925 #define VFP_SREG_D(insn) VFP_SREG(insn, 12, 22)
2926 #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
2927 #define VFP_SREG_N(insn) VFP_SREG(insn, 16, 7)
2928 #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
2929 #define VFP_SREG_M(insn) VFP_SREG(insn, 0, 5)
2930 #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
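/* Illustrative example (not from the original source): for an encoding
 * with Vd == 5 in insn[15:12] and D == 1 in insn[22], VFP_SREG_D(insn)
 * yields (5 << 1) | 1 == 11, i.e. s11, while VFP_DREG_D(reg, insn) on a
 * VFP3 core yields 5 | (1 << 4) == 21, i.e. d21.  The _N and _M variants
 * apply the same scheme to their own bit positions.
 */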
2932 /* Move between integer and VFP cores. */
2933 static TCGv_i32 gen_vfp_mrs(void)
2935 TCGv_i32 tmp = tcg_temp_new_i32();
2936 tcg_gen_mov_i32(tmp, cpu_F0s);
2940 static void gen_vfp_msr(TCGv_i32 tmp)
2942 tcg_gen_mov_i32(cpu_F0s, tmp);
2943 tcg_temp_free_i32(tmp);
2946 static void gen_neon_dup_u8(TCGv_i32 var, int shift)
2948 TCGv_i32 tmp = tcg_temp_new_i32();
2950 tcg_gen_shri_i32(var, var, shift);
2951 tcg_gen_ext8u_i32(var, var);
2952 tcg_gen_shli_i32(tmp, var, 8);
2953 tcg_gen_or_i32(var, var, tmp);
2954 tcg_gen_shli_i32(tmp, var, 16);
2955 tcg_gen_or_i32(var, var, tmp);
2956 tcg_temp_free_i32(tmp);
2959 static void gen_neon_dup_low16(TCGv_i32 var)
2961 TCGv_i32 tmp = tcg_temp_new_i32();
2962 tcg_gen_ext16u_i32(var, var);
2963 tcg_gen_shli_i32(tmp, var, 16);
2964 tcg_gen_or_i32(var, var, tmp);
2965 tcg_temp_free_i32(tmp);
2968 static void gen_neon_dup_high16(TCGv_i32 var)
2970 TCGv_i32 tmp = tcg_temp_new_i32();
2971 tcg_gen_andi_i32(var, var, 0xffff0000);
2972 tcg_gen_shri_i32(tmp, var, 16);
2973 tcg_gen_or_i32(var, var, tmp);
2974 tcg_temp_free_i32(tmp);
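/* The three dup helpers above replicate a narrow value across a 32-bit
 * temp; illustrative values: gen_neon_dup_u8 with shift 0 turns
 * 0x000000ab into 0xabababab, gen_neon_dup_low16 turns 0x0000beef into
 * 0xbeefbeef, and gen_neon_dup_high16 turns 0xbeef0000 into 0xbeefbeef.
 */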
2977 static TCGv_i32 gen_load_and_replicate(DisasContext *s, TCGv_i32 addr, int size)
2979 /* Load a single Neon element and replicate it into a 32-bit TCG reg */
2980 TCGv_i32 tmp = tcg_temp_new_i32();
2983 gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
2984 gen_neon_dup_u8(tmp, 0);
2987 gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
2988 gen_neon_dup_low16(tmp);
2991 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
2993 default: /* Avoid compiler warnings. */
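/* VSEL: copy either Vn or Vm to Vd depending on one of four condition
 * codes encoded in insn[21:20] (eq, vs, ge, gt, handled in the switch
 * below), evaluated from the cached NF/ZF/VF flag globals rather than a
 * full CPSR read.
 */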
2999 static int handle_vsel(uint32_t insn, uint32_t rd, uint32_t rn, uint32_t rm,
3002 uint32_t cc = extract32(insn, 20, 2);
3005 TCGv_i64 frn, frm, dest;
3006 TCGv_i64 tmp, zero, zf, nf, vf;
3008 zero = tcg_const_i64(0);
3010 frn = tcg_temp_new_i64();
3011 frm = tcg_temp_new_i64();
3012 dest = tcg_temp_new_i64();
3014 zf = tcg_temp_new_i64();
3015 nf = tcg_temp_new_i64();
3016 vf = tcg_temp_new_i64();
3018 tcg_gen_extu_i32_i64(zf, cpu_ZF);
3019 tcg_gen_ext_i32_i64(nf, cpu_NF);
3020 tcg_gen_ext_i32_i64(vf, cpu_VF);
3022 tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn));
3023 tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm));
3026 tcg_gen_movcond_i64(TCG_COND_EQ, dest, zf, zero,
3030 tcg_gen_movcond_i64(TCG_COND_LT, dest, vf, zero,
3033 case 2: /* ge: N == V -> N ^ V == 0 */
3034 tmp = tcg_temp_new_i64();
3035 tcg_gen_xor_i64(tmp, vf, nf);
3036 tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
3038 tcg_temp_free_i64(tmp);
3040 case 3: /* gt: !Z && N == V */
3041 tcg_gen_movcond_i64(TCG_COND_NE, dest, zf, zero,
3043 tmp = tcg_temp_new_i64();
3044 tcg_gen_xor_i64(tmp, vf, nf);
3045 tcg_gen_movcond_i64(TCG_COND_GE, dest, tmp, zero,
3047 tcg_temp_free_i64(tmp);
3050 tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd));
3051 tcg_temp_free_i64(frn);
3052 tcg_temp_free_i64(frm);
3053 tcg_temp_free_i64(dest);
3055 tcg_temp_free_i64(zf);
3056 tcg_temp_free_i64(nf);
3057 tcg_temp_free_i64(vf);
3059 tcg_temp_free_i64(zero);
3061 TCGv_i32 frn, frm, dest;
3064 zero = tcg_const_i32(0);
3066 frn = tcg_temp_new_i32();
3067 frm = tcg_temp_new_i32();
3068 dest = tcg_temp_new_i32();
3069 tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn));
3070 tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm));
3073 tcg_gen_movcond_i32(TCG_COND_EQ, dest, cpu_ZF, zero,
3077 tcg_gen_movcond_i32(TCG_COND_LT, dest, cpu_VF, zero,
3080 case 2: /* ge: N == V -> N ^ V == 0 */
3081 tmp = tcg_temp_new_i32();
3082 tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
3083 tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
3085 tcg_temp_free_i32(tmp);
3087 case 3: /* gt: !Z && N == V */
3088 tcg_gen_movcond_i32(TCG_COND_NE, dest, cpu_ZF, zero,
3090 tmp = tcg_temp_new_i32();
3091 tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
3092 tcg_gen_movcond_i32(TCG_COND_GE, dest, tmp, zero,
3094 tcg_temp_free_i32(tmp);
3097 tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd));
3098 tcg_temp_free_i32(frn);
3099 tcg_temp_free_i32(frm);
3100 tcg_temp_free_i32(dest);
3102 tcg_temp_free_i32(zero);
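/* VMAXNM/VMINNM: the IEEE 754-2008 maxNum/minNum operations.  Bit 6 of
 * the instruction selects min vs max, and dp selects the double- or
 * single-precision helper.
 */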
3108 static int handle_vminmaxnm(uint32_t insn, uint32_t rd, uint32_t rn,
3109 uint32_t rm, uint32_t dp)
3111 uint32_t vmin = extract32(insn, 6, 1);
3112 TCGv_ptr fpst = get_fpstatus_ptr(0);
3115 TCGv_i64 frn, frm, dest;
3117 frn = tcg_temp_new_i64();
3118 frm = tcg_temp_new_i64();
3119 dest = tcg_temp_new_i64();
3121 tcg_gen_ld_f64(frn, cpu_env, vfp_reg_offset(dp, rn));
3122 tcg_gen_ld_f64(frm, cpu_env, vfp_reg_offset(dp, rm));
3124 gen_helper_vfp_minnumd(dest, frn, frm, fpst);
3126 gen_helper_vfp_maxnumd(dest, frn, frm, fpst);
3128 tcg_gen_st_f64(dest, cpu_env, vfp_reg_offset(dp, rd));
3129 tcg_temp_free_i64(frn);
3130 tcg_temp_free_i64(frm);
3131 tcg_temp_free_i64(dest);
3133 TCGv_i32 frn, frm, dest;
3135 frn = tcg_temp_new_i32();
3136 frm = tcg_temp_new_i32();
3137 dest = tcg_temp_new_i32();
3139 tcg_gen_ld_f32(frn, cpu_env, vfp_reg_offset(dp, rn));
3140 tcg_gen_ld_f32(frm, cpu_env, vfp_reg_offset(dp, rm));
3142 gen_helper_vfp_minnums(dest, frn, frm, fpst);
3144 gen_helper_vfp_maxnums(dest, frn, frm, fpst);
3146 tcg_gen_st_f32(dest, cpu_env, vfp_reg_offset(dp, rd));
3147 tcg_temp_free_i32(frn);
3148 tcg_temp_free_i32(frm);
3149 tcg_temp_free_i32(dest);
3152 tcg_temp_free_ptr(fpst);
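/* VRINTA/VRINTN/VRINTP/VRINTM: round to an integral value in
 * floating-point format, using the rounding mode passed in as
 * "rounding" rather than the one in the FPSCR; the mode is installed
 * before and restored after the rint helper call.
 */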
3156 static int handle_vrint(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
3159 TCGv_ptr fpst = get_fpstatus_ptr(0);
3162 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
3163 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3168 tcg_op = tcg_temp_new_i64();
3169 tcg_res = tcg_temp_new_i64();
3170 tcg_gen_ld_f64(tcg_op, cpu_env, vfp_reg_offset(dp, rm));
3171 gen_helper_rintd(tcg_res, tcg_op, fpst);
3172 tcg_gen_st_f64(tcg_res, cpu_env, vfp_reg_offset(dp, rd));
3173 tcg_temp_free_i64(tcg_op);
3174 tcg_temp_free_i64(tcg_res);
3178 tcg_op = tcg_temp_new_i32();
3179 tcg_res = tcg_temp_new_i32();
3180 tcg_gen_ld_f32(tcg_op, cpu_env, vfp_reg_offset(dp, rm));
3181 gen_helper_rints(tcg_res, tcg_op, fpst);
3182 tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(dp, rd));
3183 tcg_temp_free_i32(tcg_op);
3184 tcg_temp_free_i32(tcg_res);
3187 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3188 tcg_temp_free_i32(tcg_rmode);
3190 tcg_temp_free_ptr(fpst);
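/* VCVTA/VCVTN/VCVTP/VCVTM: float to integer conversion with the
 * rounding mode taken from the instruction.  Bit 7 selects a signed or
 * unsigned result, and the destination is always a single-precision
 * register (see the rd re-encoding below for the dp case).
 */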
3194 static int handle_vcvt(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp,
3197 bool is_signed = extract32(insn, 7, 1);
3198 TCGv_ptr fpst = get_fpstatus_ptr(0);
3199 TCGv_i32 tcg_rmode, tcg_shift;
3201 tcg_shift = tcg_const_i32(0);
3203 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rounding));
3204 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3207 TCGv_i64 tcg_double, tcg_res;
3209 /* Rd is encoded as a single precision register even when the source
3210 * is double precision.
3212 rd = ((rd << 1) & 0x1e) | ((rd >> 4) & 0x1);
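/* Illustrative example: a double-precision rd of 21 (D == 1, Vd == 5)
 * re-encodes here as ((21 << 1) & 0x1e) | ((21 >> 4) & 1) == 10 | 1
 * == 11, i.e. the single-precision destination s11.
 */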
3213 tcg_double = tcg_temp_new_i64();
3214 tcg_res = tcg_temp_new_i64();
3215 tcg_tmp = tcg_temp_new_i32();
3216 tcg_gen_ld_f64(tcg_double, cpu_env, vfp_reg_offset(1, rm));
3218 gen_helper_vfp_tosld(tcg_res, tcg_double, tcg_shift, fpst);
3220 gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst);
3222 tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res);
3223 tcg_gen_st_f32(tcg_tmp, cpu_env, vfp_reg_offset(0, rd));
3224 tcg_temp_free_i32(tcg_tmp);
3225 tcg_temp_free_i64(tcg_res);
3226 tcg_temp_free_i64(tcg_double);
3228 TCGv_i32 tcg_single, tcg_res;
3229 tcg_single = tcg_temp_new_i32();
3230 tcg_res = tcg_temp_new_i32();
3231 tcg_gen_ld_f32(tcg_single, cpu_env, vfp_reg_offset(0, rm));
3233 gen_helper_vfp_tosls(tcg_res, tcg_single, tcg_shift, fpst);
3235 gen_helper_vfp_touls(tcg_res, tcg_single, tcg_shift, fpst);
3237 tcg_gen_st_f32(tcg_res, cpu_env, vfp_reg_offset(0, rd));
3238 tcg_temp_free_i32(tcg_res);
3239 tcg_temp_free_i32(tcg_single);
3242 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3243 tcg_temp_free_i32(tcg_rmode);
3245 tcg_temp_free_i32(tcg_shift);
3247 tcg_temp_free_ptr(fpst);
3252 /* Table for converting the most common AArch32 encoding of
3253 * rounding mode to arm_fprounding order (which matches the
3254 * common AArch64 order); see ARM ARM pseudocode FPDecodeRM().
3256 static const uint8_t fp_decode_rm[] = {
3263 static int disas_vfp_v8_insn(DisasContext *s, uint32_t insn)
3265 uint32_t rd, rn, rm, dp = extract32(insn, 8, 1);
3267 if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
3272 VFP_DREG_D(rd, insn);
3273 VFP_DREG_N(rn, insn);
3274 VFP_DREG_M(rm, insn);
3276 rd = VFP_SREG_D(insn);
3277 rn = VFP_SREG_N(insn);
3278 rm = VFP_SREG_M(insn);
3281 if ((insn & 0x0f800e50) == 0x0e000a00) {
3282 return handle_vsel(insn, rd, rn, rm, dp);
3283 } else if ((insn & 0x0fb00e10) == 0x0e800a00) {
3284 return handle_vminmaxnm(insn, rd, rn, rm, dp);
3285 } else if ((insn & 0x0fbc0ed0) == 0x0eb80a40) {
3286 /* VRINTA, VRINTN, VRINTP, VRINTM */
3287 int rounding = fp_decode_rm[extract32(insn, 16, 2)];
3288 return handle_vrint(insn, rd, rm, dp, rounding);
3289 } else if ((insn & 0x0fbc0e50) == 0x0ebc0a40) {
3290 /* VCVTA, VCVTN, VCVTP, VCVTM */
3291 int rounding = fp_decode_rm[extract32(insn, 16, 2)];
3292 return handle_vcvt(insn, rd, rm, dp, rounding);
3297 /* Disassemble a VFP instruction. Returns nonzero if an error occurred
3298 (i.e. an undefined instruction). */
3299 static int disas_vfp_insn(DisasContext *s, uint32_t insn)
3301 uint32_t rd, rn, rm, op, i, n, offset, delta_d, delta_m, bank_mask;
3307 if (!arm_dc_feature(s, ARM_FEATURE_VFP)) {
3311 /* FIXME: this access check should not take precedence over UNDEF
3312 * for invalid encodings; we will generate incorrect syndrome information
3313 * for attempts to execute invalid vfp/neon encodings with FP disabled.
3315 if (s->fp_excp_el) {
3316 gen_exception_insn(s, 4, EXCP_UDEF,
3317 syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
3321 if (!s->vfp_enabled) {
3322 /* VFP disabled. Only allow fmxr/fmrx to/from some control regs. */
3323 if ((insn & 0x0fe00fff) != 0x0ee00a10)
3325 rn = (insn >> 16) & 0xf;
3326 if (rn != ARM_VFP_FPSID && rn != ARM_VFP_FPEXC && rn != ARM_VFP_MVFR2
3327 && rn != ARM_VFP_MVFR1 && rn != ARM_VFP_MVFR0) {
3332 if (extract32(insn, 28, 4) == 0xf) {
3333 /* Encodings with T=1 (Thumb) or unconditional (ARM):
3334 * only used in v8 and above.
3336 return disas_vfp_v8_insn(s, insn);
3339 dp = ((insn & 0xf00) == 0xb00);
3340 switch ((insn >> 24) & 0xf) {
3342 if (insn & (1 << 4)) {
3343 /* single register transfer */
3344 rd = (insn >> 12) & 0xf;
3349 VFP_DREG_N(rn, insn);
3352 if (insn & 0x00c00060
3353 && !arm_dc_feature(s, ARM_FEATURE_NEON)) {
3357 pass = (insn >> 21) & 1;
3358 if (insn & (1 << 22)) {
3360 offset = ((insn >> 5) & 3) * 8;
3361 } else if (insn & (1 << 5)) {
3363 offset = (insn & (1 << 6)) ? 16 : 0;
3368 if (insn & ARM_CP_RW_BIT) {
3370 tmp = neon_load_reg(rn, pass);
3374 tcg_gen_shri_i32(tmp, tmp, offset);
3375 if (insn & (1 << 23))
3381 if (insn & (1 << 23)) {
3383 tcg_gen_shri_i32(tmp, tmp, 16);
3389 tcg_gen_sari_i32(tmp, tmp, 16);
3398 store_reg(s, rd, tmp);
3401 tmp = load_reg(s, rd);
3402 if (insn & (1 << 23)) {
3405 gen_neon_dup_u8(tmp, 0);
3406 } else if (size == 1) {
3407 gen_neon_dup_low16(tmp);
3409 for (n = 0; n <= pass * 2; n++) {
3410 tmp2 = tcg_temp_new_i32();
3411 tcg_gen_mov_i32(tmp2, tmp);
3412 neon_store_reg(rn, n, tmp2);
3414 neon_store_reg(rn, n, tmp);
3419 tmp2 = neon_load_reg(rn, pass);
3420 tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 8);
3421 tcg_temp_free_i32(tmp2);
3424 tmp2 = neon_load_reg(rn, pass);
3425 tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 16);
3426 tcg_temp_free_i32(tmp2);
3431 neon_store_reg(rn, pass, tmp);
3435 if ((insn & 0x6f) != 0x00)
3437 rn = VFP_SREG_N(insn);
3438 if (insn & ARM_CP_RW_BIT) {
3440 if (insn & (1 << 21)) {
3441 /* system register */
3446 /* VFP2 allows access to FSID from userspace.
3447 VFP3 restricts all id registers to privileged accesses. */
3450 && arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3453 tmp = load_cpu_field(vfp.xregs[rn]);
3458 tmp = load_cpu_field(vfp.xregs[rn]);
3460 case ARM_VFP_FPINST:
3461 case ARM_VFP_FPINST2:
3462 /* Not present in VFP3. */
3464 || arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3467 tmp = load_cpu_field(vfp.xregs[rn]);
3471 tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
3472 tcg_gen_andi_i32(tmp, tmp, 0xf0000000);
3474 tmp = tcg_temp_new_i32();
3475 gen_helper_vfp_get_fpscr(tmp, cpu_env);
3479 if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
3486 || !arm_dc_feature(s, ARM_FEATURE_MVFR)) {
3489 tmp = load_cpu_field(vfp.xregs[rn]);
3495 gen_mov_F0_vreg(0, rn);
3496 tmp = gen_vfp_mrs();
3499 /* Set the 4 flag bits in the CPSR. */
3501 tcg_temp_free_i32(tmp);
3503 store_reg(s, rd, tmp);
3507 if (insn & (1 << 21)) {
3509 /* system register */
3514 /* Writes are ignored. */
3517 tmp = load_reg(s, rd);
3518 gen_helper_vfp_set_fpscr(cpu_env, tmp);
3519 tcg_temp_free_i32(tmp);
3525 /* TODO: VFP subarchitecture support.
3526 * For now, keep the EN bit only */
3527 tmp = load_reg(s, rd);
3528 tcg_gen_andi_i32(tmp, tmp, 1 << 30);
3529 store_cpu_field(tmp, vfp.xregs[rn]);
3532 case ARM_VFP_FPINST:
3533 case ARM_VFP_FPINST2:
3537 tmp = load_reg(s, rd);
3538 store_cpu_field(tmp, vfp.xregs[rn]);
3544 tmp = load_reg(s, rd);
3546 gen_mov_vreg_F0(0, rn);
3551 /* data processing */
3552 /* The opcode is in bits 23, 21, 20 and 6. */
3553 op = ((insn >> 20) & 8) | ((insn >> 19) & 6) | ((insn >> 6) & 1);
3557 rn = ((insn >> 15) & 0x1e) | ((insn >> 7) & 1);
3559 /* rn is register number */
3560 VFP_DREG_N(rn, insn);
3563 if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18) ||
3564 ((rn & 0x1e) == 0x6))) {
3565 /* Integer or single/half precision destination. */
3566 rd = VFP_SREG_D(insn);
3568 VFP_DREG_D(rd, insn);
3571 (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14) ||
3572 ((rn & 0x1e) == 0x4))) {
3573 /* VCVT from int or half precision is always from S reg
3574 * regardless of dp bit. VCVT with immediate frac_bits
3575 * has same format as SREG_M.
3577 rm = VFP_SREG_M(insn);
3579 VFP_DREG_M(rm, insn);
3582 rn = VFP_SREG_N(insn);
3583 if (op == 15 && rn == 15) {
3584 /* Double precision destination. */
3585 VFP_DREG_D(rd, insn);
3587 rd = VFP_SREG_D(insn);
3589 /* NB that we implicitly rely on the encoding for the frac_bits
3590 * in VCVT of fixed to float being the same as that of an SREG_M
3592 rm = VFP_SREG_M(insn);
3595 veclen = s->vec_len;
3596 if (op == 15 && rn > 3)
3599 /* Shut up compiler warnings. */
3610 /* Figure out what type of vector operation this is. */
3611 if ((rd & bank_mask) == 0) {
3616 delta_d = (s->vec_stride >> 1) + 1;
3618 delta_d = s->vec_stride + 1;
3620 if ((rm & bank_mask) == 0) {
3621 /* mixed scalar/vector */
3630 /* Load the initial operands. */
3635 /* Integer source */
3636 gen_mov_F0_vreg(0, rm);
3641 gen_mov_F0_vreg(dp, rd);
3642 gen_mov_F1_vreg(dp, rm);
3646 /* Compare with zero */
3647 gen_mov_F0_vreg(dp, rd);
3658 /* Source and destination the same. */
3659 gen_mov_F0_vreg(dp, rd);
3665 /* VCVTB, VCVTT: only present with the halfprec extension
3666 * UNPREDICTABLE if bit 8 is set prior to ARMv8
3667 * (we choose to UNDEF)
3669 if ((dp && !arm_dc_feature(s, ARM_FEATURE_V8)) ||
3670 !arm_dc_feature(s, ARM_FEATURE_VFP_FP16)) {
3673 if (!extract32(rn, 1, 1)) {
3674 /* Half precision source. */
3675 gen_mov_F0_vreg(0, rm);
3678 /* Otherwise fall through */
3680 /* One source operand. */
3681 gen_mov_F0_vreg(dp, rm);
3685 /* Two source operands. */
3686 gen_mov_F0_vreg(dp, rn);
3687 gen_mov_F1_vreg(dp, rm);
3691 /* Perform the calculation. */
3693 case 0: /* VMLA: fd + (fn * fm) */
3694 /* Note that order of inputs to the add matters for NaNs */
3696 gen_mov_F0_vreg(dp, rd);
3699 case 1: /* VMLS: fd + -(fn * fm) */
3702 gen_mov_F0_vreg(dp, rd);
3705 case 2: /* VNMLS: -fd + (fn * fm) */
3706 /* Note that it isn't valid to replace (-A + B) with (B - A)
3707 * or similar plausible-looking simplifications
3708 * because this will give wrong results for NaNs.
3711 gen_mov_F0_vreg(dp, rd);
3715 case 3: /* VNMLA: -fd + -(fn * fm) */
3718 gen_mov_F0_vreg(dp, rd);
3722 case 4: /* mul: fn * fm */
3725 case 5: /* nmul: -(fn * fm) */
3729 case 6: /* add: fn + fm */
3732 case 7: /* sub: fn - fm */
3735 case 8: /* div: fn / fm */
3738 case 10: /* VFNMA : fd = muladd(-fd, fn, fm) */
3739 case 11: /* VFNMS : fd = muladd(-fd, -fn, fm) */
3740 case 12: /* VFMA : fd = muladd( fd, fn, fm) */
3741 case 13: /* VFMS : fd = muladd( fd, -fn, fm) */
3742 /* These are fused multiply-add, and must be done as one
3743 * floating point operation with no rounding between the
3744 * multiplication and addition steps.
3745 * NB that doing the negations here as separate steps is
3746 * correct: an input NaN should come out with its sign bit
3747 * flipped if it is a negated input.
3749 if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) {
3757 gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
3759 frd = tcg_temp_new_i64();
3760 tcg_gen_ld_f64(frd, cpu_env, vfp_reg_offset(dp, rd));
3763 gen_helper_vfp_negd(frd, frd);
3765 fpst = get_fpstatus_ptr(0);
3766 gen_helper_vfp_muladdd(cpu_F0d, cpu_F0d,
3767 cpu_F1d, frd, fpst);
3768 tcg_temp_free_ptr(fpst);
3769 tcg_temp_free_i64(frd);
3775 gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
3777 frd = tcg_temp_new_i32();
3778 tcg_gen_ld_f32(frd, cpu_env, vfp_reg_offset(dp, rd));
3780 gen_helper_vfp_negs(frd, frd);
3782 fpst = get_fpstatus_ptr(0);
3783 gen_helper_vfp_muladds(cpu_F0s, cpu_F0s,
3784 cpu_F1s, frd, fpst);
3785 tcg_temp_free_ptr(fpst);
3786 tcg_temp_free_i32(frd);
3789 case 14: /* fconst */
3790 if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3794 n = (insn << 12) & 0x80000000;
3795 i = ((insn >> 12) & 0x70) | (insn & 0xf);
3802 tcg_gen_movi_i64(cpu_F0d, ((uint64_t)n) << 32);
3809 tcg_gen_movi_i32(cpu_F0s, n);
3812 case 15: /* extension space */
3826 case 4: /* vcvtb.f32.f16, vcvtb.f64.f16 */
3827 tmp = gen_vfp_mrs();
3828 tcg_gen_ext16u_i32(tmp, tmp);
3830 gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp,
3833 gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp,
3836 tcg_temp_free_i32(tmp);
3838 case 5: /* vcvtt.f32.f16, vcvtt.f64.f16 */
3839 tmp = gen_vfp_mrs();
3840 tcg_gen_shri_i32(tmp, tmp, 16);
3842 gen_helper_vfp_fcvt_f16_to_f64(cpu_F0d, tmp,
3845 gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp,
3848 tcg_temp_free_i32(tmp);
3850 case 6: /* vcvtb.f16.f32, vcvtb.f16.f64 */
3851 tmp = tcg_temp_new_i32();
3853 gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
3856 gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
3859 gen_mov_F0_vreg(0, rd);
3860 tmp2 = gen_vfp_mrs();
3861 tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
3862 tcg_gen_or_i32(tmp, tmp, tmp2);
3863 tcg_temp_free_i32(tmp2);
3866 case 7: /* vcvtt.f16.f32, vcvtt.f16.f64 */
3867 tmp = tcg_temp_new_i32();
3869 gen_helper_vfp_fcvt_f64_to_f16(tmp, cpu_F0d,
3872 gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s,
3875 tcg_gen_shli_i32(tmp, tmp, 16);
3876 gen_mov_F0_vreg(0, rd);
3877 tmp2 = gen_vfp_mrs();
3878 tcg_gen_ext16u_i32(tmp2, tmp2);
3879 tcg_gen_or_i32(tmp, tmp, tmp2);
3880 tcg_temp_free_i32(tmp2);
3892 case 11: /* cmpez */
3896 case 12: /* vrintr */
3898 TCGv_ptr fpst = get_fpstatus_ptr(0);
3900 gen_helper_rintd(cpu_F0d, cpu_F0d, fpst);
3902 gen_helper_rints(cpu_F0s, cpu_F0s, fpst);
3904 tcg_temp_free_ptr(fpst);
3907 case 13: /* vrintz */
3909 TCGv_ptr fpst = get_fpstatus_ptr(0);
3911 tcg_rmode = tcg_const_i32(float_round_to_zero);
3912 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3914 gen_helper_rintd(cpu_F0d, cpu_F0d, fpst);
3916 gen_helper_rints(cpu_F0s, cpu_F0s, fpst);
3918 gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
3919 tcg_temp_free_i32(tcg_rmode);
3920 tcg_temp_free_ptr(fpst);
3923 case 14: /* vrintx */
3925 TCGv_ptr fpst = get_fpstatus_ptr(0);
3927 gen_helper_rintd_exact(cpu_F0d, cpu_F0d, fpst);
3929 gen_helper_rints_exact(cpu_F0s, cpu_F0s, fpst);
3931 tcg_temp_free_ptr(fpst);
3934 case 15: /* single<->double conversion */
3936 gen_helper_vfp_fcvtsd(cpu_F0s, cpu_F0d, cpu_env);
3938 gen_helper_vfp_fcvtds(cpu_F0d, cpu_F0s, cpu_env);
3940 case 16: /* fuito */
3941 gen_vfp_uito(dp, 0);
3943 case 17: /* fsito */
3944 gen_vfp_sito(dp, 0);
3946 case 20: /* fshto */
3947 if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3950 gen_vfp_shto(dp, 16 - rm, 0);
3952 case 21: /* fslto */
3953 if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3956 gen_vfp_slto(dp, 32 - rm, 0);
3958 case 22: /* fuhto */
3959 if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3962 gen_vfp_uhto(dp, 16 - rm, 0);
3964 case 23: /* fulto */
3965 if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3968 gen_vfp_ulto(dp, 32 - rm, 0);
3970 case 24: /* ftoui */
3971 gen_vfp_toui(dp, 0);
3973 case 25: /* ftouiz */
3974 gen_vfp_touiz(dp, 0);
3976 case 26: /* ftosi */
3977 gen_vfp_tosi(dp, 0);
3979 case 27: /* ftosiz */
3980 gen_vfp_tosiz(dp, 0);
3982 case 28: /* ftosh */
3983 if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3986 gen_vfp_tosh(dp, 16 - rm, 0);
3988 case 29: /* ftosl */
3989 if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3992 gen_vfp_tosl(dp, 32 - rm, 0);
3994 case 30: /* ftouh */
3995 if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
3998 gen_vfp_touh(dp, 16 - rm, 0);
4000 case 31: /* ftoul */
4001 if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
4004 gen_vfp_toul(dp, 32 - rm, 0);
4006 default: /* undefined */
4010 default: /* undefined */
4014 /* Write back the result. */
4015 if (op == 15 && (rn >= 8 && rn <= 11)) {
4016 /* Comparison, do nothing. */
4017 } else if (op == 15 && dp && ((rn & 0x1c) == 0x18 ||
4018 (rn & 0x1e) == 0x6)) {
4019 /* VCVT double to int: always integer result.
4020 * VCVT double to half precision is always a single-precision result. */
4023 gen_mov_vreg_F0(0, rd);
4024 } else if (op == 15 && rn == 15) {
4026 gen_mov_vreg_F0(!dp, rd);
4028 gen_mov_vreg_F0(dp, rd);
4031 /* break out of the loop if we have finished */
4035 if (op == 15 && delta_m == 0) {
4036 /* single source one-many */
4038 rd = ((rd + delta_d) & (bank_mask - 1))
4040 gen_mov_vreg_F0(dp, rd);
4044 /* Set up the next operands. */
4046 rd = ((rd + delta_d) & (bank_mask - 1))
4050 /* One source operand. */
4051 rm = ((rm + delta_m) & (bank_mask - 1))
4053 gen_mov_F0_vreg(dp, rm);
4055 /* Two source operands. */
4056 rn = ((rn + delta_d) & (bank_mask - 1))
4058 gen_mov_F0_vreg(dp, rn);
4060 rm = ((rm + delta_m) & (bank_mask - 1))
4062 gen_mov_F1_vreg(dp, rm);
4070 if ((insn & 0x03e00000) == 0x00400000) {
4071 /* two-register transfer */
4072 rn = (insn >> 16) & 0xf;
4073 rd = (insn >> 12) & 0xf;
4075 VFP_DREG_M(rm, insn);
4077 rm = VFP_SREG_M(insn);
4080 if (insn & ARM_CP_RW_BIT) {
4083 gen_mov_F0_vreg(0, rm * 2);
4084 tmp = gen_vfp_mrs();
4085 store_reg(s, rd, tmp);
4086 gen_mov_F0_vreg(0, rm * 2 + 1);
4087 tmp = gen_vfp_mrs();
4088 store_reg(s, rn, tmp);
4090 gen_mov_F0_vreg(0, rm);
4091 tmp = gen_vfp_mrs();
4092 store_reg(s, rd, tmp);
4093 gen_mov_F0_vreg(0, rm + 1);
4094 tmp = gen_vfp_mrs();
4095 store_reg(s, rn, tmp);
4100 tmp = load_reg(s, rd);
4102 gen_mov_vreg_F0(0, rm * 2);
4103 tmp = load_reg(s, rn);
4105 gen_mov_vreg_F0(0, rm * 2 + 1);
4107 tmp = load_reg(s, rd);
4109 gen_mov_vreg_F0(0, rm);
4110 tmp = load_reg(s, rn);
4112 gen_mov_vreg_F0(0, rm + 1);
4117 rn = (insn >> 16) & 0xf;
4119 VFP_DREG_D(rd, insn);
4121 rd = VFP_SREG_D(insn);
4122 if ((insn & 0x01200000) == 0x01000000) {
4123 /* Single load/store */
4124 offset = (insn & 0xff) << 2;
4125 if ((insn & (1 << 23)) == 0)
4127 if (s->thumb && rn == 15) {
4128 /* This is actually UNPREDICTABLE */
4129 addr = tcg_temp_new_i32();
4130 tcg_gen_movi_i32(addr, s->pc & ~2);
4132 addr = load_reg(s, rn);
4134 tcg_gen_addi_i32(addr, addr, offset);
4135 if (insn & (1 << 20)) {
4136 gen_vfp_ld(s, dp, addr);
4137 gen_mov_vreg_F0(dp, rd);
4139 gen_mov_F0_vreg(dp, rd);
4140 gen_vfp_st(s, dp, addr);
4142 tcg_temp_free_i32(addr);
4144 /* load/store multiple */
4145 int w = insn & (1 << 21);
4147 n = (insn >> 1) & 0x7f;
4151 if (w && !(((insn >> 23) ^ (insn >> 24)) & 1)) {
4152 /* P == U, W == 1 => UNDEF */
4155 if (n == 0 || (rd + n) > 32 || (dp && n > 16)) {
4156 /* UNPREDICTABLE cases for bad immediates: we choose to
4157 * UNDEF to avoid generating huge numbers of TCG ops
4161 if (rn == 15 && w) {
4162 /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
4166 if (s->thumb && rn == 15) {
4167 /* This is actually UNPREDICTABLE */
4168 addr = tcg_temp_new_i32();
4169 tcg_gen_movi_i32(addr, s->pc & ~2);
4171 addr = load_reg(s, rn);
4173 if (insn & (1 << 24)) /* pre-decrement */
4174 tcg_gen_addi_i32(addr, addr, -((insn & 0xff) << 2));
4180 for (i = 0; i < n; i++) {
4181 if (insn & ARM_CP_RW_BIT) {
4183 gen_vfp_ld(s, dp, addr);
4184 gen_mov_vreg_F0(dp, rd + i);
4187 gen_mov_F0_vreg(dp, rd + i);
4188 gen_vfp_st(s, dp, addr);
4190 tcg_gen_addi_i32(addr, addr, offset);
4194 if (insn & (1 << 24))
4195 offset = -offset * n;
4196 else if (dp && (insn & 1))
4202 tcg_gen_addi_i32(addr, addr, offset);
4203 store_reg(s, rn, addr);
4205 tcg_temp_free_i32(addr);
4211 /* Should never happen. */
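/* Decide whether a direct goto_tb exit may be used: only when the
 * branch target lies on the same guest page as the start of this TB or
 * as the current instruction.  Otherwise gen_goto_tb() below falls back
 * to setting the PC and taking the lookup-and-goto-ptr exit.
 */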
4217 static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
4219 #ifndef CONFIG_USER_ONLY
4220 return (s->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) ||
4221 ((s->pc - 1) & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
4227 static void gen_goto_ptr(void)
4229 tcg_gen_lookup_and_goto_ptr();
4232 /* This will end the TB but doesn't guarantee we'll return to
4233 * cpu_loop_exec. Any live exit_requests will be processed as we
4234 * enter the next TB.
4236 static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
4238 if (use_goto_tb(s, dest)) {
4240 gen_set_pc_im(s, dest);
4241 tcg_gen_exit_tb((uintptr_t)s->base.tb + n);
4243 gen_set_pc_im(s, dest);
4246 s->base.is_jmp = DISAS_NORETURN;
4249 static inline void gen_jmp(DisasContext *s, uint32_t dest)
4251 if (unlikely(is_singlestepping(s))) {
4252 /* An indirect jump so that we still trigger the debug exception. */
4257 gen_goto_tb(s, 0, dest);
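/* Signed 16x16->32 multiply used by the halfword-multiply encodings
 * (SMULxy and friends): x and y select the top (1) or bottom (0)
 * halfword of t0 and t1 respectively; the selected halfwords are
 * sign-extended and then multiplied.
 */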
4261 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
4264 tcg_gen_sari_i32(t0, t0, 16);
4268 tcg_gen_sari_i32(t1, t1, 16);
4271 tcg_gen_mul_i32(t0, t0, t1);
4274 /* Return the mask of PSR bits set by a MSR instruction. */
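/* Illustrative example: for MSR CPSR_fc, flags has bits 0 (c field) and
 * 3 (f field) set, so before the undefined/privileged-bit masking below
 * the mask selects the control byte and the flags byte of the PSR,
 * i.e. 0xff0000ff.
 */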
4275 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
4280 if (flags & (1 << 0))
4282 if (flags & (1 << 1))
4284 if (flags & (1 << 2))
4286 if (flags & (1 << 3))
4289 /* Mask out undefined bits. */
4290 mask &= ~CPSR_RESERVED;
4291 if (!arm_dc_feature(s, ARM_FEATURE_V4T)) {
4294 if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
4295 mask &= ~CPSR_Q; /* V5TE in reality */
4297 if (!arm_dc_feature(s, ARM_FEATURE_V6)) {
4298 mask &= ~(CPSR_E | CPSR_GE);
4300 if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
4303 /* Mask out execution state and reserved bits. */
4305 mask &= ~(CPSR_EXEC | CPSR_RESERVED);
4307 /* Mask out privileged bits. */
4313 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
4314 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
4318 /* ??? This is also undefined in system mode. */
4322 tmp = load_cpu_field(spsr);
4323 tcg_gen_andi_i32(tmp, tmp, ~mask);
4324 tcg_gen_andi_i32(t0, t0, mask);
4325 tcg_gen_or_i32(tmp, tmp, t0);
4326 store_cpu_field(tmp, spsr);
4328 gen_set_cpsr(t0, mask);
4330 tcg_temp_free_i32(t0);
4335 /* Returns nonzero if access to the PSR is not permitted. */
4336 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
4339 tmp = tcg_temp_new_i32();
4340 tcg_gen_movi_i32(tmp, val);
4341 return gen_set_psr(s, mask, spsr, tmp);
4344 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
4345 int *tgtmode, int *regno)
4347 /* Decode the r and sysm fields of MSR/MRS banked accesses into
4348 * the target mode and register number, and identify the various
4349 * unpredictable cases.
4350 * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
4351 * + executed in user mode
4352 * + using R15 as the src/dest register
4353 * + accessing an unimplemented register
4354 * + accessing a register that's inaccessible at current PL/security state*
4355 * + accessing a register that you could access with a different insn
4356 * We choose to UNDEF in all these cases.
4357 * Since we don't know which of the various AArch32 modes we are in,
4358 * we have to defer some checks to runtime.
4359 * Accesses to Monitor mode registers from Secure EL1 (which implies
4360 * that EL3 is AArch64) must trap to EL3.
4362 * If the access checks fail this function will emit code to take
4363 * an exception and return false. Otherwise it will return true,
4364 * and set *tgtmode and *regno appropriately.
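*
* Illustrative example: an access with r == 0 and sysm == 0x11 decodes
* below to tgtmode == ARM_CPU_MODE_IRQ and regno == 13, i.e. the banked
* r13_irq.
*/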
4366 int exc_target = default_exception_el(s);
4368 /* These instructions are present only in ARMv8, or in ARMv7 with the
4369 * Virtualization Extensions.
4371 if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
4372 !arm_dc_feature(s, ARM_FEATURE_EL2)) {
4376 if (IS_USER(s) || rn == 15) {
4380 /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
4381 * of registers into (r, sysm).
4384 /* SPSRs for other modes */
4386 case 0xe: /* SPSR_fiq */
4387 *tgtmode = ARM_CPU_MODE_FIQ;
4389 case 0x10: /* SPSR_irq */
4390 *tgtmode = ARM_CPU_MODE_IRQ;
4392 case 0x12: /* SPSR_svc */
4393 *tgtmode = ARM_CPU_MODE_SVC;
4395 case 0x14: /* SPSR_abt */
4396 *tgtmode = ARM_CPU_MODE_ABT;
4398 case 0x16: /* SPSR_und */
4399 *tgtmode = ARM_CPU_MODE_UND;
4401 case 0x1c: /* SPSR_mon */
4402 *tgtmode = ARM_CPU_MODE_MON;
4404 case 0x1e: /* SPSR_hyp */
4405 *tgtmode = ARM_CPU_MODE_HYP;
4407 default: /* unallocated */
4410 /* We arbitrarily assign SPSR a register number of 16. */
4413 /* general purpose registers for other modes */
4415 case 0x0 ... 0x6: /* 0b00xxx : r8_usr ... r14_usr */
4416 *tgtmode = ARM_CPU_MODE_USR;
4419 case 0x8 ... 0xe: /* 0b01xxx : r8_fiq ... r14_fiq */
4420 *tgtmode = ARM_CPU_MODE_FIQ;
4423 case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
4424 *tgtmode = ARM_CPU_MODE_IRQ;
4425 *regno = sysm & 1 ? 13 : 14;
4427 case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
4428 *tgtmode = ARM_CPU_MODE_SVC;
4429 *regno = sysm & 1 ? 13 : 14;
4431 case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
4432 *tgtmode = ARM_CPU_MODE_ABT;
4433 *regno = sysm & 1 ? 13 : 14;
4435 case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
4436 *tgtmode = ARM_CPU_MODE_UND;
4437 *regno = sysm & 1 ? 13 : 14;
4439 case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
4440 *tgtmode = ARM_CPU_MODE_MON;
4441 *regno = sysm & 1 ? 13 : 14;
4443 case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
4444 *tgtmode = ARM_CPU_MODE_HYP;
4445 /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
4446 *regno = sysm & 1 ? 13 : 17;
4448 default: /* unallocated */
4453 /* Catch the 'accessing inaccessible register' cases we can detect
4454 * at translate time.
4457 case ARM_CPU_MODE_MON:
4458 if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
4461 if (s->current_el == 1) {
4462 /* If we're in Secure EL1 (which implies that EL3 is AArch64)
4463 * then accesses to Mon registers trap to EL3
4469 case ARM_CPU_MODE_HYP:
4470 /* Note that we can forbid accesses from EL2 here because they
4471 * must be from Hyp mode itself
4473 if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 3) {
4484 /* If we get here then some access check did not pass */
4485 gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(), exc_target);
4489 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
4491 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
4492 int tgtmode = 0, regno = 0;
4494 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, ®no)) {
4498 /* Sync state because msr_banked() can raise exceptions */
4499 gen_set_condexec(s);
4500 gen_set_pc_im(s, s->pc - 4);
4501 tcg_reg = load_reg(s, rn);
4502 tcg_tgtmode = tcg_const_i32(tgtmode);
4503 tcg_regno = tcg_const_i32(regno);
4504 gen_helper_msr_banked(cpu_env, tcg_reg, tcg_tgtmode, tcg_regno);
4505 tcg_temp_free_i32(tcg_tgtmode);
4506 tcg_temp_free_i32(tcg_regno);
4507 tcg_temp_free_i32(tcg_reg);
4508 s->base.is_jmp = DISAS_UPDATE;
4511 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
4513 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
4514 int tgtmode = 0, regno = 0;
4516 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, ®no)) {
4520 /* Sync state because mrs_banked() can raise exceptions */
4521 gen_set_condexec(s);
4522 gen_set_pc_im(s, s->pc - 4);
4523 tcg_reg = tcg_temp_new_i32();
4524 tcg_tgtmode = tcg_const_i32(tgtmode);
4525 tcg_regno = tcg_const_i32(regno);
4526 gen_helper_mrs_banked(tcg_reg, cpu_env, tcg_tgtmode, tcg_regno);
4527 tcg_temp_free_i32(tcg_tgtmode);
4528 tcg_temp_free_i32(tcg_regno);
4529 store_reg(s, rn, tcg_reg);
4530 s->base.is_jmp = DISAS_UPDATE;
4533 /* Store value to PC as for an exception return (i.e. don't
4534 * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
4535 * will do the masking based on the new value of the Thumb bit.
4537 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
4539 tcg_gen_mov_i32(cpu_R[15], pc);
4540 tcg_temp_free_i32(pc);
4543 /* Generate a v6 exception return. Marks both values as dead. */
4544 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
4546 store_pc_exc_ret(s, pc);
4547 /* The cpsr_write_eret helper will mask the low bits of PC
4548 * appropriately depending on the new Thumb bit, so it must
4549 * be called after storing the new PC.
4551 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
4554 gen_helper_cpsr_write_eret(cpu_env, cpsr);
4555 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
4558 tcg_temp_free_i32(cpsr);
4559 /* Must exit loop to check un-masked IRQs */
4560 s->base.is_jmp = DISAS_EXIT;
4563 /* Generate an old-style exception return. Marks pc as dead. */
4564 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
4566 gen_rfe(s, pc, load_cpu_field(spsr));
4570 * For WFI we will halt the vCPU until an IRQ. For WFE and YIELD we
4571 * only call the helper when running single threaded TCG code to ensure
4572 * the next round-robin scheduled vCPU gets a crack. In MTTCG mode we
4573 * just skip this instruction. Currently the SEV/SEVL instructions,
4574 * which are *one* of many ways to wake the CPU from WFE, are not
4575 * implemented, so we can't sleep like WFI does.
4577 static void gen_nop_hint(DisasContext *s, int val)
4580 /* When running in MTTCG we don't generate jumps to the yield and
4581 * WFE helpers as it won't affect the scheduling of other vCPUs.
4582 * If we wanted to more completely model WFE/SEV so we don't busy
4583 * spin unnecessarily we would need to do something more involved.
4586 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
4587 gen_set_pc_im(s, s->pc);
4588 s->base.is_jmp = DISAS_YIELD;
4592 gen_set_pc_im(s, s->pc);
4593 s->base.is_jmp = DISAS_WFI;
4596 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
4597 gen_set_pc_im(s, s->pc);
4598 s->base.is_jmp = DISAS_WFE;
4603 /* TODO: Implement SEV, SEVL and WFE. May help SMP performance. */
4609 #define CPU_V001 cpu_V0, cpu_V0, cpu_V1
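/* Shorthand operand list: expands to "cpu_V0, cpu_V0, cpu_V1", i.e. the
 * common dest == src1 == cpu_V0, src2 == cpu_V1 pattern used when
 * invoking two-operand Neon helpers on the 64-bit temporaries.
 */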
4611 static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
4614 case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
4615 case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
4616 case 2: tcg_gen_add_i32(t0, t0, t1); break;
4621 static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
4624 case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
4625 case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
4626 case 2: tcg_gen_sub_i32(t0, t1, t0); break;
4631 /* 32-bit pairwise ops end up the same as the elementwise versions. */
4632 #define gen_helper_neon_pmax_s32 gen_helper_neon_max_s32
4633 #define gen_helper_neon_pmax_u32 gen_helper_neon_max_u32
4634 #define gen_helper_neon_pmin_s32 gen_helper_neon_min_s32
4635 #define gen_helper_neon_pmin_u32 gen_helper_neon_min_u32
4637 #define GEN_NEON_INTEGER_OP_ENV(name) do { \
4638 switch ((size << 1) | u) { \
4640 gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \
4643 gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \
4646 gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \
4649 gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \
4652 gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \
4655 gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \
4657 default: return 1; \
4660 #define GEN_NEON_INTEGER_OP(name) do { \
4661 switch ((size << 1) | u) { \
4663 gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \
4666 gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \
4669 gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \
4672 gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \
4675 gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \
4678 gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \
4680 default: return 1; \
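/* Illustrative example: with size == 1 and u == 0,
 * GEN_NEON_INTEGER_OP(hadd) selects the _s16 arm of the switch above
 * and emits gen_helper_neon_hadd_s16(tmp, tmp, tmp2); other size/u
 * combinations pick the matching signed or unsigned helper, and an
 * unhandled combination makes the enclosing function return 1.
 */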
4683 static TCGv_i32 neon_load_scratch(int scratch)
4685 TCGv_i32 tmp = tcg_temp_new_i32();
4686 tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
4690 static void neon_store_scratch(int scratch, TCGv_i32 var)
4692 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
4693 tcg_temp_free_i32(var);
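/* Fetch the scalar operand for the "by scalar" Neon forms.  For 16-bit
 * scalars the containing 32-bit element is loaded and the selected half
 * is duplicated into both halves of the temp; 32-bit scalars are
 * returned as-is.
 */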
4696 static inline TCGv_i32 neon_get_scalar(int size, int reg)
4700 tmp = neon_load_reg(reg & 7, reg >> 4);
4702 gen_neon_dup_high16(tmp);
4704 gen_neon_dup_low16(tmp);
4707 tmp = neon_load_reg(reg & 15, reg >> 4);
4712 static int gen_neon_unzip(int rd, int rm, int size, int q)
4716 if (!q && size == 2) {
4719 pd = vfp_reg_ptr(true, rd);
4720 pm = vfp_reg_ptr(true, rm);
4724 gen_helper_neon_qunzip8(pd, pm);
4727 gen_helper_neon_qunzip16(pd, pm);
4730 gen_helper_neon_qunzip32(pd, pm);
4738 gen_helper_neon_unzip8(pd, pm);
4741 gen_helper_neon_unzip16(pd, pm);
4747 tcg_temp_free_ptr(pd);
4748 tcg_temp_free_ptr(pm);
4752 static int gen_neon_zip(int rd, int rm, int size, int q)
4756 if (!q && size == 2) {
4759 pd = vfp_reg_ptr(true, rd);
4760 pm = vfp_reg_ptr(true, rm);
4764 gen_helper_neon_qzip8(pd, pm);
4767 gen_helper_neon_qzip16(pd, pm);
4770 gen_helper_neon_qzip32(pd, pm);
4778 gen_helper_neon_zip8(pd, pm);
4781 gen_helper_neon_zip16(pd, pm);
4787 tcg_temp_free_ptr(pd);
4788 tcg_temp_free_ptr(pm);
4792 static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
4796 rd = tcg_temp_new_i32();
4797 tmp = tcg_temp_new_i32();
4799 tcg_gen_shli_i32(rd, t0, 8);
4800 tcg_gen_andi_i32(rd, rd, 0xff00ff00);
4801 tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
4802 tcg_gen_or_i32(rd, rd, tmp);
4804 tcg_gen_shri_i32(t1, t1, 8);
4805 tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
4806 tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
4807 tcg_gen_or_i32(t1, t1, tmp);
4808 tcg_gen_mov_i32(t0, rd);
4810 tcg_temp_free_i32(tmp);
4811 tcg_temp_free_i32(rd);
4814 static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
4818 rd = tcg_temp_new_i32();
4819 tmp = tcg_temp_new_i32();
4821 tcg_gen_shli_i32(rd, t0, 16);
4822 tcg_gen_andi_i32(tmp, t1, 0xffff);
4823 tcg_gen_or_i32(rd, rd, tmp);
4824 tcg_gen_shri_i32(t1, t1, 16);
4825 tcg_gen_andi_i32(tmp, t0, 0xffff0000);
4826 tcg_gen_or_i32(t1, t1, tmp);
4827 tcg_gen_mov_i32(t0, rd);
4829 tcg_temp_free_i32(tmp);
4830 tcg_temp_free_i32(rd);
4838 } neon_ls_element_type[11] = {
4852 /* Translate a NEON load/store element instruction. Return nonzero if the
4853 instruction is invalid. */
4854 static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
4873 /* FIXME: this access check should not take precedence over UNDEF
4874 * for invalid encodings; we will generate incorrect syndrome information
4875 * for attempts to execute invalid vfp/neon encodings with FP disabled.
4877 if (s->fp_excp_el) {
4878 gen_exception_insn(s, 4, EXCP_UDEF,
4879 syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
4883 if (!s->vfp_enabled)
4885 VFP_DREG_D(rd, insn);
4886 rn = (insn >> 16) & 0xf;
4888 load = (insn & (1 << 21)) != 0;
4889 if ((insn & (1 << 23)) == 0) {
4890 /* Load store all elements. */
4891 op = (insn >> 8) & 0xf;
4892 size = (insn >> 6) & 3;
4895 /* Catch UNDEF cases for bad values of align field */
4898 if (((insn >> 5) & 1) == 1) {
4903 if (((insn >> 4) & 3) == 3) {
4910 nregs = neon_ls_element_type[op].nregs;
4911 interleave = neon_ls_element_type[op].interleave;
4912 spacing = neon_ls_element_type[op].spacing;
4913 if (size == 3 && (interleave | spacing) != 1)
4915 addr = tcg_temp_new_i32();
4916 load_reg_var(s, addr, rn);
4917 stride = (1 << size) * interleave;
4918 for (reg = 0; reg < nregs; reg++) {
4919 if (interleave > 2 || (interleave == 2 && nregs == 2)) {
4920 load_reg_var(s, addr, rn);
4921 tcg_gen_addi_i32(addr, addr, (1 << size) * reg);
4922 } else if (interleave == 2 && nregs == 4 && reg == 2) {
4923 load_reg_var(s, addr, rn);
4924 tcg_gen_addi_i32(addr, addr, 1 << size);
4927 tmp64 = tcg_temp_new_i64();
4929 gen_aa32_ld64(s, tmp64, addr, get_mem_index(s));
4930 neon_store_reg64(tmp64, rd);
4932 neon_load_reg64(tmp64, rd);
4933 gen_aa32_st64(s, tmp64, addr, get_mem_index(s));
4935 tcg_temp_free_i64(tmp64);
4936 tcg_gen_addi_i32(addr, addr, stride);
4938 for (pass = 0; pass < 2; pass++) {
4941 tmp = tcg_temp_new_i32();
4942 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
4943 neon_store_reg(rd, pass, tmp);
4945 tmp = neon_load_reg(rd, pass);
4946 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
4947 tcg_temp_free_i32(tmp);
4949 tcg_gen_addi_i32(addr, addr, stride);
4950 } else if (size == 1) {
4952 tmp = tcg_temp_new_i32();
4953 gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
4954 tcg_gen_addi_i32(addr, addr, stride);
4955 tmp2 = tcg_temp_new_i32();
4956 gen_aa32_ld16u(s, tmp2, addr, get_mem_index(s));
4957 tcg_gen_addi_i32(addr, addr, stride);
4958 tcg_gen_shli_i32(tmp2, tmp2, 16);
4959 tcg_gen_or_i32(tmp, tmp, tmp2);
4960 tcg_temp_free_i32(tmp2);
4961 neon_store_reg(rd, pass, tmp);
4963 tmp = neon_load_reg(rd, pass);
4964 tmp2 = tcg_temp_new_i32();
4965 tcg_gen_shri_i32(tmp2, tmp, 16);
4966 gen_aa32_st16(s, tmp, addr, get_mem_index(s));
4967 tcg_temp_free_i32(tmp);
4968 tcg_gen_addi_i32(addr, addr, stride);
4969 gen_aa32_st16(s, tmp2, addr, get_mem_index(s));
4970 tcg_temp_free_i32(tmp2);
4971 tcg_gen_addi_i32(addr, addr, stride);
4973 } else /* size == 0 */ {
4976 for (n = 0; n < 4; n++) {
4977 tmp = tcg_temp_new_i32();
4978 gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
4979 tcg_gen_addi_i32(addr, addr, stride);
4983 tcg_gen_shli_i32(tmp, tmp, n * 8);
4984 tcg_gen_or_i32(tmp2, tmp2, tmp);
4985 tcg_temp_free_i32(tmp);
4988 neon_store_reg(rd, pass, tmp2);
4990 tmp2 = neon_load_reg(rd, pass);
4991 for (n = 0; n < 4; n++) {
4992 tmp = tcg_temp_new_i32();
4994 tcg_gen_mov_i32(tmp, tmp2);
4996 tcg_gen_shri_i32(tmp, tmp2, n * 8);
4998 gen_aa32_st8(s, tmp, addr, get_mem_index(s));
4999 tcg_temp_free_i32(tmp);
5000 tcg_gen_addi_i32(addr, addr, stride);
5002 tcg_temp_free_i32(tmp2);
5009 tcg_temp_free_i32(addr);
5012 size = (insn >> 10) & 3;
5014 /* Load single element to all lanes. */
5015 int a = (insn >> 4) & 1;
5019 size = (insn >> 6) & 3;
5020 nregs = ((insn >> 8) & 3) + 1;
5023 if (nregs != 4 || a == 0) {
5026 /* For VLD4 size == 3, a == 1 means 32 bits at 16-byte alignment */
5029 if (nregs == 1 && a == 1 && size == 0) {
5032 if (nregs == 3 && a == 1) {
5035 addr = tcg_temp_new_i32();
5036 load_reg_var(s, addr, rn);
5038 /* VLD1 to all lanes: bit 5 indicates how many Dregs to write */
5039 tmp = gen_load_and_replicate(s, addr, size);
5040 tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0));
5041 tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1));
5042 if (insn & (1 << 5)) {
5043 tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 0));
5044 tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 1));
5046 tcg_temp_free_i32(tmp);
5048 /* VLD2/3/4 to all lanes: bit 5 indicates register stride */
5049 stride = (insn & (1 << 5)) ? 2 : 1;
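/* A register stride of 2 corresponds to lists such as
 * VLD4.8 {d0[], d2[], d4[], d6[]}, i.e. every other D register.
 */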
5050 for (reg = 0; reg < nregs; reg++) {
5051 tmp = gen_load_and_replicate(s, addr, size);
5052 tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0));
5053 tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1));
5054 tcg_temp_free_i32(tmp);
5055 tcg_gen_addi_i32(addr, addr, 1 << size);
5059 tcg_temp_free_i32(addr);
5060 stride = (1 << size) * nregs;
5062 /* Single element. */
5063 int idx = (insn >> 4) & 0xf;
5064 pass = (insn >> 7) & 1;
5067 shift = ((insn >> 5) & 3) * 8;
5071 shift = ((insn >> 6) & 1) * 16;
5072 stride = (insn & (1 << 5)) ? 2 : 1;
5076 stride = (insn & (1 << 6)) ? 2 : 1;
5081 nregs = ((insn >> 8) & 3) + 1;
5082 /* Catch the UNDEF cases. This is unavoidably a bit messy. */
5085 if (((idx & (1 << size)) != 0) ||
5086 (size == 2 && ((idx & 3) == 1 || (idx & 3) == 2))) {
5091 if ((idx & 1) != 0) {
5096 if (size == 2 && (idx & 2) != 0) {
5101 if ((size == 2) && ((idx & 3) == 3)) {
5108 if ((rd + stride * (nregs - 1)) > 31) {
5109 /* Attempts to write off the end of the register file
5110 * are UNPREDICTABLE; we choose to UNDEF because otherwise
5111 * the neon_load_reg() would write off the end of the array.
5115 addr = tcg_temp_new_i32();
5116 load_reg_var(s, addr, rn);
5117 for (reg = 0; reg < nregs; reg++) {
5119 tmp = tcg_temp_new_i32();
5122 gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
5125 gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
5128 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
5130 default: /* Avoid compiler warnings. */
5134 tmp2 = neon_load_reg(rd, pass);
5135 tcg_gen_deposit_i32(tmp, tmp2, tmp,
5136 shift, size ? 16 : 8);
5137 tcg_temp_free_i32(tmp2);
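/* For 8- and 16-bit single-element loads only the selected lane is
 * replaced: the deposit keeps the other bits of the old 32-bit word,
 * while 32-bit element loads simply overwrite the whole word.
 */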
5139 neon_store_reg(rd, pass, tmp);
5140 } else { /* Store */
5141 tmp = neon_load_reg(rd, pass);
5143 tcg_gen_shri_i32(tmp, tmp, shift);
5146 gen_aa32_st8(s, tmp, addr, get_mem_index(s));
5149 gen_aa32_st16(s, tmp, addr, get_mem_index(s));
5152 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
5155 tcg_temp_free_i32(tmp);
5158 tcg_gen_addi_i32(addr, addr, 1 << size);
5160 tcg_temp_free_i32(addr);
5161 stride = nregs * (1 << size);
5167 base = load_reg(s, rn);
5169 tcg_gen_addi_i32(base, base, stride);
5172 index = load_reg(s, rm);
5173 tcg_gen_add_i32(base, base, index);
5174 tcg_temp_free_i32(index);
5176 store_reg(s, rn, base);
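/* Writeback rule for these element/structure accesses: Rm == 15 means
 * no writeback, Rm == 13 post-increments Rn by the transfer size
 * (stride), and any other Rm is added to Rn as a register offset.
 */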
5181 /* Bitwise select. dest = c ? t : f. Clobbers T and F. */
5182 static void gen_neon_bsl(TCGv_i32 dest, TCGv_i32 t, TCGv_i32 f, TCGv_i32 c)
5184 tcg_gen_and_i32(t, t, c);
5185 tcg_gen_andc_i32(f, f, c);
5186 tcg_gen_or_i32(dest, t, f);
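/* i.e. dest = (t & c) | (f & ~c): each result bit comes from t where
 * the corresponding bit of c is set and from f where it is clear.
 */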
5189 static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
5192 case 0: gen_helper_neon_narrow_u8(dest, src); break;
5193 case 1: gen_helper_neon_narrow_u16(dest, src); break;
5194 case 2: tcg_gen_extrl_i64_i32(dest, src); break;
5199 static inline void gen_neon_narrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
5202 case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
5203 case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
5204 case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
5209 static inline void gen_neon_narrow_satu(int size, TCGv_i32 dest, TCGv_i64 src)
5212 case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
5213 case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
5214 case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
5219 static inline void gen_neon_unarrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
5222 case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
5223 case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
5224 case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
5229 static inline void gen_neon_shift_narrow(int size, TCGv_i32 var, TCGv_i32 shift,
5235 case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
5236 case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
5241 case 1: gen_helper_neon_rshl_s16(var, var, shift); break;
5242 case 2: gen_helper_neon_rshl_s32(var, var, shift); break;
5249 case 1: gen_helper_neon_shl_u16(var, var, shift); break;
5250 case 2: gen_helper_neon_shl_u32(var, var, shift); break;
5255 case 1: gen_helper_neon_shl_s16(var, var, shift); break;
5256 case 2: gen_helper_neon_shl_s32(var, var, shift); break;
5263 static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u)
5267 case 0: gen_helper_neon_widen_u8(dest, src); break;
5268 case 1: gen_helper_neon_widen_u16(dest, src); break;
5269 case 2: tcg_gen_extu_i32_i64(dest, src); break;
5274 case 0: gen_helper_neon_widen_s8(dest, src); break;
5275 case 1: gen_helper_neon_widen_s16(dest, src); break;
5276 case 2: tcg_gen_ext_i32_i64(dest, src); break;
5280 tcg_temp_free_i32(src);
5283 static inline void gen_neon_addl(int size)
5286 case 0: gen_helper_neon_addl_u16(CPU_V001); break;
5287 case 1: gen_helper_neon_addl_u32(CPU_V001); break;
5288 case 2: tcg_gen_add_i64(CPU_V001); break;
5293 static inline void gen_neon_subl(int size)
5296 case 0: gen_helper_neon_subl_u16(CPU_V001); break;
5297 case 1: gen_helper_neon_subl_u32(CPU_V001); break;
5298 case 2: tcg_gen_sub_i64(CPU_V001); break;
5303 static inline void gen_neon_negl(TCGv_i64 var, int size)
5306 case 0: gen_helper_neon_negl_u16(var, var); break;
5307 case 1: gen_helper_neon_negl_u32(var, var); break;
5309 tcg_gen_neg_i64(var, var);
5315 static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
5318 case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
5319 case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
5324 static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
5329 switch ((size << 1) | u) {
5330 case 0: gen_helper_neon_mull_s8(dest, a, b); break;
5331 case 1: gen_helper_neon_mull_u8(dest, a, b); break;
5332 case 2: gen_helper_neon_mull_s16(dest, a, b); break;
5333 case 3: gen_helper_neon_mull_u16(dest, a, b); break;
5335 tmp = gen_muls_i64_i32(a, b);
5336 tcg_gen_mov_i64(dest, tmp);
5337 tcg_temp_free_i64(tmp);
5340 tmp = gen_mulu_i64_i32(a, b);
5341 tcg_gen_mov_i64(dest, tmp);
5342 tcg_temp_free_i64(tmp);
5347 /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
5348 Don't forget to clean them now. */
5350 tcg_temp_free_i32(a);
5351 tcg_temp_free_i32(b);
5355 static void gen_neon_narrow_op(int op, int u, int size,
5356 TCGv_i32 dest, TCGv_i64 src)
5360 gen_neon_unarrow_sats(size, dest, src);
5362 gen_neon_narrow(size, dest, src);
5366 gen_neon_narrow_satu(size, dest, src);
5368 gen_neon_narrow_sats(size, dest, src);
5373 /* Symbolic constants for op fields for Neon 3-register same-length.
5374 * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
5377 #define NEON_3R_VHADD 0
5378 #define NEON_3R_VQADD 1
5379 #define NEON_3R_VRHADD 2
5380 #define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
5381 #define NEON_3R_VHSUB 4
5382 #define NEON_3R_VQSUB 5
5383 #define NEON_3R_VCGT 6
5384 #define NEON_3R_VCGE 7
5385 #define NEON_3R_VSHL 8
5386 #define NEON_3R_VQSHL 9
5387 #define NEON_3R_VRSHL 10
5388 #define NEON_3R_VQRSHL 11
5389 #define NEON_3R_VMAX 12
5390 #define NEON_3R_VMIN 13
5391 #define NEON_3R_VABD 14
5392 #define NEON_3R_VABA 15
5393 #define NEON_3R_VADD_VSUB 16
5394 #define NEON_3R_VTST_VCEQ 17
5395 #define NEON_3R_VML 18 /* VMLA, VMLAL, VMLS, VMLSL */
5396 #define NEON_3R_VMUL 19
5397 #define NEON_3R_VPMAX 20
5398 #define NEON_3R_VPMIN 21
5399 #define NEON_3R_VQDMULH_VQRDMULH 22
5400 #define NEON_3R_VPADD_VQRDMLAH 23
5401 #define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
5402 #define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS, VQRDMLSH */
5403 #define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
5404 #define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
5405 #define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
5406 #define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
5407 #define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
5408 #define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */
5410 static const uint8_t neon_3r_sizes[] = {
5411 [NEON_3R_VHADD] = 0x7,
5412 [NEON_3R_VQADD] = 0xf,
5413 [NEON_3R_VRHADD] = 0x7,
5414 [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
5415 [NEON_3R_VHSUB] = 0x7,
5416 [NEON_3R_VQSUB] = 0xf,
5417 [NEON_3R_VCGT] = 0x7,
5418 [NEON_3R_VCGE] = 0x7,
5419 [NEON_3R_VSHL] = 0xf,
5420 [NEON_3R_VQSHL] = 0xf,
5421 [NEON_3R_VRSHL] = 0xf,
5422 [NEON_3R_VQRSHL] = 0xf,
5423 [NEON_3R_VMAX] = 0x7,
5424 [NEON_3R_VMIN] = 0x7,
5425 [NEON_3R_VABD] = 0x7,
5426 [NEON_3R_VABA] = 0x7,
5427 [NEON_3R_VADD_VSUB] = 0xf,
5428 [NEON_3R_VTST_VCEQ] = 0x7,
5429 [NEON_3R_VML] = 0x7,
5430 [NEON_3R_VMUL] = 0x7,
5431 [NEON_3R_VPMAX] = 0x7,
5432 [NEON_3R_VPMIN] = 0x7,
5433 [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
5434 [NEON_3R_VPADD_VQRDMLAH] = 0x7,
5435 [NEON_3R_SHA] = 0xf, /* size field encodes op type */
5436 [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */
5437 [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
5438 [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
5439 [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
5440 [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
5441 [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
5442 [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
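/* Example of how this table is used: 0xf (VQADD, the shifts,
 * VADD/VSUB) allows all four size encodings including 64-bit elements,
 * while 0x7 (VMUL, VMAX, ...) makes size == 3 UNDEF.
 */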
5445 /* Symbolic constants for op fields for Neon 2-register miscellaneous.
5446 * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
5449 #define NEON_2RM_VREV64 0
5450 #define NEON_2RM_VREV32 1
5451 #define NEON_2RM_VREV16 2
5452 #define NEON_2RM_VPADDL 4
5453 #define NEON_2RM_VPADDL_U 5
5454 #define NEON_2RM_AESE 6 /* Includes AESD */
5455 #define NEON_2RM_AESMC 7 /* Includes AESIMC */
5456 #define NEON_2RM_VCLS 8
5457 #define NEON_2RM_VCLZ 9
5458 #define NEON_2RM_VCNT 10
5459 #define NEON_2RM_VMVN 11
5460 #define NEON_2RM_VPADAL 12
5461 #define NEON_2RM_VPADAL_U 13
5462 #define NEON_2RM_VQABS 14
5463 #define NEON_2RM_VQNEG 15
5464 #define NEON_2RM_VCGT0 16
5465 #define NEON_2RM_VCGE0 17
5466 #define NEON_2RM_VCEQ0 18
5467 #define NEON_2RM_VCLE0 19
5468 #define NEON_2RM_VCLT0 20
5469 #define NEON_2RM_SHA1H 21
5470 #define NEON_2RM_VABS 22
5471 #define NEON_2RM_VNEG 23
5472 #define NEON_2RM_VCGT0_F 24
5473 #define NEON_2RM_VCGE0_F 25
5474 #define NEON_2RM_VCEQ0_F 26
5475 #define NEON_2RM_VCLE0_F 27
5476 #define NEON_2RM_VCLT0_F 28
5477 #define NEON_2RM_VABS_F 30
5478 #define NEON_2RM_VNEG_F 31
5479 #define NEON_2RM_VSWP 32
5480 #define NEON_2RM_VTRN 33
5481 #define NEON_2RM_VUZP 34
5482 #define NEON_2RM_VZIP 35
5483 #define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
5484 #define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
5485 #define NEON_2RM_VSHLL 38
5486 #define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
5487 #define NEON_2RM_VRINTN 40
5488 #define NEON_2RM_VRINTX 41
5489 #define NEON_2RM_VRINTA 42
5490 #define NEON_2RM_VRINTZ 43
5491 #define NEON_2RM_VCVT_F16_F32 44
5492 #define NEON_2RM_VRINTM 45
5493 #define NEON_2RM_VCVT_F32_F16 46
5494 #define NEON_2RM_VRINTP 47
5495 #define NEON_2RM_VCVTAU 48
5496 #define NEON_2RM_VCVTAS 49
5497 #define NEON_2RM_VCVTNU 50
5498 #define NEON_2RM_VCVTNS 51
5499 #define NEON_2RM_VCVTPU 52
5500 #define NEON_2RM_VCVTPS 53
5501 #define NEON_2RM_VCVTMU 54
5502 #define NEON_2RM_VCVTMS 55
5503 #define NEON_2RM_VRECPE 56
5504 #define NEON_2RM_VRSQRTE 57
5505 #define NEON_2RM_VRECPE_F 58
5506 #define NEON_2RM_VRSQRTE_F 59
5507 #define NEON_2RM_VCVT_FS 60
5508 #define NEON_2RM_VCVT_FU 61
5509 #define NEON_2RM_VCVT_SF 62
5510 #define NEON_2RM_VCVT_UF 63
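/* The op value is insn bits [17:16] in op[5:4] and bits [10:7] in
 * op[3:0]; e.g. bits [17:16] == 0b10 with bits [10:7] == 0b0000
 * decodes to op == 32, i.e. NEON_2RM_VSWP.
 */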
5512 static int neon_2rm_is_float_op(int op)
5514 /* Return true if this neon 2reg-misc op is float-to-float */
5515 return (op == NEON_2RM_VABS_F || op == NEON_2RM_VNEG_F ||
5516 (op >= NEON_2RM_VRINTN && op <= NEON_2RM_VRINTZ) ||
5517 op == NEON_2RM_VRINTM ||
5518 (op >= NEON_2RM_VRINTP && op <= NEON_2RM_VCVTMS) ||
5519 op >= NEON_2RM_VRECPE_F);
5522 static bool neon_2rm_is_v8_op(int op)
5524 /* Return true if this neon 2reg-misc op is ARMv8 and up */
5526 case NEON_2RM_VRINTN:
5527 case NEON_2RM_VRINTA:
5528 case NEON_2RM_VRINTM:
5529 case NEON_2RM_VRINTP:
5530 case NEON_2RM_VRINTZ:
5531 case NEON_2RM_VRINTX:
5532 case NEON_2RM_VCVTAU:
5533 case NEON_2RM_VCVTAS:
5534 case NEON_2RM_VCVTNU:
5535 case NEON_2RM_VCVTNS:
5536 case NEON_2RM_VCVTPU:
5537 case NEON_2RM_VCVTPS:
5538 case NEON_2RM_VCVTMU:
5539 case NEON_2RM_VCVTMS:
5546 /* Each entry in this array has bit n set if the insn allows
5547 * size value n (otherwise it will UNDEF). Since unallocated
5548 * op values will have no bits set, they always UNDEF.
5550 static const uint8_t neon_2rm_sizes[] = {
5551 [NEON_2RM_VREV64] = 0x7,
5552 [NEON_2RM_VREV32] = 0x3,
5553 [NEON_2RM_VREV16] = 0x1,
5554 [NEON_2RM_VPADDL] = 0x7,
5555 [NEON_2RM_VPADDL_U] = 0x7,
5556 [NEON_2RM_AESE] = 0x1,
5557 [NEON_2RM_AESMC] = 0x1,
5558 [NEON_2RM_VCLS] = 0x7,
5559 [NEON_2RM_VCLZ] = 0x7,
5560 [NEON_2RM_VCNT] = 0x1,
5561 [NEON_2RM_VMVN] = 0x1,
5562 [NEON_2RM_VPADAL] = 0x7,
5563 [NEON_2RM_VPADAL_U] = 0x7,
5564 [NEON_2RM_VQABS] = 0x7,
5565 [NEON_2RM_VQNEG] = 0x7,
5566 [NEON_2RM_VCGT0] = 0x7,
5567 [NEON_2RM_VCGE0] = 0x7,
5568 [NEON_2RM_VCEQ0] = 0x7,
5569 [NEON_2RM_VCLE0] = 0x7,
5570 [NEON_2RM_VCLT0] = 0x7,
5571 [NEON_2RM_SHA1H] = 0x4,
5572 [NEON_2RM_VABS] = 0x7,
5573 [NEON_2RM_VNEG] = 0x7,
5574 [NEON_2RM_VCGT0_F] = 0x4,
5575 [NEON_2RM_VCGE0_F] = 0x4,
5576 [NEON_2RM_VCEQ0_F] = 0x4,
5577 [NEON_2RM_VCLE0_F] = 0x4,
5578 [NEON_2RM_VCLT0_F] = 0x4,
5579 [NEON_2RM_VABS_F] = 0x4,
5580 [NEON_2RM_VNEG_F] = 0x4,
5581 [NEON_2RM_VSWP] = 0x1,
5582 [NEON_2RM_VTRN] = 0x7,
5583 [NEON_2RM_VUZP] = 0x7,
5584 [NEON_2RM_VZIP] = 0x7,
5585 [NEON_2RM_VMOVN] = 0x7,
5586 [NEON_2RM_VQMOVN] = 0x7,
5587 [NEON_2RM_VSHLL] = 0x7,
5588 [NEON_2RM_SHA1SU1] = 0x4,
5589 [NEON_2RM_VRINTN] = 0x4,
5590 [NEON_2RM_VRINTX] = 0x4,
5591 [NEON_2RM_VRINTA] = 0x4,
5592 [NEON_2RM_VRINTZ] = 0x4,
5593 [NEON_2RM_VCVT_F16_F32] = 0x2,
5594 [NEON_2RM_VRINTM] = 0x4,
5595 [NEON_2RM_VCVT_F32_F16] = 0x2,
5596 [NEON_2RM_VRINTP] = 0x4,
5597 [NEON_2RM_VCVTAU] = 0x4,
5598 [NEON_2RM_VCVTAS] = 0x4,
5599 [NEON_2RM_VCVTNU] = 0x4,
5600 [NEON_2RM_VCVTNS] = 0x4,
5601 [NEON_2RM_VCVTPU] = 0x4,
5602 [NEON_2RM_VCVTPS] = 0x4,
5603 [NEON_2RM_VCVTMU] = 0x4,
5604 [NEON_2RM_VCVTMS] = 0x4,
5605 [NEON_2RM_VRECPE] = 0x4,
5606 [NEON_2RM_VRSQRTE] = 0x4,
5607 [NEON_2RM_VRECPE_F] = 0x4,
5608 [NEON_2RM_VRSQRTE_F] = 0x4,
5609 [NEON_2RM_VCVT_FS] = 0x4,
5610 [NEON_2RM_VCVT_FU] = 0x4,
5611 [NEON_2RM_VCVT_SF] = 0x4,
5612 [NEON_2RM_VCVT_UF] = 0x4,
5616 /* Expand v8.1 simd helper. */
5617 static int do_v81_helper(DisasContext *s, gen_helper_gvec_3_ptr *fn,
5618 int q, int rd, int rn, int rm)
5620 if (arm_dc_feature(s, ARM_FEATURE_V8_RDM)) {
5621 int opr_sz = (1 + q) * 8;
5622 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
5623 vfp_reg_offset(1, rn),
5624 vfp_reg_offset(1, rm), cpu_env,
5625 opr_sz, opr_sz, 0, fn);
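/* opr_sz is the vector length in bytes: 8 for a D-register operation,
 * 16 when Q is set; it is used for both the gvec oprsz and maxsz.
 */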
5631 /* Translate a NEON data processing instruction. Return nonzero if the
5632 instruction is invalid.
5633 We process data in a mixture of 32-bit and 64-bit chunks.
5634 Mostly we use 32-bit chunks so we can use normal scalar instructions. */
5636 static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
5648 TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
5649 TCGv_ptr ptr1, ptr2, ptr3;
5652 /* FIXME: this access check should not take precedence over UNDEF
5653 * for invalid encodings; we will generate incorrect syndrome information
5654 * for attempts to execute invalid vfp/neon encodings with FP disabled.
5656 if (s->fp_excp_el) {
5657 gen_exception_insn(s, 4, EXCP_UDEF,
5658 syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
5662 if (!s->vfp_enabled)
5664 q = (insn & (1 << 6)) != 0;
5665 u = (insn >> 24) & 1;
5666 VFP_DREG_D(rd, insn);
5667 VFP_DREG_N(rn, insn);
5668 VFP_DREG_M(rm, insn);
5669 size = (insn >> 20) & 3;
5670 if ((insn & (1 << 23)) == 0) {
5671 /* Three register same length. */
5672 op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
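/* op is insn bits [11:8] in op[4:1] and bit 4 in op[0]; e.g. bits
 * [11:8] == 0b1000 with bit 4 clear gives op == 16 (NEON_3R_VADD_VSUB),
 * and with bit 4 set gives op == 17 (NEON_3R_VTST_VCEQ).
 */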
5673 /* Catch invalid op and bad size combinations: UNDEF */
5674 if ((neon_3r_sizes[op] & (1 << size)) == 0) {
5677 /* All insns of this form UNDEF for either this condition or the
5678 * superset of cases "Q==1"; we catch the latter later.
5680 if (q && ((rd | rn | rm) & 1)) {
5685 /* The SHA-1/SHA-256 3-register instructions require special
5686 * treatment here, as their size field is overloaded as an
5687 * op type selector, and they all consume their input in a single pass.
5693 if (!u) { /* SHA-1 */
5694 if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)) {
5697 ptr1 = vfp_reg_ptr(true, rd);
5698 ptr2 = vfp_reg_ptr(true, rn);
5699 ptr3 = vfp_reg_ptr(true, rm);
5700 tmp4 = tcg_const_i32(size);
5701 gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp4);
5702 tcg_temp_free_i32(tmp4);
5703 } else { /* SHA-256 */
5704 if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA256) || size == 3) {
5707 ptr1 = vfp_reg_ptr(true, rd);
5708 ptr2 = vfp_reg_ptr(true, rn);
5709 ptr3 = vfp_reg_ptr(true, rm);
5712 gen_helper_crypto_sha256h(ptr1, ptr2, ptr3);
5715 gen_helper_crypto_sha256h2(ptr1, ptr2, ptr3);
5718 gen_helper_crypto_sha256su1(ptr1, ptr2, ptr3);
5722 tcg_temp_free_ptr(ptr1);
5723 tcg_temp_free_ptr(ptr2);
5724 tcg_temp_free_ptr(ptr3);
5727 case NEON_3R_VPADD_VQRDMLAH:
5734 return do_v81_helper(s, gen_helper_gvec_qrdmlah_s16,
5737 return do_v81_helper(s, gen_helper_gvec_qrdmlah_s32,
5742 case NEON_3R_VFM_VQRDMLSH:
5753 return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s16,
5756 return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s32,
5761 if (size == 3 && op != NEON_3R_LOGIC) {
5762 /* 64-bit element instructions. */
5763 for (pass = 0; pass < (q ? 2 : 1); pass++) {
5764 neon_load_reg64(cpu_V0, rn + pass);
5765 neon_load_reg64(cpu_V1, rm + pass);
5769 gen_helper_neon_qadd_u64(cpu_V0, cpu_env,
5772 gen_helper_neon_qadd_s64(cpu_V0, cpu_env,
5778 gen_helper_neon_qsub_u64(cpu_V0, cpu_env,
5781 gen_helper_neon_qsub_s64(cpu_V0, cpu_env,
5787 gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
5789 gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0);
5794 gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5797 gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5803 gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
5805 gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
5808 case NEON_3R_VQRSHL:
5810 gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
5813 gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
5817 case NEON_3R_VADD_VSUB:
5819 tcg_gen_sub_i64(CPU_V001);
5821 tcg_gen_add_i64(CPU_V001);
5827 neon_store_reg64(cpu_V0, rd + pass);
5836 case NEON_3R_VQRSHL:
5839 /* Shift instruction operands are reversed. */
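/* (For the register-shift ops the shift counts come from Vn and the
 * data from Vm, so rn and rm are swapped so that the common
 * per-element code below sees the data as its first operand.)
 */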
5845 case NEON_3R_VPADD_VQRDMLAH:
5850 case NEON_3R_FLOAT_ARITH:
5851 pairwise = (u && size < 2); /* if VPADD (float) */
5853 case NEON_3R_FLOAT_MINMAX:
5854 pairwise = u; /* if VPMIN/VPMAX (float) */
5856 case NEON_3R_FLOAT_CMP:
5858 /* no encoding for U=0 C=1x */
5862 case NEON_3R_FLOAT_ACMP:
5867 case NEON_3R_FLOAT_MISC:
5868 /* VMAXNM/VMINNM in ARMv8 */
5869 if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) {
5874 if (u && (size != 0)) {
5875 /* UNDEF on invalid size for polynomial subcase */
5879 case NEON_3R_VFM_VQRDMLSH:
5880 if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) {
5888 if (pairwise && q) {
5889 /* All the pairwise insns UNDEF if Q is set */
5893 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5898 tmp = neon_load_reg(rn, 0);
5899 tmp2 = neon_load_reg(rn, 1);
5901 tmp = neon_load_reg(rm, 0);
5902 tmp2 = neon_load_reg(rm, 1);
5906 tmp = neon_load_reg(rn, pass);
5907 tmp2 = neon_load_reg(rm, pass);
5911 GEN_NEON_INTEGER_OP(hadd);
5914 GEN_NEON_INTEGER_OP_ENV(qadd);
5916 case NEON_3R_VRHADD:
5917 GEN_NEON_INTEGER_OP(rhadd);
5919 case NEON_3R_LOGIC: /* Logic ops. */
5920 switch ((u << 2) | size) {
5922 tcg_gen_and_i32(tmp, tmp, tmp2);
5925 tcg_gen_andc_i32(tmp, tmp, tmp2);
5928 tcg_gen_or_i32(tmp, tmp, tmp2);
5931 tcg_gen_orc_i32(tmp, tmp, tmp2);
5934 tcg_gen_xor_i32(tmp, tmp, tmp2);
5937 tmp3 = neon_load_reg(rd, pass);
5938 gen_neon_bsl(tmp, tmp, tmp2, tmp3);
5939 tcg_temp_free_i32(tmp3);
5942 tmp3 = neon_load_reg(rd, pass);
5943 gen_neon_bsl(tmp, tmp, tmp3, tmp2);
5944 tcg_temp_free_i32(tmp3);
5947 tmp3 = neon_load_reg(rd, pass);
5948 gen_neon_bsl(tmp, tmp3, tmp, tmp2);
5949 tcg_temp_free_i32(tmp3);
5954 GEN_NEON_INTEGER_OP(hsub);
5957 GEN_NEON_INTEGER_OP_ENV(qsub);
5960 GEN_NEON_INTEGER_OP(cgt);
5963 GEN_NEON_INTEGER_OP(cge);
5966 GEN_NEON_INTEGER_OP(shl);
5969 GEN_NEON_INTEGER_OP_ENV(qshl);
5972 GEN_NEON_INTEGER_OP(rshl);
5974 case NEON_3R_VQRSHL:
5975 GEN_NEON_INTEGER_OP_ENV(qrshl);
5978 GEN_NEON_INTEGER_OP(max);
5981 GEN_NEON_INTEGER_OP(min);
5984 GEN_NEON_INTEGER_OP(abd);
5987 GEN_NEON_INTEGER_OP(abd);
5988 tcg_temp_free_i32(tmp2);
5989 tmp2 = neon_load_reg(rd, pass);
5990 gen_neon_add(size, tmp, tmp2);
5992 case NEON_3R_VADD_VSUB:
5993 if (!u) { /* VADD */
5994 gen_neon_add(size, tmp, tmp2);
5997 case 0: gen_helper_neon_sub_u8(tmp, tmp, tmp2); break;
5998 case 1: gen_helper_neon_sub_u16(tmp, tmp, tmp2); break;
5999 case 2: tcg_gen_sub_i32(tmp, tmp, tmp2); break;
6004 case NEON_3R_VTST_VCEQ:
6005 if (!u) { /* VTST */
6007 case 0: gen_helper_neon_tst_u8(tmp, tmp, tmp2); break;
6008 case 1: gen_helper_neon_tst_u16(tmp, tmp, tmp2); break;
6009 case 2: gen_helper_neon_tst_u32(tmp, tmp, tmp2); break;
6014 case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
6015 case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
6016 case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
6021 case NEON_3R_VML: /* VMLA, VMLAL, VMLS,VMLSL */
6023 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
6024 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
6025 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
6028 tcg_temp_free_i32(tmp2);
6029 tmp2 = neon_load_reg(rd, pass);
6031 gen_neon_rsb(size, tmp, tmp2);
6033 gen_neon_add(size, tmp, tmp2);
6037 if (u) { /* polynomial */
6038 gen_helper_neon_mul_p8(tmp, tmp, tmp2);
6039 } else { /* Integer */
6041 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
6042 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
6043 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
6049 GEN_NEON_INTEGER_OP(pmax);
6052 GEN_NEON_INTEGER_OP(pmin);
6054 case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high. */
6055 if (!u) { /* VQDMULH */
6058 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
6061 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
6065 } else { /* VQRDMULH */
6068 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
6071 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
6077 case NEON_3R_VPADD_VQRDMLAH:
6079 case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
6080 case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
6081 case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
6085 case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
6087 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6088 switch ((u << 2) | size) {
6091 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
6094 gen_helper_vfp_subs(tmp, tmp, tmp2, fpstatus);
6097 gen_helper_neon_abd_f32(tmp, tmp, tmp2, fpstatus);
6102 tcg_temp_free_ptr(fpstatus);
6105 case NEON_3R_FLOAT_MULTIPLY:
6107 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6108 gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
6110 tcg_temp_free_i32(tmp2);
6111 tmp2 = neon_load_reg(rd, pass);
6113 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
6115 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
6118 tcg_temp_free_ptr(fpstatus);
6121 case NEON_3R_FLOAT_CMP:
6123 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6125 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
6128 gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
6130 gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
6133 tcg_temp_free_ptr(fpstatus);
6136 case NEON_3R_FLOAT_ACMP:
6138 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6140 gen_helper_neon_acge_f32(tmp, tmp, tmp2, fpstatus);
6142 gen_helper_neon_acgt_f32(tmp, tmp, tmp2, fpstatus);
6144 tcg_temp_free_ptr(fpstatus);
6147 case NEON_3R_FLOAT_MINMAX:
6149 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6151 gen_helper_vfp_maxs(tmp, tmp, tmp2, fpstatus);
6153 gen_helper_vfp_mins(tmp, tmp, tmp2, fpstatus);
6155 tcg_temp_free_ptr(fpstatus);
6158 case NEON_3R_FLOAT_MISC:
6161 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6163 gen_helper_vfp_maxnums(tmp, tmp, tmp2, fpstatus);
6165 gen_helper_vfp_minnums(tmp, tmp, tmp2, fpstatus);
6167 tcg_temp_free_ptr(fpstatus);
6170 gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
6172 gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
6176 case NEON_3R_VFM_VQRDMLSH:
6178 /* VFMA, VFMS: fused multiply-add */
6179 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6180 TCGv_i32 tmp3 = neon_load_reg(rd, pass);
6183 gen_helper_vfp_negs(tmp, tmp);
6185 gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus);
6186 tcg_temp_free_i32(tmp3);
6187 tcg_temp_free_ptr(fpstatus);
6193 tcg_temp_free_i32(tmp2);
6195 /* Save the result. For elementwise operations we can put it
6196 straight into the destination register. For pairwise operations
6197 we have to be careful to avoid clobbering the source operands. */
6198 if (pairwise && rd == rm) {
6199 neon_store_scratch(pass, tmp);
6201 neon_store_reg(rd, pass, tmp);
6205 if (pairwise && rd == rm) {
6206 for (pass = 0; pass < (q ? 4 : 2); pass++) {
6207 tmp = neon_load_scratch(pass);
6208 neon_store_reg(rd, pass, tmp);
6211 /* End of 3 register same size operations. */
6212 } else if (insn & (1 << 4)) {
6213 if ((insn & 0x00380080) != 0) {
6214 /* Two registers and shift. */
6215 op = (insn >> 8) & 0xf;
6216 if (insn & (1 << 7)) {
6224 while ((insn & (1 << (size + 19))) == 0)
6227 shift = (insn >> 16) & ((1 << (3 + size)) - 1);
6228 /* To avoid excessive duplication of ops we implement shift
6229 by immediate using the variable shift operations. */
6231 /* Shift by immediate:
6232 VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */
6233 if (q && ((rd | rm) & 1)) {
6236 if (!u && (op == 4 || op == 6)) {
6239 /* Right shifts are encoded as N - shift, where N is the
6240 element size in bits. */
6242 shift = shift - (1 << (size + 3));
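/* e.g. for size == 0 the masked immediate is 0..7, so 7 becomes -1
 * (shift right by one bit) and 0 becomes -8 (shift right by the full
 * 8-bit element width).
 */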
6250 imm = (uint8_t) shift;
6255 imm = (uint16_t) shift;
6266 for (pass = 0; pass < count; pass++) {
6268 neon_load_reg64(cpu_V0, rm + pass);
6269 tcg_gen_movi_i64(cpu_V1, imm);
6274 gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
6276 gen_helper_neon_shl_s64(cpu_V0, cpu_V0, cpu_V1);
6281 gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, cpu_V1);
6283 gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1);
6286 case 5: /* VSHL, VSLI */
6287 gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
6289 case 6: /* VQSHLU */
6290 gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
6295 gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
6298 gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
6303 if (op == 1 || op == 3) {
6305 neon_load_reg64(cpu_V1, rd + pass);
6306 tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
6307 } else if (op == 4 || (op == 5 && u)) {
6309 neon_load_reg64(cpu_V1, rd + pass);
6311 if (shift < -63 || shift > 63) {
6315 mask = 0xffffffffffffffffull >> -shift;
6317 mask = 0xffffffffffffffffull << shift;
6320 tcg_gen_andi_i64(cpu_V1, cpu_V1, ~mask);
6321 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
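/* Only the bit positions covered by 'mask' (those written by the
 * shifted-in value) are updated; the rest of the destination keeps its
 * old contents, which is what makes VSRI/VSLI "insert" operations.
 */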
6323 neon_store_reg64(cpu_V0, rd + pass);
6324 } else { /* size < 3 */
6325 /* Operands in T0 and T1. */
6326 tmp = neon_load_reg(rm, pass);
6327 tmp2 = tcg_temp_new_i32();
6328 tcg_gen_movi_i32(tmp2, imm);
6332 GEN_NEON_INTEGER_OP(shl);
6336 GEN_NEON_INTEGER_OP(rshl);
6339 case 5: /* VSHL, VSLI */
6341 case 0: gen_helper_neon_shl_u8(tmp, tmp, tmp2); break;
6342 case 1: gen_helper_neon_shl_u16(tmp, tmp, tmp2); break;
6343 case 2: gen_helper_neon_shl_u32(tmp, tmp, tmp2); break;
6347 case 6: /* VQSHLU */
6350 gen_helper_neon_qshlu_s8(tmp, cpu_env,
6354 gen_helper_neon_qshlu_s16(tmp, cpu_env,
6358 gen_helper_neon_qshlu_s32(tmp, cpu_env,
6366 GEN_NEON_INTEGER_OP_ENV(qshl);
6369 tcg_temp_free_i32(tmp2);
6371 if (op == 1 || op == 3) {
6373 tmp2 = neon_load_reg(rd, pass);
6374 gen_neon_add(size, tmp, tmp2);
6375 tcg_temp_free_i32(tmp2);
6376 } else if (op == 4 || (op == 5 && u)) {
6381 mask = 0xff >> -shift;
6383 mask = (uint8_t)(0xff << shift);
6389 mask = 0xffff >> -shift;
6391 mask = (uint16_t)(0xffff << shift);
6395 if (shift < -31 || shift > 31) {
6399 mask = 0xffffffffu >> -shift;
6401 mask = 0xffffffffu << shift;
6407 tmp2 = neon_load_reg(rd, pass);
6408 tcg_gen_andi_i32(tmp, tmp, mask);
6409 tcg_gen_andi_i32(tmp2, tmp2, ~mask);
6410 tcg_gen_or_i32(tmp, tmp, tmp2);
6411 tcg_temp_free_i32(tmp2);
6413 neon_store_reg(rd, pass, tmp);
6416 } else if (op < 10) {
6417 /* Shift by immediate and narrow:
6418 VSHRN, VRSHRN, VQSHRN, VQRSHRN. */
6419 int input_unsigned = (op == 8) ? !u : u;
6423 shift = shift - (1 << (size + 3));
6426 tmp64 = tcg_const_i64(shift);
6427 neon_load_reg64(cpu_V0, rm);
6428 neon_load_reg64(cpu_V1, rm + 1);
6429 for (pass = 0; pass < 2; pass++) {
6437 if (input_unsigned) {
6438 gen_helper_neon_rshl_u64(cpu_V0, in, tmp64);
6440 gen_helper_neon_rshl_s64(cpu_V0, in, tmp64);
6443 if (input_unsigned) {
6444 gen_helper_neon_shl_u64(cpu_V0, in, tmp64);
6446 gen_helper_neon_shl_s64(cpu_V0, in, tmp64);
6449 tmp = tcg_temp_new_i32();
6450 gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
6451 neon_store_reg(rd, pass, tmp);
6453 tcg_temp_free_i64(tmp64);
6456 imm = (uint16_t)shift;
6460 imm = (uint32_t)shift;
6462 tmp2 = tcg_const_i32(imm);
6463 tmp4 = neon_load_reg(rm + 1, 0);
6464 tmp5 = neon_load_reg(rm + 1, 1);
6465 for (pass = 0; pass < 2; pass++) {
6467 tmp = neon_load_reg(rm, 0);
6471 gen_neon_shift_narrow(size, tmp, tmp2, q,
6474 tmp3 = neon_load_reg(rm, 1);
6478 gen_neon_shift_narrow(size, tmp3, tmp2, q,
6480 tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
6481 tcg_temp_free_i32(tmp);
6482 tcg_temp_free_i32(tmp3);
6483 tmp = tcg_temp_new_i32();
6484 gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
6485 neon_store_reg(rd, pass, tmp);
6487 tcg_temp_free_i32(tmp2);
6489 } else if (op == 10) {
6491 if (q || (rd & 1)) {
6494 tmp = neon_load_reg(rm, 0);
6495 tmp2 = neon_load_reg(rm, 1);
6496 for (pass = 0; pass < 2; pass++) {
6500 gen_neon_widen(cpu_V0, tmp, size, u);
6503 /* The shift is less than the width of the source
6504 type, so we can just shift the whole register. */
6505 tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
6506 /* Widen the result of shift: we need to clear
6507 * the potential overflow bits resulting from
6508 * left bits of the narrow input appearing as
6509 * right bits of the left neighbour narrow input.
6511 if (size < 2 || !u) {
6514 imm = (0xffu >> (8 - shift));
6516 } else if (size == 1) {
6517 imm = 0xffff >> (16 - shift);
6520 imm = 0xffffffff >> (32 - shift);
6523 imm64 = imm | (((uint64_t)imm) << 32);
6527 tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
6530 neon_store_reg64(cpu_V0, rd + pass);
6532 } else if (op >= 14) {
6533 /* VCVT fixed-point. */
6534 if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
6537 /* We have already masked out the must-be-1 top bit of imm6,
6538 * hence this 32-shift where the ARM ARM has 64-imm6.
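/* e.g. imm6 == 0b110000 encodes 64 - 48 == 16 fraction bits; with the
 * must-be-1 top bit already masked off the code sees 16, and
 * 32 - 16 yields the same 16.
 */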
6541 for (pass = 0; pass < (q ? 4 : 2); pass++) {
6542 tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass));
6545 gen_vfp_ulto(0, shift, 1);
6547 gen_vfp_slto(0, shift, 1);
6550 gen_vfp_toul(0, shift, 1);
6552 gen_vfp_tosl(0, shift, 1);
6554 tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, pass));
6559 } else { /* (insn & 0x00380080) == 0 */
6561 if (q && (rd & 1)) {
6565 op = (insn >> 8) & 0xf;
6566 /* One register and immediate. */
6567 imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
6568 invert = (insn & (1 << 5)) != 0;
6569 /* Note that op = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.
6570 * We choose to not special-case this and will behave as if a
6571 * valid constant encoding of 0 had been given.
6590 imm = (imm << 8) | (imm << 24);
6593 imm = (imm << 8) | 0xff;
6596 imm = (imm << 16) | 0xffff;
6599 imm |= (imm << 8) | (imm << 16) | (imm << 24);
6607 imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
6608 | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
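/* This is the usual 8-bit to single-precision immediate expansion
 * (abcdefgh -> aBbbbbbc defgh000 00000000 00000000, with B = NOT(b)),
 * i.e. the VMOV.F32 #imm constant form.
 */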
6614 for (pass = 0; pass < (q ? 4 : 2); pass++) {
6615 if (op & 1 && op < 12) {
6616 tmp = neon_load_reg(rd, pass);
6618 /* The immediate value has already been inverted, so BIC becomes AND. */
6620 tcg_gen_andi_i32(tmp, tmp, imm);
6622 tcg_gen_ori_i32(tmp, tmp, imm);
6626 tmp = tcg_temp_new_i32();
6627 if (op == 14 && invert) {
6631 for (n = 0; n < 4; n++) {
6632 if (imm & (1 << (n + (pass & 1) * 4)))
6633 val |= 0xff << (n * 8);
6635 tcg_gen_movi_i32(tmp, val);
6637 tcg_gen_movi_i32(tmp, imm);
6640 neon_store_reg(rd, pass, tmp);
6643 } else { /* (insn & 0x00800010 == 0x00800000) */
6645 op = (insn >> 8) & 0xf;
6646 if ((insn & (1 << 6)) == 0) {
6647 /* Three registers of different lengths. */
6651 /* undefreq: bit 0 : UNDEF if size == 0
6652 * bit 1 : UNDEF if size == 1
6653 * bit 2 : UNDEF if size == 2
6654 * bit 3 : UNDEF if U == 1
6655 * Note that [2:0] set implies 'always UNDEF'
6658 /* prewiden, src1_wide, src2_wide, undefreq */
6659 static const int neon_3reg_wide[16][4] = {
6660 {1, 0, 0, 0}, /* VADDL */
6661 {1, 1, 0, 0}, /* VADDW */
6662 {1, 0, 0, 0}, /* VSUBL */
6663 {1, 1, 0, 0}, /* VSUBW */
6664 {0, 1, 1, 0}, /* VADDHN */
6665 {0, 0, 0, 0}, /* VABAL */
6666 {0, 1, 1, 0}, /* VSUBHN */
6667 {0, 0, 0, 0}, /* VABDL */
6668 {0, 0, 0, 0}, /* VMLAL */
6669 {0, 0, 0, 9}, /* VQDMLAL */
6670 {0, 0, 0, 0}, /* VMLSL */
6671 {0, 0, 0, 9}, /* VQDMLSL */
6672 {0, 0, 0, 0}, /* Integer VMULL */
6673 {0, 0, 0, 1}, /* VQDMULL */
6674 {0, 0, 0, 0xa}, /* Polynomial VMULL */
6675 {0, 0, 0, 7}, /* Reserved: always UNDEF */
6678 prewiden = neon_3reg_wide[op][0];
6679 src1_wide = neon_3reg_wide[op][1];
6680 src2_wide = neon_3reg_wide[op][2];
6681 undefreq = neon_3reg_wide[op][3];
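/* e.g. VQDMULL (undefreq == 1) UNDEFs only for byte elements,
 * polynomial VMULL (0xa) UNDEFs for size == 1 or U == 1, and the
 * reserved encoding (7) always UNDEFs.
 */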
6683 if ((undefreq & (1 << size)) ||
6684 ((undefreq & 8) && u)) {
6687 if ((src1_wide && (rn & 1)) ||
6688 (src2_wide && (rm & 1)) ||
6689 (!src2_wide && (rd & 1))) {
6693 /* Handle VMULL.P64 (Polynomial 64x64 to 128 bit multiply)
6694 * outside the loop below as it only performs a single pass.
6696 if (op == 14 && size == 2) {
6697 TCGv_i64 tcg_rn, tcg_rm, tcg_rd;
6699 if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
6702 tcg_rn = tcg_temp_new_i64();
6703 tcg_rm = tcg_temp_new_i64();
6704 tcg_rd = tcg_temp_new_i64();
6705 neon_load_reg64(tcg_rn, rn);
6706 neon_load_reg64(tcg_rm, rm);
6707 gen_helper_neon_pmull_64_lo(tcg_rd, tcg_rn, tcg_rm);
6708 neon_store_reg64(tcg_rd, rd);
6709 gen_helper_neon_pmull_64_hi(tcg_rd, tcg_rn, tcg_rm);
6710 neon_store_reg64(tcg_rd, rd + 1);
6711 tcg_temp_free_i64(tcg_rn);
6712 tcg_temp_free_i64(tcg_rm);
6713 tcg_temp_free_i64(tcg_rd);
6717 /* Avoid overlapping operands. Wide source operands are
6718 always aligned so will never overlap with wide
6719 destinations in problematic ways. */
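/* Concretely: if the wide destination shares a D register with a
 * narrow source, pass 0's 64-bit store would clobber the high half of
 * that source before pass 1 reads it, so that half is saved to a
 * scratch slot first.
 */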
6720 if (rd == rm && !src2_wide) {
6721 tmp = neon_load_reg(rm, 1);
6722 neon_store_scratch(2, tmp);
6723 } else if (rd == rn && !src1_wide) {
6724 tmp = neon_load_reg(rn, 1);
6725 neon_store_scratch(2, tmp);
6728 for (pass = 0; pass < 2; pass++) {
6730 neon_load_reg64(cpu_V0, rn + pass);
6733 if (pass == 1 && rd == rn) {
6734 tmp = neon_load_scratch(2);
6736 tmp = neon_load_reg(rn, pass);
6739 gen_neon_widen(cpu_V0, tmp, size, u);
6743 neon_load_reg64(cpu_V1, rm + pass);
6746 if (pass == 1 && rd == rm) {
6747 tmp2 = neon_load_scratch(2);
6749 tmp2 = neon_load_reg(rm, pass);
6752 gen_neon_widen(cpu_V1, tmp2, size, u);
6756 case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
6757 gen_neon_addl(size);
6759 case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
6760 gen_neon_subl(size);
6762 case 5: case 7: /* VABAL, VABDL */
6763 switch ((size << 1) | u) {
6765 gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
6768 gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
6771 gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
6774 gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
6777 gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
6780 gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
6784 tcg_temp_free_i32(tmp2);
6785 tcg_temp_free_i32(tmp);
6787 case 8: case 9: case 10: case 11: case 12: case 13:
6788 /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
6789 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
6791 case 14: /* Polynomial VMULL */
6792 gen_helper_neon_mull_p8(cpu_V0, tmp, tmp2);
6793 tcg_temp_free_i32(tmp2);
6794 tcg_temp_free_i32(tmp);
6796 default: /* 15 is RESERVED: caught earlier */
6801 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6802 neon_store_reg64(cpu_V0, rd + pass);
6803 } else if (op == 5 || (op >= 8 && op <= 11)) {
6805 neon_load_reg64(cpu_V1, rd + pass);
6807 case 10: /* VMLSL */
6808 gen_neon_negl(cpu_V0, size);
6810 case 5: case 8: /* VABAL, VMLAL */
6811 gen_neon_addl(size);
6813 case 9: case 11: /* VQDMLAL, VQDMLSL */
6814 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6816 gen_neon_negl(cpu_V0, size);
6818 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
6823 neon_store_reg64(cpu_V0, rd + pass);
6824 } else if (op == 4 || op == 6) {
6825 /* Narrowing operation. */
6826 tmp = tcg_temp_new_i32();
6830 gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
6833 gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
6836 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
6837 tcg_gen_extrl_i64_i32(tmp, cpu_V0);
6844 gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
6847 gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
6850 tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
6851 tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
6852 tcg_gen_extrl_i64_i32(tmp, cpu_V0);
6860 neon_store_reg(rd, 0, tmp3);
6861 neon_store_reg(rd, 1, tmp);
6864 /* Write back the result. */
6865 neon_store_reg64(cpu_V0, rd + pass);
6869 /* Two registers and a scalar. NB that for ops of this form
6870 * the ARM ARM labels bit 24 as Q, but it is in our variable 'u', not 'q'.
6877 case 1: /* Float VMLA scalar */
6878 case 5: /* Floating point VMLS scalar */
6879 case 9: /* Floating point VMUL scalar */
6884 case 0: /* Integer VMLA scalar */
6885 case 4: /* Integer VMLS scalar */
6886 case 8: /* Integer VMUL scalar */
6887 case 12: /* VQDMULH scalar */
6888 case 13: /* VQRDMULH scalar */
6889 if (u && ((rd | rn) & 1)) {
6892 tmp = neon_get_scalar(size, rm);
6893 neon_store_scratch(0, tmp);
6894 for (pass = 0; pass < (u ? 4 : 2); pass++) {
6895 tmp = neon_load_scratch(0);
6896 tmp2 = neon_load_reg(rn, pass);
6899 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
6901 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
6903 } else if (op == 13) {
6905 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
6907 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
6909 } else if (op & 1) {
6910 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6911 gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
6912 tcg_temp_free_ptr(fpstatus);
6915 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
6916 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
6917 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
6921 tcg_temp_free_i32(tmp2);
6924 tmp2 = neon_load_reg(rd, pass);
6927 gen_neon_add(size, tmp, tmp2);
6931 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6932 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
6933 tcg_temp_free_ptr(fpstatus);
6937 gen_neon_rsb(size, tmp, tmp2);
6941 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6942 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
6943 tcg_temp_free_ptr(fpstatus);
6949 tcg_temp_free_i32(tmp2);
6951 neon_store_reg(rd, pass, tmp);
6954 case 3: /* VQDMLAL scalar */
6955 case 7: /* VQDMLSL scalar */
6956 case 11: /* VQDMULL scalar */
6961 case 2: /* VMLAL scalar */
6962 case 6: /* VMLSL scalar */
6963 case 10: /* VMULL scalar */
6967 tmp2 = neon_get_scalar(size, rm);
6968 /* We need a copy of tmp2 because gen_neon_mull
6969 * frees it during pass 0. */
6970 tmp4 = tcg_temp_new_i32();
6971 tcg_gen_mov_i32(tmp4, tmp2);
6972 tmp3 = neon_load_reg(rn, 1);
6974 for (pass = 0; pass < 2; pass++) {
6976 tmp = neon_load_reg(rn, 0);
6981 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
6983 neon_load_reg64(cpu_V1, rd + pass);
6987 gen_neon_negl(cpu_V0, size);
6990 gen_neon_addl(size);
6993 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6995 gen_neon_negl(cpu_V0, size);
6997 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
7003 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
7008 neon_store_reg64(cpu_V0, rd + pass);
7011 case 14: /* VQRDMLAH scalar */
7012 case 15: /* VQRDMLSH scalar */
7014 NeonGenThreeOpEnvFn *fn;
7016 if (!arm_dc_feature(s, ARM_FEATURE_V8_RDM)) {
7019 if (u && ((rd | rn) & 1)) {
7024 fn = gen_helper_neon_qrdmlah_s16;
7026 fn = gen_helper_neon_qrdmlah_s32;
7030 fn = gen_helper_neon_qrdmlsh_s16;
7032 fn = gen_helper_neon_qrdmlsh_s32;
7036 tmp2 = neon_get_scalar(size, rm);
7037 for (pass = 0; pass < (u ? 4 : 2); pass++) {
7038 tmp = neon_load_reg(rn, pass);
7039 tmp3 = neon_load_reg(rd, pass);
7040 fn(tmp, cpu_env, tmp, tmp2, tmp3);
7041 tcg_temp_free_i32(tmp3);
7042 neon_store_reg(rd, pass, tmp);
7044 tcg_temp_free_i32(tmp2);
7048 g_assert_not_reached();
7051 } else { /* size == 3 */
7054 imm = (insn >> 8) & 0xf;
7059 if (q && ((rd | rn | rm) & 1)) {
7064 neon_load_reg64(cpu_V0, rn);
7066 neon_load_reg64(cpu_V1, rn + 1);
7068 } else if (imm == 8) {
7069 neon_load_reg64(cpu_V0, rn + 1);
7071 neon_load_reg64(cpu_V1, rm);
7074 tmp64 = tcg_temp_new_i64();
7076 neon_load_reg64(cpu_V0, rn);
7077 neon_load_reg64(tmp64, rn + 1);
7079 neon_load_reg64(cpu_V0, rn + 1);
7080 neon_load_reg64(tmp64, rm);
7082 tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
7083 tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
7084 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
7086 neon_load_reg64(cpu_V1, rm);
7088 neon_load_reg64(cpu_V1, rm + 1);
7091 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
7092 tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
7093 tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
7094 tcg_temp_free_i64(tmp64);
7097 neon_load_reg64(cpu_V0, rn);
7098 tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
7099 neon_load_reg64(cpu_V1, rm);
7100 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
7101 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
7103 neon_store_reg64(cpu_V0, rd);
7105 neon_store_reg64(cpu_V1, rd + 1);
7107 } else if ((insn & (1 << 11)) == 0) {
7108 /* Two register misc. */
7109 op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
7110 size = (insn >> 18) & 3;
7111 /* UNDEF for unknown op values and bad op-size combinations */
7112 if ((neon_2rm_sizes[op] & (1 << size)) == 0) {
7115 if (neon_2rm_is_v8_op(op) &&
7116 !arm_dc_feature(s, ARM_FEATURE_V8)) {
7119 if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) &&
7120 q && ((rm | rd) & 1)) {
7124 case NEON_2RM_VREV64:
7125 for (pass = 0; pass < (q ? 2 : 1); pass++) {
7126 tmp = neon_load_reg(rm, pass * 2);
7127 tmp2 = neon_load_reg(rm, pass * 2 + 1);
7129 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
7130 case 1: gen_swap_half(tmp); break;
7131 case 2: /* no-op */ break;
7134 neon_store_reg(rd, pass * 2 + 1, tmp);
7136 neon_store_reg(rd, pass * 2, tmp2);
7139 case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break;
7140 case 1: gen_swap_half(tmp2); break;
7143 neon_store_reg(rd, pass * 2, tmp2);
7147 case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
7148 case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
7149 for (pass = 0; pass < q + 1; pass++) {
7150 tmp = neon_load_reg(rm, pass * 2);
7151 gen_neon_widen(cpu_V0, tmp, size, op & 1);
7152 tmp = neon_load_reg(rm, pass * 2 + 1);
7153 gen_neon_widen(cpu_V1, tmp, size, op & 1);
7155 case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
7156 case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
7157 case 2: tcg_gen_add_i64(CPU_V001); break;
7160 if (op >= NEON_2RM_VPADAL) {
7162 neon_load_reg64(cpu_V1, rd + pass);
7163 gen_neon_addl(size);
7165 neon_store_reg64(cpu_V0, rd + pass);
7171 for (n = 0; n < (q ? 4 : 2); n += 2) {
7172 tmp = neon_load_reg(rm, n);
7173 tmp2 = neon_load_reg(rd, n + 1);
7174 neon_store_reg(rm, n, tmp2);
7175 neon_store_reg(rd, n + 1, tmp);
7182 if (gen_neon_unzip(rd, rm, size, q)) {
7187 if (gen_neon_zip(rd, rm, size, q)) {
7191 case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
7192 /* also VQMOVUN; op field and mnemonics don't line up */
7197 for (pass = 0; pass < 2; pass++) {
7198 neon_load_reg64(cpu_V0, rm + pass);
7199 tmp = tcg_temp_new_i32();
7200 gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size,
7205 neon_store_reg(rd, 0, tmp2);
7206 neon_store_reg(rd, 1, tmp);
7210 case NEON_2RM_VSHLL:
7211 if (q || (rd & 1)) {
7214 tmp = neon_load_reg(rm, 0);
7215 tmp2 = neon_load_reg(rm, 1);
7216 for (pass = 0; pass < 2; pass++) {
7219 gen_neon_widen(cpu_V0, tmp, size, 1);
7220 tcg_gen_shli_i64(cpu_V0, cpu_V0, 8 << size);
7221 neon_store_reg64(cpu_V0, rd + pass);
7224 case NEON_2RM_VCVT_F16_F32:
7225 if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) ||
7229 tmp = tcg_temp_new_i32();
7230 tmp2 = tcg_temp_new_i32();
7231 tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0));
7232 gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
7233 tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 1));
7234 gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
7235 tcg_gen_shli_i32(tmp2, tmp2, 16);
7236 tcg_gen_or_i32(tmp2, tmp2, tmp);
7237 tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 2));
7238 gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
7239 tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 3));
7240 neon_store_reg(rd, 0, tmp2);
7241 tmp2 = tcg_temp_new_i32();
7242 gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
7243 tcg_gen_shli_i32(tmp2, tmp2, 16);
7244 tcg_gen_or_i32(tmp2, tmp2, tmp);
7245 neon_store_reg(rd, 1, tmp2);
7246 tcg_temp_free_i32(tmp);
7248 case NEON_2RM_VCVT_F32_F16:
7249 if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) ||
7253 tmp3 = tcg_temp_new_i32();
7254 tmp = neon_load_reg(rm, 0);
7255 tmp2 = neon_load_reg(rm, 1);
7256 tcg_gen_ext16u_i32(tmp3, tmp);
7257 gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
7258 tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 0));
7259 tcg_gen_shri_i32(tmp3, tmp, 16);
7260 gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
7261 tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 1));
7262 tcg_temp_free_i32(tmp);
7263 tcg_gen_ext16u_i32(tmp3, tmp2);
7264 gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
7265 tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 2));
7266 tcg_gen_shri_i32(tmp3, tmp2, 16);
7267 gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
7268 tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 3));
7269 tcg_temp_free_i32(tmp2);
7270 tcg_temp_free_i32(tmp3);
7272 case NEON_2RM_AESE: case NEON_2RM_AESMC:
7273 if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
7274 || ((rm | rd) & 1)) {
7277 ptr1 = vfp_reg_ptr(true, rd);
7278 ptr2 = vfp_reg_ptr(true, rm);
7280 /* Bit 6 is the lowest opcode bit; it distinguishes between
7281 * encryption (AESE/AESMC) and decryption (AESD/AESIMC)
7283 tmp3 = tcg_const_i32(extract32(insn, 6, 1));
7285 if (op == NEON_2RM_AESE) {
7286 gen_helper_crypto_aese(ptr1, ptr2, tmp3);
7288 gen_helper_crypto_aesmc(ptr1, ptr2, tmp3);
7290 tcg_temp_free_ptr(ptr1);
7291 tcg_temp_free_ptr(ptr2);
7292 tcg_temp_free_i32(tmp3);
7294 case NEON_2RM_SHA1H:
7295 if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)
7296 || ((rm | rd) & 1)) {
7299 ptr1 = vfp_reg_ptr(true, rd);
7300 ptr2 = vfp_reg_ptr(true, rm);
7302 gen_helper_crypto_sha1h(ptr1, ptr2);
7304 tcg_temp_free_ptr(ptr1);
7305 tcg_temp_free_ptr(ptr2);
7307 case NEON_2RM_SHA1SU1:
7308 if ((rm | rd) & 1) {
7311 /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
7313 if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA256)) {
7316 } else if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA1)) {
7319 ptr1 = vfp_reg_ptr(true, rd);
7320 ptr2 = vfp_reg_ptr(true, rm);
7322 gen_helper_crypto_sha256su0(ptr1, ptr2);
7324 gen_helper_crypto_sha1su1(ptr1, ptr2);
7326 tcg_temp_free_ptr(ptr1);
7327 tcg_temp_free_ptr(ptr2);
7331 for (pass = 0; pass < (q ? 4 : 2); pass++) {
7332 if (neon_2rm_is_float_op(op)) {
7333 tcg_gen_ld_f32(cpu_F0s, cpu_env,
7334 neon_reg_offset(rm, pass));
7337 tmp = neon_load_reg(rm, pass);
7340 case NEON_2RM_VREV32:
7342 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
7343 case 1: gen_swap_half(tmp); break;
7347 case NEON_2RM_VREV16:
7352 case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
7353 case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
7354 case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
7360 case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
7361 case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
7362 case 2: tcg_gen_clzi_i32(tmp, tmp, 32); break;
7367 gen_helper_neon_cnt_u8(tmp, tmp);
7370 tcg_gen_not_i32(tmp, tmp);
7372 case NEON_2RM_VQABS:
7375 gen_helper_neon_qabs_s8(tmp, cpu_env, tmp);
7378 gen_helper_neon_qabs_s16(tmp, cpu_env, tmp);
7381 gen_helper_neon_qabs_s32(tmp, cpu_env, tmp);
7386 case NEON_2RM_VQNEG:
7389 gen_helper_neon_qneg_s8(tmp, cpu_env, tmp);
7392 gen_helper_neon_qneg_s16(tmp, cpu_env, tmp);
7395 gen_helper_neon_qneg_s32(tmp, cpu_env, tmp);
7400 case NEON_2RM_VCGT0: case NEON_2RM_VCLE0:
7401 tmp2 = tcg_const_i32(0);
7403 case 0: gen_helper_neon_cgt_s8(tmp, tmp, tmp2); break;
7404 case 1: gen_helper_neon_cgt_s16(tmp, tmp, tmp2); break;
7405 case 2: gen_helper_neon_cgt_s32(tmp, tmp, tmp2); break;
7408 tcg_temp_free_i32(tmp2);
7409 if (op == NEON_2RM_VCLE0) {
7410 tcg_gen_not_i32(tmp, tmp);
7413 case NEON_2RM_VCGE0: case NEON_2RM_VCLT0:
7414 tmp2 = tcg_const_i32(0);
7416 case 0: gen_helper_neon_cge_s8(tmp, tmp, tmp2); break;
7417 case 1: gen_helper_neon_cge_s16(tmp, tmp, tmp2); break;
7418 case 2: gen_helper_neon_cge_s32(tmp, tmp, tmp2); break;
7421 tcg_temp_free_i32(tmp2);
7422 if (op == NEON_2RM_VCLT0) {
7423 tcg_gen_not_i32(tmp, tmp);
7426 case NEON_2RM_VCEQ0:
7427 tmp2 = tcg_const_i32(0);
7429 case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
7430 case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
7431 case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
7434 tcg_temp_free_i32(tmp2);
7438 case 0: gen_helper_neon_abs_s8(tmp, tmp); break;
7439 case 1: gen_helper_neon_abs_s16(tmp, tmp); break;
7440 case 2: tcg_gen_abs_i32(tmp, tmp); break;
7445 tmp2 = tcg_const_i32(0);
7446 gen_neon_rsb(size, tmp, tmp2);
7447 tcg_temp_free_i32(tmp2);
7449 case NEON_2RM_VCGT0_F:
7451 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7452 tmp2 = tcg_const_i32(0);
7453 gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
7454 tcg_temp_free_i32(tmp2);
7455 tcg_temp_free_ptr(fpstatus);
7458 case NEON_2RM_VCGE0_F:
7460 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7461 tmp2 = tcg_const_i32(0);
7462 gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
7463 tcg_temp_free_i32(tmp2);
7464 tcg_temp_free_ptr(fpstatus);
7467 case NEON_2RM_VCEQ0_F:
7469 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7470 tmp2 = tcg_const_i32(0);
7471 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
7472 tcg_temp_free_i32(tmp2);
7473 tcg_temp_free_ptr(fpstatus);
7476 case NEON_2RM_VCLE0_F:
7478 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7479 tmp2 = tcg_const_i32(0);
7480 gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus);
7481 tcg_temp_free_i32(tmp2);
7482 tcg_temp_free_ptr(fpstatus);
7485 case NEON_2RM_VCLT0_F:
7487 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7488 tmp2 = tcg_const_i32(0);
7489 gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus);
7490 tcg_temp_free_i32(tmp2);
7491 tcg_temp_free_ptr(fpstatus);
7494 case NEON_2RM_VABS_F:
7497 case NEON_2RM_VNEG_F:
7501 tmp2 = neon_load_reg(rd, pass);
7502 neon_store_reg(rm, pass, tmp2);
7505 tmp2 = neon_load_reg(rd, pass);
7507 case 0: gen_neon_trn_u8(tmp, tmp2); break;
7508 case 1: gen_neon_trn_u16(tmp, tmp2); break;
7511 neon_store_reg(rm, pass, tmp2);
7513 case NEON_2RM_VRINTN:
7514 case NEON_2RM_VRINTA:
7515 case NEON_2RM_VRINTM:
7516 case NEON_2RM_VRINTP:
7517 case NEON_2RM_VRINTZ:
7520 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7523 if (op == NEON_2RM_VRINTZ) {
7524 rmode = FPROUNDING_ZERO;
7526 rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1];
7529 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
7530 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
7532 gen_helper_rints(cpu_F0s, cpu_F0s, fpstatus);
7533 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
7535 tcg_temp_free_ptr(fpstatus);
7536 tcg_temp_free_i32(tcg_rmode);
7539 case NEON_2RM_VRINTX:
7541 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7542 gen_helper_rints_exact(cpu_F0s, cpu_F0s, fpstatus);
7543 tcg_temp_free_ptr(fpstatus);
7546 case NEON_2RM_VCVTAU:
7547 case NEON_2RM_VCVTAS:
7548 case NEON_2RM_VCVTNU:
7549 case NEON_2RM_VCVTNS:
7550 case NEON_2RM_VCVTPU:
7551 case NEON_2RM_VCVTPS:
7552 case NEON_2RM_VCVTMU:
7553 case NEON_2RM_VCVTMS:
7555 bool is_signed = !extract32(insn, 7, 1);
7556 TCGv_ptr fpst = get_fpstatus_ptr(1);
7557 TCGv_i32 tcg_rmode, tcg_shift;
7558 int rmode = fp_decode_rm[extract32(insn, 8, 2)];
7560 tcg_shift = tcg_const_i32(0);
7561 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
7562 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
7566 gen_helper_vfp_tosls(cpu_F0s, cpu_F0s,
7569 gen_helper_vfp_touls(cpu_F0s, cpu_F0s,
7573 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
7575 tcg_temp_free_i32(tcg_rmode);
7576 tcg_temp_free_i32(tcg_shift);
7577 tcg_temp_free_ptr(fpst);
7580 case NEON_2RM_VRECPE:
7582 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7583 gen_helper_recpe_u32(tmp, tmp, fpstatus);
7584 tcg_temp_free_ptr(fpstatus);
7587 case NEON_2RM_VRSQRTE:
7589 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7590 gen_helper_rsqrte_u32(tmp, tmp, fpstatus);
7591 tcg_temp_free_ptr(fpstatus);
7594 case NEON_2RM_VRECPE_F:
7596 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7597 gen_helper_recpe_f32(cpu_F0s, cpu_F0s, fpstatus);
7598 tcg_temp_free_ptr(fpstatus);
7601 case NEON_2RM_VRSQRTE_F:
7603 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
7604 gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, fpstatus);
7605 tcg_temp_free_ptr(fpstatus);
7608 case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
7611 case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
7614 case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
7615 gen_vfp_tosiz(0, 1);
7617 case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
7618 gen_vfp_touiz(0, 1);
7621 /* Reserved op values were caught by the
7622 * neon_2rm_sizes[] check earlier.
7626 if (neon_2rm_is_float_op(op)) {
7627 tcg_gen_st_f32(cpu_F0s, cpu_env,
7628 neon_reg_offset(rd, pass));
7630 neon_store_reg(rd, pass, tmp);
7635 } else if ((insn & (1 << 10)) == 0) {
7637 int n = ((insn >> 8) & 3) + 1;
7638 if ((rn + n) > 32) {
7639 /* This is UNPREDICTABLE; we choose to UNDEF to avoid the
7640 * helper function running off the end of the register file.
7645 if (insn & (1 << 6)) {
7646 tmp = neon_load_reg(rd, 0);
7648 tmp = tcg_temp_new_i32();
7649 tcg_gen_movi_i32(tmp, 0);
7651 tmp2 = neon_load_reg(rm, 0);
7652 ptr1 = vfp_reg_ptr(true, rn);
7653 tmp5 = tcg_const_i32(n);
7654 gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp5);
7655 tcg_temp_free_i32(tmp);
7656 if (insn & (1 << 6)) {
7657 tmp = neon_load_reg(rd, 1);
7659 tmp = tcg_temp_new_i32();
7660 tcg_gen_movi_i32(tmp, 0);
7662 tmp3 = neon_load_reg(rm, 1);
7663 gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp5);
7664 tcg_temp_free_i32(tmp5);
7665 tcg_temp_free_ptr(ptr1);
7666 neon_store_reg(rd, 0, tmp2);
7667 neon_store_reg(rd, 1, tmp3);
7668 tcg_temp_free_i32(tmp);
7669 } else if ((insn & 0x380) == 0) {
7671 if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
7674 if (insn & (1 << 19)) {
7675 tmp = neon_load_reg(rm, 1);
7677 tmp = neon_load_reg(rm, 0);
7679 if (insn & (1 << 16)) {
7680 gen_neon_dup_u8(tmp, ((insn >> 17) & 3) * 8);
7681 } else if (insn & (1 << 17)) {
7682 if ((insn >> 18) & 1)
7683 gen_neon_dup_high16(tmp);
7685 gen_neon_dup_low16(tmp);
7687 for (pass = 0; pass < (q ? 4 : 2); pass++) {
7688 tmp2 = tcg_temp_new_i32();
7689 tcg_gen_mov_i32(tmp2, tmp);
7690 neon_store_reg(rd, pass, tmp2);
7692 tcg_temp_free_i32(tmp);
7701 /* Advanced SIMD three registers of the same length extension.
7702 * 31 25 23 22 20 16 12 11 10 9 8 3 0
7703 * +---------------+-----+---+-----+----+----+---+----+---+----+---------+----+
7704 * | 1 1 1 1 1 1 0 | op1 | D | op2 | Vn | Vd | 1 | o3 | 0 | o4 | N Q M U | Vm |
7705 * +---------------+-----+---+-----+----+----+---+----+---+----+---------+----+
7707 static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn)
7709 gen_helper_gvec_3_ptr *fn_gvec_ptr;
7710 int rd, rn, rm, rot, size, opr_sz;
7714 q = extract32(insn, 6, 1);
7715 VFP_DREG_D(rd, insn);
7716 VFP_DREG_N(rn, insn);
7717 VFP_DREG_M(rm, insn);
7718 if ((rd | rn | rm) & q) {
7722 if ((insn & 0xfe200f10) == 0xfc200800) {
7723 /* VCMLA -- 1111 110R R.1S .... .... 1000 ...0 .... */
7724 size = extract32(insn, 20, 1);
7725 rot = extract32(insn, 23, 2);
7726 if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)
7727 || (!size && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))) {
7730 fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah;
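/* For VCMLA, rot selects the rotation applied to the second operand
 * (0, 90, 180 or 270 degrees); it is passed through unchanged as the
 * gvec 'data' argument below.
 */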
7731 } else if ((insn & 0xfea00f10) == 0xfc800800) {
7732 /* VCADD -- 1111 110R 1.0S .... .... 1000 ...0 .... */
7733 size = extract32(insn, 20, 1);
7734 rot = extract32(insn, 24, 1);
7735 if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)
7736 || (!size && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))) {
7739 fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh;
7744 if (s->fp_excp_el) {
7745 gen_exception_insn(s, 4, EXCP_UDEF,
7746 syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
7749 if (!s->vfp_enabled) {
7753 opr_sz = (1 + q) * 8;
7754 fpst = get_fpstatus_ptr(1);
7755 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
7756 vfp_reg_offset(1, rn),
7757 vfp_reg_offset(1, rm), fpst,
7758 opr_sz, opr_sz, rot, fn_gvec_ptr);
7759 tcg_temp_free_ptr(fpst);
7763 /* Advanced SIMD two registers and a scalar extension.
7764 * 31 24 23 22 20 16 12 11 10 9 8 3 0
7765 * +-----------------+----+---+----+----+----+---+----+---+----+---------+----+
7766 * | 1 1 1 1 1 1 1 0 | o1 | D | o2 | Vn | Vd | 1 | o3 | 0 | o4 | N Q M U | Vm |
7767 * +-----------------+----+---+----+----+----+---+----+---+----+---------+----+
7771 static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn)
7773 int rd, rn, rm, rot, size, opr_sz;
7777 q = extract32(insn, 6, 1);
7778 VFP_DREG_D(rd, insn);
7779 VFP_DREG_N(rn, insn);
7780 VFP_DREG_M(rm, insn);
7781 if ((rd | rn) & q) {
7785 if ((insn & 0xff000f10) == 0xfe000800) {
7786 /* VCMLA (indexed) -- 1111 1110 S.RR .... .... 1000 ...0 .... */
7787 rot = extract32(insn, 20, 2);
7788 size = extract32(insn, 23, 1);
7789 if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)
7790 || (!size && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))) {
7797 if (s->fp_excp_el) {
7798 gen_exception_insn(s, 4, EXCP_UDEF,
7799 syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
7802 if (!s->vfp_enabled) {
7806 opr_sz = (1 + q) * 8;
7807 fpst = get_fpstatus_ptr(1);
7808 tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
7809 vfp_reg_offset(1, rn),
7810 vfp_reg_offset(1, rm), fpst,
7811 opr_sz, opr_sz, rot,
7812 size ? gen_helper_gvec_fcmlas_idx
7813 : gen_helper_gvec_fcmlah_idx);
7814 tcg_temp_free_ptr(fpst);
7818 static int disas_coproc_insn(DisasContext *s, uint32_t insn)
7820 int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
7821 const ARMCPRegInfo *ri;
7823 cpnum = (insn >> 8) & 0xf;
7825 /* First check for coprocessor space used for XScale/iwMMXt insns */
7826 if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cpnum < 2)) {
7827 if (extract32(s->c15_cpar, cpnum, 1) == 0) {
7830 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
7831 return disas_iwmmxt_insn(s, insn);
7832 } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
7833 return disas_dsp_insn(s, insn);
7838 /* Otherwise treat as a generic register access */
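    /* Field layout of the generic AArch32 system register forms decoded
     * below (a summary of the architectural encodings):
     *   MCRR/MRRC (64-bit): opc1 = insn[7:4],  Rt2 = insn[19:16]
     *   MCR/MRC   (32-bit): opc1 = insn[23:21], CRn = insn[19:16],
     *                       opc2 = insn[7:5]
     * In both forms CRm = insn[3:0], Rt = insn[15:12], and bit 20 is the
     * read (L) bit.
     */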
7839 is64 = (insn & (1 << 25)) == 0;
7840 if (!is64 && ((insn & (1 << 4)) == 0)) {
7848 opc1 = (insn >> 4) & 0xf;
7850 rt2 = (insn >> 16) & 0xf;
7852 crn = (insn >> 16) & 0xf;
7853 opc1 = (insn >> 21) & 7;
7854 opc2 = (insn >> 5) & 7;
7857 isread = (insn >> 20) & 1;
7858 rt = (insn >> 12) & 0xf;
7860 ri = get_arm_cp_reginfo(s->cp_regs,
7861 ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
7863 /* Check access permissions */
7864 if (!cp_access_ok(s->current_el, ri, isread)) {
7869 (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
7870 /* Emit code to perform further access permissions checks at
7871 * runtime; this may result in an exception.
7872 * Note that on XScale all cp0..c13 registers do an access check
7873 * call in order to handle c15_cpar.
7876 TCGv_i32 tcg_syn, tcg_isread;
7879 /* Note that since we are an implementation which takes an
7880 * exception on a trapped conditional instruction only if the
7881 * instruction passes its condition code check, we can take
7882 * advantage of the clause in the ARM ARM that allows us to set
7883 * the COND field in the instruction to 0xE in all cases.
7884 * We could fish the actual condition out of the insn (ARM)
7885 * or the condexec bits (Thumb) but it isn't necessary.
7890 syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
7893 syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
7899 syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
7902 syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
7907 /* ARMv8 defines that only coprocessors 14 and 15 exist,
7908 * so this can only happen if this is an ARMv7 or earlier CPU,
 7909              * in which case the syndrome information won't actually be
                   * guest visible.
                   */
7912 assert(!arm_dc_feature(s, ARM_FEATURE_V8));
7913 syndrome = syn_uncategorized();
7917 gen_set_condexec(s);
7918 gen_set_pc_im(s, s->pc - 4);
7919 tmpptr = tcg_const_ptr(ri);
7920 tcg_syn = tcg_const_i32(syndrome);
7921 tcg_isread = tcg_const_i32(isread);
7922 gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn,
7924 tcg_temp_free_ptr(tmpptr);
7925 tcg_temp_free_i32(tcg_syn);
7926 tcg_temp_free_i32(tcg_isread);
7929 /* Handle special cases first */
7930 switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
7937 gen_set_pc_im(s, s->pc);
7938 s->base.is_jmp = DISAS_WFI;
7944 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
7953 if (ri->type & ARM_CP_CONST) {
7954 tmp64 = tcg_const_i64(ri->resetvalue);
7955 } else if (ri->readfn) {
7957 tmp64 = tcg_temp_new_i64();
7958 tmpptr = tcg_const_ptr(ri);
7959 gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
7960 tcg_temp_free_ptr(tmpptr);
7962 tmp64 = tcg_temp_new_i64();
7963 tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
7965 tmp = tcg_temp_new_i32();
7966 tcg_gen_extrl_i64_i32(tmp, tmp64);
7967 store_reg(s, rt, tmp);
7968 tcg_gen_shri_i64(tmp64, tmp64, 32);
7969 tmp = tcg_temp_new_i32();
7970 tcg_gen_extrl_i64_i32(tmp, tmp64);
7971 tcg_temp_free_i64(tmp64);
7972 store_reg(s, rt2, tmp);
7975 if (ri->type & ARM_CP_CONST) {
7976 tmp = tcg_const_i32(ri->resetvalue);
7977 } else if (ri->readfn) {
7979 tmp = tcg_temp_new_i32();
7980 tmpptr = tcg_const_ptr(ri);
7981 gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
7982 tcg_temp_free_ptr(tmpptr);
7984 tmp = load_cpu_offset(ri->fieldoffset);
7987 /* Destination register of r15 for 32 bit loads sets
7988 * the condition codes from the high 4 bits of the value
7991 tcg_temp_free_i32(tmp);
7993 store_reg(s, rt, tmp);
7998 if (ri->type & ARM_CP_CONST) {
7999 /* If not forbidden by access permissions, treat as WI */
8004 TCGv_i32 tmplo, tmphi;
8005 TCGv_i64 tmp64 = tcg_temp_new_i64();
8006 tmplo = load_reg(s, rt);
8007 tmphi = load_reg(s, rt2);
8008 tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
8009 tcg_temp_free_i32(tmplo);
8010 tcg_temp_free_i32(tmphi);
8012 TCGv_ptr tmpptr = tcg_const_ptr(ri);
8013 gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
8014 tcg_temp_free_ptr(tmpptr);
8016 tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
8018 tcg_temp_free_i64(tmp64);
8023 tmp = load_reg(s, rt);
8024 tmpptr = tcg_const_ptr(ri);
8025 gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
8026 tcg_temp_free_ptr(tmpptr);
8027 tcg_temp_free_i32(tmp);
8029 TCGv_i32 tmp = load_reg(s, rt);
8030 store_cpu_offset(tmp, ri->fieldoffset);
8035 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
8036 /* I/O operations must end the TB here (whether read or write) */
8039 } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
8040 /* We default to ending the TB on a coprocessor register write,
8041 * but allow this to be suppressed by the register definition
8042 * (usually only necessary to work around guest bugs).
8050 /* Unknown register; this might be a guest error or a QEMU
8051 * unimplemented feature.
8054 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
8055 "64 bit system register cp:%d opc1: %d crm:%d "
8057 isread ? "read" : "write", cpnum, opc1, crm,
8058 s->ns ? "non-secure" : "secure");
8060 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
8061 "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
8063 isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
8064 s->ns ? "non-secure" : "secure");
8071 /* Store a 64-bit value to a register pair. Clobbers val. */
8072 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
8075 tmp = tcg_temp_new_i32();
8076 tcg_gen_extrl_i64_i32(tmp, val);
8077 store_reg(s, rlow, tmp);
8078 tmp = tcg_temp_new_i32();
8079 tcg_gen_shri_i64(val, val, 32);
8080 tcg_gen_extrl_i64_i32(tmp, val);
8081 store_reg(s, rhigh, tmp);
8084 /* load a 32-bit value from a register and perform a 64-bit accumulate. */
8085 static void gen_addq_lo(DisasContext *s, TCGv_i64 val, int rlow)
8090 /* Load value and extend to 64 bits. */
8091 tmp = tcg_temp_new_i64();
8092 tmp2 = load_reg(s, rlow);
8093 tcg_gen_extu_i32_i64(tmp, tmp2);
8094 tcg_temp_free_i32(tmp2);
8095 tcg_gen_add_i64(val, val, tmp);
8096 tcg_temp_free_i64(tmp);
8099 /* load and add a 64-bit value from a register pair. */
8100 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
8106 /* Load 64-bit value rd:rn. */
8107 tmpl = load_reg(s, rlow);
8108 tmph = load_reg(s, rhigh);
8109 tmp = tcg_temp_new_i64();
8110 tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
8111 tcg_temp_free_i32(tmpl);
8112 tcg_temp_free_i32(tmph);
8113 tcg_gen_add_i64(val, val, tmp);
8114 tcg_temp_free_i64(tmp);
8117 /* Set N and Z flags from hi|lo. */
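/* (cpu_NF holds a value whose bit 31 is the N flag, and cpu_ZF encodes Z as
 * "Z is set iff cpu_ZF == 0", so a move plus an OR is all that is needed
 * for a 64-bit result.)
 */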
8118 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
8120 tcg_gen_mov_i32(cpu_NF, hi);
8121 tcg_gen_or_i32(cpu_ZF, lo, hi);
8124 /* Load/Store exclusive instructions are implemented by remembering
8125 the value/address loaded, and seeing if these are the same
8126 when the store is performed. This should be sufficient to implement
8127 the architecturally mandated semantics, and avoids having to monitor
8128 regular stores. The compare vs the remembered value is done during
8129 the cmpxchg operation, but we must compare the addresses manually. */
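/* For illustration (guest code, not part of this file): a typical atomic
 * increment built on this pair is
 *     retry:
 *         ldrex   r1, [r0]        @ remember r0 and the loaded value
 *         add     r1, r1, #1
 *         strex   r2, r1, [r0]    @ r2 = 0 only if the cmpxchg against the
 *         cmp     r2, #0          @   remembered value succeeds
 *         bne     retry
 * so the store-exclusive below fails whenever the address no longer matches
 * cpu_exclusive_addr or the memory word no longer equals cpu_exclusive_val.
 */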
8130 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
8131 TCGv_i32 addr, int size)
8133 TCGv_i32 tmp = tcg_temp_new_i32();
8134 TCGMemOp opc = size | MO_ALIGN | s->be_data;
8139 TCGv_i32 tmp2 = tcg_temp_new_i32();
8140 TCGv_i64 t64 = tcg_temp_new_i64();
8142 /* For AArch32, architecturally the 32-bit word at the lowest
8143 * address is always Rt and the one at addr+4 is Rt2, even if
8144 * the CPU is big-endian. That means we don't want to do a
8145 * gen_aa32_ld_i64(), which invokes gen_aa32_frob64() as if
8146 * for an architecturally 64-bit access, but instead do a
 8147              * 64-bit access using MO_BE if appropriate and then split
                   * the two halves.
8149 * This only makes a difference for BE32 user-mode, where
8150 * frob64() must not flip the two halves of the 64-bit data
8151 * but this code must treat BE32 user-mode like BE32 system.
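             * Worked example (values chosen purely for illustration): with
             * [addr] = 0x00112233 and [addr+4] = 0x44556677 on a big-endian
             * guest, the single MO_BE 64-bit load produces
             * 0x0011223344556677, and the extr below then yields
             * Rt = 0x00112233 (the word at the lower address) and
             * Rt2 = 0x44556677, as the architecture requires.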
8153 TCGv taddr = gen_aa32_addr(s, addr, opc);
8155 tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
8156 tcg_temp_free(taddr);
8157 tcg_gen_mov_i64(cpu_exclusive_val, t64);
8158 if (s->be_data == MO_BE) {
8159 tcg_gen_extr_i64_i32(tmp2, tmp, t64);
8161 tcg_gen_extr_i64_i32(tmp, tmp2, t64);
8163 tcg_temp_free_i64(t64);
8165 store_reg(s, rt2, tmp2);
8167 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
8168 tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
8171 store_reg(s, rt, tmp);
8172 tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
8175 static void gen_clrex(DisasContext *s)
8177 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
8180 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
8181 TCGv_i32 addr, int size)
8183 TCGv_i32 t0, t1, t2;
8186 TCGLabel *done_label;
8187 TCGLabel *fail_label;
8188 TCGMemOp opc = size | MO_ALIGN | s->be_data;
 8190     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
                [addr] = {Rt};
                {Rd} = 0;
              } else {
                {Rd} = 1;
              } */
8196 fail_label = gen_new_label();
8197 done_label = gen_new_label();
8198 extaddr = tcg_temp_new_i64();
8199 tcg_gen_extu_i32_i64(extaddr, addr);
8200 tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
8201 tcg_temp_free_i64(extaddr);
8203 taddr = gen_aa32_addr(s, addr, opc);
8204 t0 = tcg_temp_new_i32();
8205 t1 = load_reg(s, rt);
8207 TCGv_i64 o64 = tcg_temp_new_i64();
8208 TCGv_i64 n64 = tcg_temp_new_i64();
8210 t2 = load_reg(s, rt2);
8211 /* For AArch32, architecturally the 32-bit word at the lowest
8212 * address is always Rt and the one at addr+4 is Rt2, even if
8213 * the CPU is big-endian. Since we're going to treat this as a
8214 * single 64-bit BE store, we need to put the two halves in the
8215 * opposite order for BE to LE, so that they end up in the right
8217 * We don't want gen_aa32_frob64() because that does the wrong
8218 * thing for BE32 usermode.
8220 if (s->be_data == MO_BE) {
8221 tcg_gen_concat_i32_i64(n64, t2, t1);
8223 tcg_gen_concat_i32_i64(n64, t1, t2);
8225 tcg_temp_free_i32(t2);
8227 tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
8228 get_mem_index(s), opc);
8229 tcg_temp_free_i64(n64);
8231 tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
8232 tcg_gen_extrl_i64_i32(t0, o64);
8234 tcg_temp_free_i64(o64);
8236 t2 = tcg_temp_new_i32();
8237 tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
8238 tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
8239 tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
8240 tcg_temp_free_i32(t2);
8242 tcg_temp_free_i32(t1);
8243 tcg_temp_free(taddr);
8244 tcg_gen_mov_i32(cpu_R[rd], t0);
8245 tcg_temp_free_i32(t0);
8246 tcg_gen_br(done_label);
8248 gen_set_label(fail_label);
8249 tcg_gen_movi_i32(cpu_R[rd], 1);
8250 gen_set_label(done_label);
8251 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
8257 * @mode: mode field from insn (which stack to store to)
8258 * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
8259 * @writeback: true if writeback bit set
8261 * Generate code for the SRS (Store Return State) insn.
8263 static void gen_srs(DisasContext *s,
8264 uint32_t mode, uint32_t amode, bool writeback)
8271 * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
8272 * and specified mode is monitor mode
8273 * - UNDEFINED in Hyp mode
8274 * - UNPREDICTABLE in User or System mode
8275 * - UNPREDICTABLE if the specified mode is:
8276 * -- not implemented
8277 * -- not a valid mode number
8278 * -- a mode that's at a higher exception level
8279 * -- Monitor, if we are Non-secure
8280 * For the UNPREDICTABLE cases we choose to UNDEF.
8282 if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
8283 gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(), 3);
8287 if (s->current_el == 0 || s->current_el == 2) {
8292 case ARM_CPU_MODE_USR:
8293 case ARM_CPU_MODE_FIQ:
8294 case ARM_CPU_MODE_IRQ:
8295 case ARM_CPU_MODE_SVC:
8296 case ARM_CPU_MODE_ABT:
8297 case ARM_CPU_MODE_UND:
8298 case ARM_CPU_MODE_SYS:
8300 case ARM_CPU_MODE_HYP:
8301 if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
8305 case ARM_CPU_MODE_MON:
8306 /* No need to check specifically for "are we non-secure" because
8307 * we've already made EL0 UNDEF and handled the trap for S-EL1;
8308 * so if this isn't EL3 then we must be non-secure.
8310 if (s->current_el != 3) {
8319 gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
8320 default_exception_el(s));
8324 addr = tcg_temp_new_i32();
8325 tmp = tcg_const_i32(mode);
8326 /* get_r13_banked() will raise an exception if called from System mode */
8327 gen_set_condexec(s);
8328 gen_set_pc_im(s, s->pc - 4);
8329 gen_helper_get_r13_banked(addr, cpu_env, tmp);
8330 tcg_temp_free_i32(tmp);
8347 tcg_gen_addi_i32(addr, addr, offset);
8348 tmp = load_reg(s, 14);
8349 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
8350 tcg_temp_free_i32(tmp);
8351 tmp = load_cpu_field(spsr);
8352 tcg_gen_addi_i32(addr, addr, 4);
8353 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
8354 tcg_temp_free_i32(tmp);
8372 tcg_gen_addi_i32(addr, addr, offset);
8373 tmp = tcg_const_i32(mode);
8374 gen_helper_set_r13_banked(cpu_env, tmp, addr);
8375 tcg_temp_free_i32(tmp);
8377 tcg_temp_free_i32(addr);
8378 s->base.is_jmp = DISAS_UPDATE;
8381 static void disas_arm_insn(DisasContext *s, unsigned int insn)
8383 unsigned int cond, val, op1, i, shift, rm, rs, rn, rd, sh;
8390 /* M variants do not implement ARM mode; this must raise the INVSTATE
8391 * UsageFault exception.
8393 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8394 gen_exception_insn(s, 4, EXCP_INVSTATE, syn_uncategorized(),
8395 default_exception_el(s));
8400 /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
8401 * choose to UNDEF. In ARMv5 and above the space is used
8402 * for miscellaneous unconditional instructions.
8406 /* Unconditional instructions. */
8407 if (((insn >> 25) & 7) == 1) {
8408 /* NEON Data processing. */
8409 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
8413 if (disas_neon_data_insn(s, insn)) {
8418 if ((insn & 0x0f100000) == 0x04000000) {
8419 /* NEON load/store. */
8420 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
8424 if (disas_neon_ls_insn(s, insn)) {
8429 if ((insn & 0x0f000e10) == 0x0e000a00) {
8431 if (disas_vfp_insn(s, insn)) {
8436 if (((insn & 0x0f30f000) == 0x0510f000) ||
8437 ((insn & 0x0f30f010) == 0x0710f000)) {
8438 if ((insn & (1 << 22)) == 0) {
8440 if (!arm_dc_feature(s, ARM_FEATURE_V7MP)) {
8444 /* Otherwise PLD; v5TE+ */
8448 if (((insn & 0x0f70f000) == 0x0450f000) ||
8449 ((insn & 0x0f70f010) == 0x0650f000)) {
8451 return; /* PLI; V7 */
8453 if (((insn & 0x0f700000) == 0x04100000) ||
8454 ((insn & 0x0f700010) == 0x06100000)) {
8455 if (!arm_dc_feature(s, ARM_FEATURE_V7MP)) {
8458 return; /* v7MP: Unallocated memory hint: must NOP */
8461 if ((insn & 0x0ffffdff) == 0x01010000) {
8464 if (((insn >> 9) & 1) != !!(s->be_data == MO_BE)) {
8465 gen_helper_setend(cpu_env);
8466 s->base.is_jmp = DISAS_UPDATE;
8469 } else if ((insn & 0x0fffff00) == 0x057ff000) {
8470 switch ((insn >> 4) & 0xf) {
8478 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
8481 /* We need to break the TB after this insn to execute
8482 * self-modifying code correctly and also to take
8483 * any pending interrupts immediately.
8485 gen_goto_tb(s, 0, s->pc & ~1);
8490 } else if ((insn & 0x0e5fffe0) == 0x084d0500) {
8493 gen_srs(s, (insn & 0x1f), (insn >> 23) & 3, insn & (1 << 21));
8495 } else if ((insn & 0x0e50ffe0) == 0x08100a00) {
8501 rn = (insn >> 16) & 0xf;
8502 addr = load_reg(s, rn);
8503 i = (insn >> 23) & 3;
8505 case 0: offset = -4; break; /* DA */
8506 case 1: offset = 0; break; /* IA */
8507 case 2: offset = -8; break; /* DB */
8508 case 3: offset = 4; break; /* IB */
8512 tcg_gen_addi_i32(addr, addr, offset);
8513 /* Load PC into tmp and CPSR into tmp2. */
8514 tmp = tcg_temp_new_i32();
8515 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
8516 tcg_gen_addi_i32(addr, addr, 4);
8517 tmp2 = tcg_temp_new_i32();
8518 gen_aa32_ld32u(s, tmp2, addr, get_mem_index(s));
8519 if (insn & (1 << 21)) {
8520 /* Base writeback. */
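                /* addr has already advanced 4 bytes past the first loaded
                 * word (addr == Rn + initial offset + 4), so the adjustments
                 * below leave the architectural final base: Rn - 8 for the
                 * decrementing modes, Rn + 8 for the incrementing ones.
                 */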
8522 case 0: offset = -8; break;
8523 case 1: offset = 4; break;
8524 case 2: offset = -4; break;
8525 case 3: offset = 0; break;
8529 tcg_gen_addi_i32(addr, addr, offset);
8530 store_reg(s, rn, addr);
8532 tcg_temp_free_i32(addr);
8534 gen_rfe(s, tmp, tmp2);
8536 } else if ((insn & 0x0e000000) == 0x0a000000) {
8537 /* branch link and change to thumb (blx <offset>) */
8540 val = (uint32_t)s->pc;
8541 tmp = tcg_temp_new_i32();
8542 tcg_gen_movi_i32(tmp, val);
8543 store_reg(s, 14, tmp);
8544 /* Sign-extend the 24-bit offset */
8545 offset = (((int32_t)insn) << 8) >> 8;
8546 /* offset * 4 + bit24 * 2 + (thumb bit) */
8547 val += (offset << 2) | ((insn >> 23) & 2) | 1;
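            /* Together with the 4-byte pipeline adjustment below, the value
             * just computed is the architectural BLX(immediate) target
             * Align(PC,4) + SignExtend(imm24:H:'0'), where PC reads as this
             * insn's address + 8; the set bit 0 requests the switch to
             * Thumb state at the branch.
             */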
 8548             /* pipeline offset */
                  val += 4;
8550 /* protected by ARCH(5); above, near the start of uncond block */
8553 } else if ((insn & 0x0e000f00) == 0x0c000100) {
8554 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
8555 /* iWMMXt register transfer. */
8556 if (extract32(s->c15_cpar, 1, 1)) {
8557 if (!disas_iwmmxt_insn(s, insn)) {
8562 } else if ((insn & 0x0e000a00) == 0x0c000800
8563 && arm_dc_feature(s, ARM_FEATURE_V8)) {
8564 if (disas_neon_insn_3same_ext(s, insn)) {
8568 } else if ((insn & 0x0f000a00) == 0x0e000800
8569 && arm_dc_feature(s, ARM_FEATURE_V8)) {
8570 if (disas_neon_insn_2reg_scalar_ext(s, insn)) {
8574 } else if ((insn & 0x0fe00000) == 0x0c400000) {
8575 /* Coprocessor double register transfer. */
8577 } else if ((insn & 0x0f000010) == 0x0e000010) {
8578 /* Additional coprocessor register transfer. */
8579 } else if ((insn & 0x0ff10020) == 0x01000000) {
8582 /* cps (privileged) */
8586 if (insn & (1 << 19)) {
8587 if (insn & (1 << 8))
8589 if (insn & (1 << 7))
8591 if (insn & (1 << 6))
8593 if (insn & (1 << 18))
8596 if (insn & (1 << 17)) {
8598 val |= (insn & 0x1f);
8601 gen_set_psr_im(s, mask, 0, val);
8608 /* if not always execute, we generate a conditional jump to
8610 s->condlabel = gen_new_label();
8611 arm_gen_test_cc(cond ^ 1, s->condlabel);
8614 if ((insn & 0x0f900000) == 0x03000000) {
8615 if ((insn & (1 << 21)) == 0) {
8617 rd = (insn >> 12) & 0xf;
8618 val = ((insn >> 4) & 0xf000) | (insn & 0xfff);
8619 if ((insn & (1 << 22)) == 0) {
8621 tmp = tcg_temp_new_i32();
8622 tcg_gen_movi_i32(tmp, val);
8625 tmp = load_reg(s, rd);
8626 tcg_gen_ext16u_i32(tmp, tmp);
8627 tcg_gen_ori_i32(tmp, tmp, val << 16);
8629 store_reg(s, rd, tmp);
8631 if (((insn >> 12) & 0xf) != 0xf)
8633 if (((insn >> 16) & 0xf) == 0) {
8634 gen_nop_hint(s, insn & 0xff);
8636 /* CPSR = immediate */
8638 shift = ((insn >> 8) & 0xf) * 2;
8640 val = (val >> shift) | (val << (32 - shift));
8641 i = ((insn & (1 << 22)) != 0);
8642 if (gen_set_psr_im(s, msr_mask(s, (insn >> 16) & 0xf, i),
8648 } else if ((insn & 0x0f900000) == 0x01000000
8649 && (insn & 0x00000090) != 0x00000090) {
8650 /* miscellaneous instructions */
8651 op1 = (insn >> 21) & 3;
8652 sh = (insn >> 4) & 0xf;
8655 case 0x0: /* MSR, MRS */
8656 if (insn & (1 << 9)) {
8657 /* MSR (banked) and MRS (banked) */
8658 int sysm = extract32(insn, 16, 4) |
8659 (extract32(insn, 8, 1) << 4);
8660 int r = extract32(insn, 22, 1);
8664 gen_msr_banked(s, r, sysm, rm);
8667 int rd = extract32(insn, 12, 4);
8669 gen_mrs_banked(s, r, sysm, rd);
8674 /* MSR, MRS (for PSRs) */
8677 tmp = load_reg(s, rm);
8678 i = ((op1 & 2) != 0);
8679 if (gen_set_psr(s, msr_mask(s, (insn >> 16) & 0xf, i), i, tmp))
8683 rd = (insn >> 12) & 0xf;
8687 tmp = load_cpu_field(spsr);
8689 tmp = tcg_temp_new_i32();
8690 gen_helper_cpsr_read(tmp, cpu_env);
8692 store_reg(s, rd, tmp);
8697 /* branch/exchange thumb (bx). */
8699 tmp = load_reg(s, rm);
8701 } else if (op1 == 3) {
8704 rd = (insn >> 12) & 0xf;
8705 tmp = load_reg(s, rm);
8706 tcg_gen_clzi_i32(tmp, tmp, 32);
8707 store_reg(s, rd, tmp);
8715 /* Trivial implementation equivalent to bx. */
8716 tmp = load_reg(s, rm);
8727 /* branch link/exchange thumb (blx) */
8728 tmp = load_reg(s, rm);
8729 tmp2 = tcg_temp_new_i32();
8730 tcg_gen_movi_i32(tmp2, s->pc);
8731 store_reg(s, 14, tmp2);
8737 uint32_t c = extract32(insn, 8, 4);
8739 /* Check this CPU supports ARMv8 CRC instructions.
8740 * op1 == 3 is UNPREDICTABLE but handle as UNDEFINED.
8741 * Bits 8, 10 and 11 should be zero.
8743 if (!arm_dc_feature(s, ARM_FEATURE_CRC) || op1 == 0x3 ||
8748 rn = extract32(insn, 16, 4);
8749 rd = extract32(insn, 12, 4);
8751 tmp = load_reg(s, rn);
8752 tmp2 = load_reg(s, rm);
8754 tcg_gen_andi_i32(tmp2, tmp2, 0xff);
8755 } else if (op1 == 1) {
8756 tcg_gen_andi_i32(tmp2, tmp2, 0xffff);
8758 tmp3 = tcg_const_i32(1 << op1);
8760 gen_helper_crc32c(tmp, tmp, tmp2, tmp3);
8762 gen_helper_crc32(tmp, tmp, tmp2, tmp3);
8764 tcg_temp_free_i32(tmp2);
8765 tcg_temp_free_i32(tmp3);
8766 store_reg(s, rd, tmp);
8769 case 0x5: /* saturating add/subtract */
8771 rd = (insn >> 12) & 0xf;
8772 rn = (insn >> 16) & 0xf;
8773 tmp = load_reg(s, rm);
8774 tmp2 = load_reg(s, rn);
8776 gen_helper_double_saturate(tmp2, cpu_env, tmp2);
8778 gen_helper_sub_saturate(tmp, cpu_env, tmp, tmp2);
8780 gen_helper_add_saturate(tmp, cpu_env, tmp, tmp2);
8781 tcg_temp_free_i32(tmp2);
8782 store_reg(s, rd, tmp);
8786 int imm16 = extract32(insn, 0, 4) | (extract32(insn, 8, 12) << 4);
8795 gen_exception_bkpt_insn(s, 4, syn_aa32_bkpt(imm16, false));
8798 /* Hypervisor call (v7) */
8806 /* Secure monitor call (v6+) */
8814 g_assert_not_reached();
8818 case 0x8: /* signed multiply */
8823 rs = (insn >> 8) & 0xf;
8824 rn = (insn >> 12) & 0xf;
8825 rd = (insn >> 16) & 0xf;
8827 /* (32 * 16) >> 16 */
8828 tmp = load_reg(s, rm);
8829 tmp2 = load_reg(s, rs);
8831 tcg_gen_sari_i32(tmp2, tmp2, 16);
8834 tmp64 = gen_muls_i64_i32(tmp, tmp2);
8835 tcg_gen_shri_i64(tmp64, tmp64, 16);
8836 tmp = tcg_temp_new_i32();
8837 tcg_gen_extrl_i64_i32(tmp, tmp64);
8838 tcg_temp_free_i64(tmp64);
8839 if ((sh & 2) == 0) {
8840 tmp2 = load_reg(s, rn);
8841 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
8842 tcg_temp_free_i32(tmp2);
8844 store_reg(s, rd, tmp);
8847 tmp = load_reg(s, rm);
8848 tmp2 = load_reg(s, rs);
8849 gen_mulxy(tmp, tmp2, sh & 2, sh & 4);
8850 tcg_temp_free_i32(tmp2);
8852 tmp64 = tcg_temp_new_i64();
8853 tcg_gen_ext_i32_i64(tmp64, tmp);
8854 tcg_temp_free_i32(tmp);
8855 gen_addq(s, tmp64, rn, rd);
8856 gen_storeq_reg(s, rn, rd, tmp64);
8857 tcg_temp_free_i64(tmp64);
8860 tmp2 = load_reg(s, rn);
8861 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
8862 tcg_temp_free_i32(tmp2);
8864 store_reg(s, rd, tmp);
8871 } else if (((insn & 0x0e000000) == 0 &&
8872 (insn & 0x00000090) != 0x90) ||
8873 ((insn & 0x0e000000) == (1 << 25))) {
8874 int set_cc, logic_cc, shiftop;
8876 op1 = (insn >> 21) & 0xf;
8877 set_cc = (insn >> 20) & 1;
8878 logic_cc = table_logic_cc[op1] & set_cc;
8880 /* data processing instruction */
8881 if (insn & (1 << 25)) {
8882 /* immediate operand */
8884 shift = ((insn >> 8) & 0xf) * 2;
8886 val = (val >> shift) | (val << (32 - shift));
8888 tmp2 = tcg_temp_new_i32();
8889 tcg_gen_movi_i32(tmp2, val);
8890 if (logic_cc && shift) {
8891 gen_set_CF_bit31(tmp2);
8896 tmp2 = load_reg(s, rm);
8897 shiftop = (insn >> 5) & 3;
8898 if (!(insn & (1 << 4))) {
8899 shift = (insn >> 7) & 0x1f;
8900 gen_arm_shift_im(tmp2, shiftop, shift, logic_cc);
8902 rs = (insn >> 8) & 0xf;
8903 tmp = load_reg(s, rs);
8904 gen_arm_shift_reg(tmp2, shiftop, tmp, logic_cc);
8907 if (op1 != 0x0f && op1 != 0x0d) {
8908 rn = (insn >> 16) & 0xf;
8909 tmp = load_reg(s, rn);
8913 rd = (insn >> 12) & 0xf;
8916 tcg_gen_and_i32(tmp, tmp, tmp2);
8920 store_reg_bx(s, rd, tmp);
8923 tcg_gen_xor_i32(tmp, tmp, tmp2);
8927 store_reg_bx(s, rd, tmp);
8930 if (set_cc && rd == 15) {
8931 /* SUBS r15, ... is used for exception return. */
8935 gen_sub_CC(tmp, tmp, tmp2);
8936 gen_exception_return(s, tmp);
8939 gen_sub_CC(tmp, tmp, tmp2);
8941 tcg_gen_sub_i32(tmp, tmp, tmp2);
8943 store_reg_bx(s, rd, tmp);
8948 gen_sub_CC(tmp, tmp2, tmp);
8950 tcg_gen_sub_i32(tmp, tmp2, tmp);
8952 store_reg_bx(s, rd, tmp);
8956 gen_add_CC(tmp, tmp, tmp2);
8958 tcg_gen_add_i32(tmp, tmp, tmp2);
8960 store_reg_bx(s, rd, tmp);
8964 gen_adc_CC(tmp, tmp, tmp2);
8966 gen_add_carry(tmp, tmp, tmp2);
8968 store_reg_bx(s, rd, tmp);
8972 gen_sbc_CC(tmp, tmp, tmp2);
8974 gen_sub_carry(tmp, tmp, tmp2);
8976 store_reg_bx(s, rd, tmp);
8980 gen_sbc_CC(tmp, tmp2, tmp);
8982 gen_sub_carry(tmp, tmp2, tmp);
8984 store_reg_bx(s, rd, tmp);
8988 tcg_gen_and_i32(tmp, tmp, tmp2);
8991 tcg_temp_free_i32(tmp);
8995 tcg_gen_xor_i32(tmp, tmp, tmp2);
8998 tcg_temp_free_i32(tmp);
9002 gen_sub_CC(tmp, tmp, tmp2);
9004 tcg_temp_free_i32(tmp);
9008 gen_add_CC(tmp, tmp, tmp2);
9010 tcg_temp_free_i32(tmp);
9013 tcg_gen_or_i32(tmp, tmp, tmp2);
9017 store_reg_bx(s, rd, tmp);
9020 if (logic_cc && rd == 15) {
9021 /* MOVS r15, ... is used for exception return. */
9025 gen_exception_return(s, tmp2);
9030 store_reg_bx(s, rd, tmp2);
9034 tcg_gen_andc_i32(tmp, tmp, tmp2);
9038 store_reg_bx(s, rd, tmp);
9042 tcg_gen_not_i32(tmp2, tmp2);
9046 store_reg_bx(s, rd, tmp2);
9049 if (op1 != 0x0f && op1 != 0x0d) {
9050 tcg_temp_free_i32(tmp2);
9053 /* other instructions */
9054 op1 = (insn >> 24) & 0xf;
9058 /* multiplies, extra load/stores */
9059 sh = (insn >> 5) & 3;
9062 rd = (insn >> 16) & 0xf;
9063 rn = (insn >> 12) & 0xf;
9064 rs = (insn >> 8) & 0xf;
9066 op1 = (insn >> 20) & 0xf;
9068 case 0: case 1: case 2: case 3: case 6:
9070 tmp = load_reg(s, rs);
9071 tmp2 = load_reg(s, rm);
9072 tcg_gen_mul_i32(tmp, tmp, tmp2);
9073 tcg_temp_free_i32(tmp2);
9074 if (insn & (1 << 22)) {
9075 /* Subtract (mls) */
9077 tmp2 = load_reg(s, rn);
9078 tcg_gen_sub_i32(tmp, tmp2, tmp);
9079 tcg_temp_free_i32(tmp2);
9080 } else if (insn & (1 << 21)) {
9082 tmp2 = load_reg(s, rn);
9083 tcg_gen_add_i32(tmp, tmp, tmp2);
9084 tcg_temp_free_i32(tmp2);
9086 if (insn & (1 << 20))
9088 store_reg(s, rd, tmp);
9091 /* 64 bit mul double accumulate (UMAAL) */
9093 tmp = load_reg(s, rs);
9094 tmp2 = load_reg(s, rm);
9095 tmp64 = gen_mulu_i64_i32(tmp, tmp2);
9096 gen_addq_lo(s, tmp64, rn);
9097 gen_addq_lo(s, tmp64, rd);
9098 gen_storeq_reg(s, rn, rd, tmp64);
9099 tcg_temp_free_i64(tmp64);
9101 case 8: case 9: case 10: case 11:
9102 case 12: case 13: case 14: case 15:
9103 /* 64 bit mul: UMULL, UMLAL, SMULL, SMLAL. */
9104 tmp = load_reg(s, rs);
9105 tmp2 = load_reg(s, rm);
9106 if (insn & (1 << 22)) {
9107 tcg_gen_muls2_i32(tmp, tmp2, tmp, tmp2);
9109 tcg_gen_mulu2_i32(tmp, tmp2, tmp, tmp2);
9111 if (insn & (1 << 21)) { /* mult accumulate */
9112 TCGv_i32 al = load_reg(s, rn);
9113 TCGv_i32 ah = load_reg(s, rd);
9114 tcg_gen_add2_i32(tmp, tmp2, tmp, tmp2, al, ah);
9115 tcg_temp_free_i32(al);
9116 tcg_temp_free_i32(ah);
9118 if (insn & (1 << 20)) {
9119 gen_logicq_cc(tmp, tmp2);
9121 store_reg(s, rn, tmp);
9122 store_reg(s, rd, tmp2);
9128 rn = (insn >> 16) & 0xf;
9129 rd = (insn >> 12) & 0xf;
9130 if (insn & (1 << 23)) {
9131 /* load/store exclusive */
9132 int op2 = (insn >> 8) & 3;
9133 op1 = (insn >> 21) & 0x3;
9136 case 0: /* lda/stl */
9142 case 1: /* reserved */
9144 case 2: /* ldaex/stlex */
9147 case 3: /* ldrex/strex */
9156 addr = tcg_temp_local_new_i32();
9157 load_reg_var(s, addr, rn);
9159 /* Since the emulation does not have barriers,
 9160                        the acquire/release semantics need no special
                             attention. */
9163 if (insn & (1 << 20)) {
9164 tmp = tcg_temp_new_i32();
9167 gen_aa32_ld32u_iss(s, tmp, addr,
9172 gen_aa32_ld8u_iss(s, tmp, addr,
9177 gen_aa32_ld16u_iss(s, tmp, addr,
9184 store_reg(s, rd, tmp);
9187 tmp = load_reg(s, rm);
9190 gen_aa32_st32_iss(s, tmp, addr,
9195 gen_aa32_st8_iss(s, tmp, addr,
9200 gen_aa32_st16_iss(s, tmp, addr,
9207 tcg_temp_free_i32(tmp);
9209 } else if (insn & (1 << 20)) {
9212 gen_load_exclusive(s, rd, 15, addr, 2);
9214 case 1: /* ldrexd */
9215 gen_load_exclusive(s, rd, rd + 1, addr, 3);
9217 case 2: /* ldrexb */
9218 gen_load_exclusive(s, rd, 15, addr, 0);
9220 case 3: /* ldrexh */
9221 gen_load_exclusive(s, rd, 15, addr, 1);
9230 gen_store_exclusive(s, rd, rm, 15, addr, 2);
9232 case 1: /* strexd */
9233 gen_store_exclusive(s, rd, rm, rm + 1, addr, 3);
9235 case 2: /* strexb */
9236 gen_store_exclusive(s, rd, rm, 15, addr, 0);
9238 case 3: /* strexh */
9239 gen_store_exclusive(s, rd, rm, 15, addr, 1);
9245 tcg_temp_free_i32(addr);
9246 } else if ((insn & 0x00300f00) == 0) {
9247 /* 0bcccc_0001_0x00_xxxx_xxxx_0000_1001_xxxx
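             *  - SWP, SWPB (the legacy swap word / swap byte instructions,
             *    implemented below as an atomic exchange)
             */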
9252 TCGMemOp opc = s->be_data;
9256 if (insn & (1 << 22)) {
9259 opc |= MO_UL | MO_ALIGN;
9262 addr = load_reg(s, rn);
9263 taddr = gen_aa32_addr(s, addr, opc);
9264 tcg_temp_free_i32(addr);
9266 tmp = load_reg(s, rm);
9267 tcg_gen_atomic_xchg_i32(tmp, taddr, tmp,
9268 get_mem_index(s), opc);
9269 tcg_temp_free(taddr);
9270 store_reg(s, rd, tmp);
9277 bool load = insn & (1 << 20);
9278 bool wbit = insn & (1 << 21);
9279 bool pbit = insn & (1 << 24);
9280 bool doubleword = false;
9283 /* Misc load/store */
9284 rn = (insn >> 16) & 0xf;
9285 rd = (insn >> 12) & 0xf;
9287 /* ISS not valid if writeback */
 9288                 issinfo = (pbit && !wbit) ? rd : ISSInvalid;
9290 if (!load && (sh & 2)) {
9294 /* UNPREDICTABLE; we choose to UNDEF */
9297 load = (sh & 1) == 0;
9301 addr = load_reg(s, rn);
9303 gen_add_datah_offset(s, insn, 0, addr);
9310 tmp = load_reg(s, rd);
9311 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
9312 tcg_temp_free_i32(tmp);
9313 tcg_gen_addi_i32(addr, addr, 4);
9314 tmp = load_reg(s, rd + 1);
9315 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
9316 tcg_temp_free_i32(tmp);
9319 tmp = tcg_temp_new_i32();
9320 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
9321 store_reg(s, rd, tmp);
9322 tcg_gen_addi_i32(addr, addr, 4);
9323 tmp = tcg_temp_new_i32();
9324 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
9327 address_offset = -4;
9330 tmp = tcg_temp_new_i32();
9333 gen_aa32_ld16u_iss(s, tmp, addr, get_mem_index(s),
9337 gen_aa32_ld8s_iss(s, tmp, addr, get_mem_index(s),
9342 gen_aa32_ld16s_iss(s, tmp, addr, get_mem_index(s),
9348 tmp = load_reg(s, rd);
9349 gen_aa32_st16_iss(s, tmp, addr, get_mem_index(s), issinfo);
9350 tcg_temp_free_i32(tmp);
9352 /* Perform base writeback before the loaded value to
9353 ensure correct behavior with overlapping index registers.
9354 ldrd with base writeback is undefined if the
9355 destination and index registers overlap. */
9357 gen_add_datah_offset(s, insn, address_offset, addr);
9358 store_reg(s, rn, addr);
9361 tcg_gen_addi_i32(addr, addr, address_offset);
9362 store_reg(s, rn, addr);
9364 tcg_temp_free_i32(addr);
9367 /* Complete the load. */
9368 store_reg(s, rd, tmp);
9377 if (insn & (1 << 4)) {
 9379             /* ARMv6 Media instructions. */
9381 rn = (insn >> 16) & 0xf;
9382 rd = (insn >> 12) & 0xf;
9383 rs = (insn >> 8) & 0xf;
9384 switch ((insn >> 23) & 3) {
9385 case 0: /* Parallel add/subtract. */
9386 op1 = (insn >> 20) & 7;
9387 tmp = load_reg(s, rn);
9388 tmp2 = load_reg(s, rm);
9389 sh = (insn >> 5) & 7;
9390 if ((op1 & 3) == 0 || sh == 5 || sh == 6)
9392 gen_arm_parallel_addsub(op1, sh, tmp, tmp2);
9393 tcg_temp_free_i32(tmp2);
9394 store_reg(s, rd, tmp);
9397 if ((insn & 0x00700020) == 0) {
9398 /* Halfword pack. */
9399 tmp = load_reg(s, rn);
9400 tmp2 = load_reg(s, rm);
9401 shift = (insn >> 7) & 0x1f;
9402 if (insn & (1 << 6)) {
9406 tcg_gen_sari_i32(tmp2, tmp2, shift);
9407 tcg_gen_andi_i32(tmp, tmp, 0xffff0000);
9408 tcg_gen_ext16u_i32(tmp2, tmp2);
9412 tcg_gen_shli_i32(tmp2, tmp2, shift);
9413 tcg_gen_ext16u_i32(tmp, tmp);
9414 tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
9416 tcg_gen_or_i32(tmp, tmp, tmp2);
9417 tcg_temp_free_i32(tmp2);
9418 store_reg(s, rd, tmp);
9419 } else if ((insn & 0x00200020) == 0x00200000) {
9421 tmp = load_reg(s, rm);
9422 shift = (insn >> 7) & 0x1f;
9423 if (insn & (1 << 6)) {
9426 tcg_gen_sari_i32(tmp, tmp, shift);
9428 tcg_gen_shli_i32(tmp, tmp, shift);
9430 sh = (insn >> 16) & 0x1f;
9431 tmp2 = tcg_const_i32(sh);
9432 if (insn & (1 << 22))
9433 gen_helper_usat(tmp, cpu_env, tmp, tmp2);
9435 gen_helper_ssat(tmp, cpu_env, tmp, tmp2);
9436 tcg_temp_free_i32(tmp2);
9437 store_reg(s, rd, tmp);
9438 } else if ((insn & 0x00300fe0) == 0x00200f20) {
9440 tmp = load_reg(s, rm);
9441 sh = (insn >> 16) & 0x1f;
9442 tmp2 = tcg_const_i32(sh);
9443 if (insn & (1 << 22))
9444 gen_helper_usat16(tmp, cpu_env, tmp, tmp2);
9446 gen_helper_ssat16(tmp, cpu_env, tmp, tmp2);
9447 tcg_temp_free_i32(tmp2);
9448 store_reg(s, rd, tmp);
9449 } else if ((insn & 0x00700fe0) == 0x00000fa0) {
9451 tmp = load_reg(s, rn);
9452 tmp2 = load_reg(s, rm);
9453 tmp3 = tcg_temp_new_i32();
9454 tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUARMState, GE));
9455 gen_helper_sel_flags(tmp, tmp3, tmp, tmp2);
9456 tcg_temp_free_i32(tmp3);
9457 tcg_temp_free_i32(tmp2);
9458 store_reg(s, rd, tmp);
9459 } else if ((insn & 0x000003e0) == 0x00000060) {
9460 tmp = load_reg(s, rm);
9461 shift = (insn >> 10) & 3;
9462 /* ??? In many cases it's not necessary to do a
9463 rotate, a shift is sufficient. */
9465 tcg_gen_rotri_i32(tmp, tmp, shift * 8);
9466 op1 = (insn >> 20) & 7;
9468 case 0: gen_sxtb16(tmp); break;
9469 case 2: gen_sxtb(tmp); break;
9470 case 3: gen_sxth(tmp); break;
9471 case 4: gen_uxtb16(tmp); break;
9472 case 6: gen_uxtb(tmp); break;
9473 case 7: gen_uxth(tmp); break;
9474 default: goto illegal_op;
9477 tmp2 = load_reg(s, rn);
9478 if ((op1 & 3) == 0) {
9479 gen_add16(tmp, tmp2);
9481 tcg_gen_add_i32(tmp, tmp, tmp2);
9482 tcg_temp_free_i32(tmp2);
9485 store_reg(s, rd, tmp);
9486 } else if ((insn & 0x003f0f60) == 0x003f0f20) {
9488 tmp = load_reg(s, rm);
9489 if (insn & (1 << 22)) {
9490 if (insn & (1 << 7)) {
9494 gen_helper_rbit(tmp, tmp);
9497 if (insn & (1 << 7))
9500 tcg_gen_bswap32_i32(tmp, tmp);
9502 store_reg(s, rd, tmp);
9507 case 2: /* Multiplies (Type 3). */
9508 switch ((insn >> 20) & 0x7) {
9510 if (((insn >> 6) ^ (insn >> 7)) & 1) {
9511 /* op2 not 00x or 11x : UNDEF */
9514 /* Signed multiply most significant [accumulate].
9515 (SMMUL, SMMLA, SMMLS) */
9516 tmp = load_reg(s, rm);
9517 tmp2 = load_reg(s, rs);
9518 tmp64 = gen_muls_i64_i32(tmp, tmp2);
9521 tmp = load_reg(s, rd);
9522 if (insn & (1 << 6)) {
9523 tmp64 = gen_subq_msw(tmp64, tmp);
9525 tmp64 = gen_addq_msw(tmp64, tmp);
9528 if (insn & (1 << 5)) {
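                        /* R bit: biasing the 64-bit product by 0x80000000
                         * before the 32-bit shift below rounds to nearest
                         * rather than truncating (the SMMULR/SMMLAR/SMMLSR
                         * forms).
                         */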
9529 tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u);
9531 tcg_gen_shri_i64(tmp64, tmp64, 32);
9532 tmp = tcg_temp_new_i32();
9533 tcg_gen_extrl_i64_i32(tmp, tmp64);
9534 tcg_temp_free_i64(tmp64);
9535 store_reg(s, rn, tmp);
9539 /* SMLAD, SMUAD, SMLSD, SMUSD, SMLALD, SMLSLD */
9540 if (insn & (1 << 7)) {
9543 tmp = load_reg(s, rm);
9544 tmp2 = load_reg(s, rs);
9545 if (insn & (1 << 5))
9546 gen_swap_half(tmp2);
9547 gen_smul_dual(tmp, tmp2);
9548 if (insn & (1 << 22)) {
9549 /* smlald, smlsld */
9552 tmp64 = tcg_temp_new_i64();
9553 tmp64_2 = tcg_temp_new_i64();
9554 tcg_gen_ext_i32_i64(tmp64, tmp);
9555 tcg_gen_ext_i32_i64(tmp64_2, tmp2);
9556 tcg_temp_free_i32(tmp);
9557 tcg_temp_free_i32(tmp2);
9558 if (insn & (1 << 6)) {
9559 tcg_gen_sub_i64(tmp64, tmp64, tmp64_2);
9561 tcg_gen_add_i64(tmp64, tmp64, tmp64_2);
9563 tcg_temp_free_i64(tmp64_2);
9564 gen_addq(s, tmp64, rd, rn);
9565 gen_storeq_reg(s, rd, rn, tmp64);
9566 tcg_temp_free_i64(tmp64);
9568 /* smuad, smusd, smlad, smlsd */
9569 if (insn & (1 << 6)) {
9570 /* This subtraction cannot overflow. */
9571 tcg_gen_sub_i32(tmp, tmp, tmp2);
9573 /* This addition cannot overflow 32 bits;
9574 * however it may overflow considered as a
 9575                      * signed operation, in which case we must set
                           * the Q flag.
                           */
9578 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
9580 tcg_temp_free_i32(tmp2);
9583 tmp2 = load_reg(s, rd);
9584 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
9585 tcg_temp_free_i32(tmp2);
9587 store_reg(s, rn, tmp);
9593 if (!arm_dc_feature(s, ARM_FEATURE_ARM_DIV)) {
9596 if (((insn >> 5) & 7) || (rd != 15)) {
9599 tmp = load_reg(s, rm);
9600 tmp2 = load_reg(s, rs);
9601 if (insn & (1 << 21)) {
9602 gen_helper_udiv(tmp, tmp, tmp2);
9604 gen_helper_sdiv(tmp, tmp, tmp2);
9606 tcg_temp_free_i32(tmp2);
9607 store_reg(s, rn, tmp);
9614 op1 = ((insn >> 17) & 0x38) | ((insn >> 5) & 7);
9616 case 0: /* Unsigned sum of absolute differences. */
9618 tmp = load_reg(s, rm);
9619 tmp2 = load_reg(s, rs);
9620 gen_helper_usad8(tmp, tmp, tmp2);
9621 tcg_temp_free_i32(tmp2);
9623 tmp2 = load_reg(s, rd);
9624 tcg_gen_add_i32(tmp, tmp, tmp2);
9625 tcg_temp_free_i32(tmp2);
9627 store_reg(s, rn, tmp);
9629 case 0x20: case 0x24: case 0x28: case 0x2c:
9630 /* Bitfield insert/clear. */
9632 shift = (insn >> 7) & 0x1f;
9633 i = (insn >> 16) & 0x1f;
9635 /* UNPREDICTABLE; we choose to UNDEF */
9640 tmp = tcg_temp_new_i32();
9641 tcg_gen_movi_i32(tmp, 0);
9643 tmp = load_reg(s, rm);
9646 tmp2 = load_reg(s, rd);
9647 tcg_gen_deposit_i32(tmp, tmp2, tmp, shift, i);
9648 tcg_temp_free_i32(tmp2);
9650 store_reg(s, rd, tmp);
9652 case 0x12: case 0x16: case 0x1a: case 0x1e: /* sbfx */
9653 case 0x32: case 0x36: case 0x3a: case 0x3e: /* ubfx */
9655 tmp = load_reg(s, rm);
9656 shift = (insn >> 7) & 0x1f;
9657 i = ((insn >> 16) & 0x1f) + 1;
9662 tcg_gen_extract_i32(tmp, tmp, shift, i);
9664 tcg_gen_sextract_i32(tmp, tmp, shift, i);
9667 store_reg(s, rd, tmp);
9677 /* Check for undefined extension instructions
 9678              * per the ARM Bible, i.e.:
9679 * xxxx 0111 1111 xxxx xxxx xxxx 1111 xxxx
9681 sh = (0xf << 20) | (0xf << 4);
9682 if (op1 == 0x7 && ((insn & sh) == sh))
9686 /* load/store byte/word */
9687 rn = (insn >> 16) & 0xf;
9688 rd = (insn >> 12) & 0xf;
9689 tmp2 = load_reg(s, rn);
9690 if ((insn & 0x01200000) == 0x00200000) {
9692 i = get_a32_user_mem_index(s);
9694 i = get_mem_index(s);
9696 if (insn & (1 << 24))
9697 gen_add_data_offset(s, insn, tmp2);
9698 if (insn & (1 << 20)) {
9700 tmp = tcg_temp_new_i32();
9701 if (insn & (1 << 22)) {
9702 gen_aa32_ld8u_iss(s, tmp, tmp2, i, rd);
9704 gen_aa32_ld32u_iss(s, tmp, tmp2, i, rd);
9708 tmp = load_reg(s, rd);
9709 if (insn & (1 << 22)) {
9710 gen_aa32_st8_iss(s, tmp, tmp2, i, rd);
9712 gen_aa32_st32_iss(s, tmp, tmp2, i, rd);
9714 tcg_temp_free_i32(tmp);
9716 if (!(insn & (1 << 24))) {
9717 gen_add_data_offset(s, insn, tmp2);
9718 store_reg(s, rn, tmp2);
9719 } else if (insn & (1 << 21)) {
9720 store_reg(s, rn, tmp2);
9722 tcg_temp_free_i32(tmp2);
9724 if (insn & (1 << 20)) {
9725 /* Complete the load. */
9726 store_reg_from_load(s, rd, tmp);
9732 int j, n, loaded_base;
9733 bool exc_return = false;
9734 bool is_load = extract32(insn, 20, 1);
9736 TCGv_i32 loaded_var;
9737 /* load/store multiple words */
9738 /* XXX: store correct base if write back */
9739 if (insn & (1 << 22)) {
9740 /* LDM (user), LDM (exception return) and STM (user) */
9742 goto illegal_op; /* only usable in supervisor mode */
9744 if (is_load && extract32(insn, 15, 1)) {
9750 rn = (insn >> 16) & 0xf;
9751 addr = load_reg(s, rn);
9753 /* compute total size */
9758 if (insn & (1 << i))
9761 /* XXX: test invalid n == 0 case ? */
9762 if (insn & (1 << 23)) {
9763 if (insn & (1 << 24)) {
9765 tcg_gen_addi_i32(addr, addr, 4);
9767 /* post increment */
9770 if (insn & (1 << 24)) {
9772 tcg_gen_addi_i32(addr, addr, -(n * 4));
9774 /* post decrement */
9776 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
9781 if (insn & (1 << i)) {
9784 tmp = tcg_temp_new_i32();
9785 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
9787 tmp2 = tcg_const_i32(i);
9788 gen_helper_set_user_reg(cpu_env, tmp2, tmp);
9789 tcg_temp_free_i32(tmp2);
9790 tcg_temp_free_i32(tmp);
9791 } else if (i == rn) {
9794 } else if (rn == 15 && exc_return) {
9795 store_pc_exc_ret(s, tmp);
9797 store_reg_from_load(s, i, tmp);
 9802                     /* special case: r15 = PC + 8 (s->pc here is already
                           * this insn's address + 4, hence the extra + 4) */
9803 val = (long)s->pc + 4;
9804 tmp = tcg_temp_new_i32();
9805 tcg_gen_movi_i32(tmp, val);
9807 tmp = tcg_temp_new_i32();
9808 tmp2 = tcg_const_i32(i);
9809 gen_helper_get_user_reg(tmp, cpu_env, tmp2);
9810 tcg_temp_free_i32(tmp2);
9812 tmp = load_reg(s, i);
9814 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
9815 tcg_temp_free_i32(tmp);
9818 /* no need to add after the last transfer */
9820 tcg_gen_addi_i32(addr, addr, 4);
9823 if (insn & (1 << 21)) {
9825 if (insn & (1 << 23)) {
9826 if (insn & (1 << 24)) {
9829 /* post increment */
9830 tcg_gen_addi_i32(addr, addr, 4);
9833 if (insn & (1 << 24)) {
9836 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
9838 /* post decrement */
9839 tcg_gen_addi_i32(addr, addr, -(n * 4));
9842 store_reg(s, rn, addr);
9844 tcg_temp_free_i32(addr);
9847 store_reg(s, rn, loaded_var);
9850 /* Restore CPSR from SPSR. */
9851 tmp = load_cpu_field(spsr);
9852 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
9855 gen_helper_cpsr_write_eret(cpu_env, tmp);
9856 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
9859 tcg_temp_free_i32(tmp);
9860 /* Must exit loop to check un-masked IRQs */
9861 s->base.is_jmp = DISAS_EXIT;
9870 /* branch (and link) */
9871 val = (int32_t)s->pc;
9872 if (insn & (1 << 24)) {
9873 tmp = tcg_temp_new_i32();
9874 tcg_gen_movi_i32(tmp, val);
9875 store_reg(s, 14, tmp);
9877 offset = sextract32(insn << 2, 0, 26);
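            /* insn << 2 above puts imm24 into bits [25:2], so sign-extracting
             * the low 26 bits gives SignExtend(imm24:'00'), i.e. the byte
             * offset; e.g. imm24 = 0xfffffe yields -8.
             */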
9885 if (((insn >> 8) & 0xe) == 10) {
9887 if (disas_vfp_insn(s, insn)) {
9890 } else if (disas_coproc_insn(s, insn)) {
9897 gen_set_pc_im(s, s->pc);
9898 s->svc_imm = extract32(insn, 0, 24);
9899 s->base.is_jmp = DISAS_SWI;
9903 gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
9904 default_exception_el(s));
9910 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t insn)
9912 /* Return true if this is a 16 bit instruction. We must be precise
9913 * about this (matching the decode). We assume that s->pc still
9914 * points to the first 16 bits of the insn.
9916 if ((insn >> 11) < 0x1d) {
9917 /* Definitely a 16-bit instruction */
9921 /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
9922 * first half of a 32-bit Thumb insn. Thumb-1 cores might
9923 * end up actually treating this as two 16-bit insns, though,
9924 * if it's half of a bl/blx pair that might span a page boundary.
9926 if (arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
9927 /* Thumb2 cores (including all M profile ones) always treat
9928 * 32-bit insns as 32-bit.
9933 if ((insn >> 11) == 0x1e && (s->pc < s->next_page_start - 3)) {
9934 /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
 9935          * is not on the next page; we merge this into a 32-bit
               * insn.
               */
9940 /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
9941 * 0b1111_1xxx_xxxx_xxxx : BL suffix;
9942 * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
9943 * -- handle as single 16 bit insn
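     *
     * Illustrative encodings (for clarity only):
     *   0x4770 (BX lr): top five bits 0b01000, so always a 16-bit insn.
     *   0xf7ff: top five bits 0b11110, i.e. a BL/BLX prefix: 32-bit on a
     *   Thumb-2 core, and on a Thumb-1 core only when its suffix is on the
     *   same page.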
9948 /* Return true if this is a Thumb-2 logical op. */
9950 thumb2_logic_op(int op)
9955 /* Generate code for a Thumb-2 data processing operation. If CONDS is nonzero
9956 then set condition code flags based on the result of the operation.
9957 If SHIFTER_OUT is nonzero then set the carry flag for logical operations
9958 to the high bit of T1.
9959 Returns zero if the opcode is valid. */
9962 gen_thumb2_data_op(DisasContext *s, int op, int conds, uint32_t shifter_out,
9963 TCGv_i32 t0, TCGv_i32 t1)
9970 tcg_gen_and_i32(t0, t0, t1);
9974 tcg_gen_andc_i32(t0, t0, t1);
9978 tcg_gen_or_i32(t0, t0, t1);
9982 tcg_gen_orc_i32(t0, t0, t1);
9986 tcg_gen_xor_i32(t0, t0, t1);
9991 gen_add_CC(t0, t0, t1);
9993 tcg_gen_add_i32(t0, t0, t1);
9997 gen_adc_CC(t0, t0, t1);
10003 gen_sbc_CC(t0, t0, t1);
10005 gen_sub_carry(t0, t0, t1);
10010 gen_sub_CC(t0, t0, t1);
10012 tcg_gen_sub_i32(t0, t0, t1);
10016 gen_sub_CC(t0, t1, t0);
10018 tcg_gen_sub_i32(t0, t1, t0);
10020 default: /* 5, 6, 7, 9, 12, 15. */
10026 gen_set_CF_bit31(t1);
10031 /* Translate a 32-bit thumb instruction. */
10032 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
10034 uint32_t imm, shift, offset;
10035 uint32_t rd, rn, rm, rs;
10046 /* The only 32 bit insn that's allowed for Thumb1 is the combined
10047 * BL/BLX prefix and suffix.
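     * The mask below requires the first halfword to be 0b11110xxx_xxxxxxxx
     * (the BL/BLX prefix) and bits 15..13 and 11 of the second halfword to
     * be set, which matches both the BL suffix (0b11111...) and the BLX
     * suffix (0b11101...).
     */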
10049 if ((insn & 0xf800e800) != 0xf000e800) {
10053 rn = (insn >> 16) & 0xf;
10054 rs = (insn >> 12) & 0xf;
10055 rd = (insn >> 8) & 0xf;
10057 switch ((insn >> 25) & 0xf) {
10058 case 0: case 1: case 2: case 3:
10059 /* 16-bit instructions. Should never happen. */
10062 if (insn & (1 << 22)) {
10063 /* 0b1110_100x_x1xx_xxxx_xxxx_xxxx_xxxx_xxxx
10064 * - load/store doubleword, load/store exclusive, ldacq/strel,
10065 * table branch, TT.
10067 if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_M) &&
10068 arm_dc_feature(s, ARM_FEATURE_V8)) {
 10069             /* 0b1110_1001_0111_1111_1110_1001_0111_1111
                    *  - SG (v8M only)
10071 * The bulk of the behaviour for this instruction is implemented
10072 * in v7m_handle_execute_nsc(), which deals with the insn when
10073 * it is executed by a CPU in non-secure state from memory
10074 * which is Secure & NonSecure-Callable.
10075 * Here we only need to handle the remaining cases:
10076 * * in NS memory (including the "security extension not
10077 * implemented" case) : NOP
10078 * * in S memory but CPU already secure (clear IT bits)
10079 * We know that the attribute for the memory this insn is
10080 * in must match the current CPU state, because otherwise
10081 * get_phys_addr_pmsav8 would have generated an exception.
10083 if (s->v8m_secure) {
10084 /* Like the IT insn, we don't need to generate any code */
10085 s->condexec_cond = 0;
10086 s->condexec_mask = 0;
10088 } else if (insn & 0x01200000) {
10089 /* 0b1110_1000_x11x_xxxx_xxxx_xxxx_xxxx_xxxx
10090 * - load/store dual (post-indexed)
10091 * 0b1111_1001_x10x_xxxx_xxxx_xxxx_xxxx_xxxx
10092 * - load/store dual (literal and immediate)
10093 * 0b1111_1001_x11x_xxxx_xxxx_xxxx_xxxx_xxxx
10094 * - load/store dual (pre-indexed)
10097 if (insn & (1 << 21)) {
10098 /* UNPREDICTABLE */
10101 addr = tcg_temp_new_i32();
10102 tcg_gen_movi_i32(addr, s->pc & ~3);
10104 addr = load_reg(s, rn);
10106 offset = (insn & 0xff) * 4;
10107 if ((insn & (1 << 23)) == 0)
10109 if (insn & (1 << 24)) {
10110 tcg_gen_addi_i32(addr, addr, offset);
10113 if (insn & (1 << 20)) {
10115 tmp = tcg_temp_new_i32();
10116 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
10117 store_reg(s, rs, tmp);
10118 tcg_gen_addi_i32(addr, addr, 4);
10119 tmp = tcg_temp_new_i32();
10120 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
10121 store_reg(s, rd, tmp);
10124 tmp = load_reg(s, rs);
10125 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
10126 tcg_temp_free_i32(tmp);
10127 tcg_gen_addi_i32(addr, addr, 4);
10128 tmp = load_reg(s, rd);
10129 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
10130 tcg_temp_free_i32(tmp);
10132 if (insn & (1 << 21)) {
10133 /* Base writeback. */
10134 tcg_gen_addi_i32(addr, addr, offset - 4);
10135 store_reg(s, rn, addr);
10137 tcg_temp_free_i32(addr);
10139 } else if ((insn & (1 << 23)) == 0) {
10140 /* 0b1110_1000_010x_xxxx_xxxx_xxxx_xxxx_xxxx
10141 * - load/store exclusive word
10145 if (!(insn & (1 << 20)) &&
10146 arm_dc_feature(s, ARM_FEATURE_M) &&
10147 arm_dc_feature(s, ARM_FEATURE_V8)) {
 10148                 /* 0b1110_1000_0100_xxxx_1111_xxxx_xxxx_xxxx
                        *  - TT (v8M only)
                        */
10151 bool alt = insn & (1 << 7);
10152 TCGv_i32 addr, op, ttresp;
10154 if ((insn & 0x3f) || rd == 13 || rd == 15 || rn == 15) {
10155 /* we UNDEF for these UNPREDICTABLE cases */
10159 if (alt && !s->v8m_secure) {
10163 addr = load_reg(s, rn);
10164 op = tcg_const_i32(extract32(insn, 6, 2));
10165 ttresp = tcg_temp_new_i32();
10166 gen_helper_v7m_tt(ttresp, cpu_env, addr, op);
10167 tcg_temp_free_i32(addr);
10168 tcg_temp_free_i32(op);
10169 store_reg(s, rd, ttresp);
10174 addr = tcg_temp_local_new_i32();
10175 load_reg_var(s, addr, rn);
10176 tcg_gen_addi_i32(addr, addr, (insn & 0xff) << 2);
10177 if (insn & (1 << 20)) {
10178 gen_load_exclusive(s, rs, 15, addr, 2);
10180 gen_store_exclusive(s, rd, rs, 15, addr, 2);
10182 tcg_temp_free_i32(addr);
10183 } else if ((insn & (7 << 5)) == 0) {
10184 /* Table Branch. */
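                /* TBB/TBH: read an unsigned byte (TBB) or, with bit 4 set, a
                 * halfword (TBH) from [Rn + Rm] or [Rn + 2*Rm] respectively,
                 * double it and add it to the PC, giving a forward branch
                 * table indexed by Rm (e.g. a compact C switch jump table).
                 */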
10186 addr = tcg_temp_new_i32();
10187 tcg_gen_movi_i32(addr, s->pc);
10189 addr = load_reg(s, rn);
10191 tmp = load_reg(s, rm);
10192 tcg_gen_add_i32(addr, addr, tmp);
10193 if (insn & (1 << 4)) {
10195 tcg_gen_add_i32(addr, addr, tmp);
10196 tcg_temp_free_i32(tmp);
10197 tmp = tcg_temp_new_i32();
10198 gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
10200 tcg_temp_free_i32(tmp);
10201 tmp = tcg_temp_new_i32();
10202 gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
10204 tcg_temp_free_i32(addr);
10205 tcg_gen_shli_i32(tmp, tmp, 1);
10206 tcg_gen_addi_i32(tmp, tmp, s->pc);
10207 store_reg(s, 15, tmp);
10209 int op2 = (insn >> 6) & 0x3;
10210 op = (insn >> 4) & 0x3;
10215 /* Load/store exclusive byte/halfword/doubleword */
10222 /* Load-acquire/store-release */
10228 /* Load-acquire/store-release exclusive */
10232 addr = tcg_temp_local_new_i32();
10233 load_reg_var(s, addr, rn);
10235 if (insn & (1 << 20)) {
10236 tmp = tcg_temp_new_i32();
10239 gen_aa32_ld8u_iss(s, tmp, addr, get_mem_index(s),
10243 gen_aa32_ld16u_iss(s, tmp, addr, get_mem_index(s),
10247 gen_aa32_ld32u_iss(s, tmp, addr, get_mem_index(s),
10253 store_reg(s, rs, tmp);
10255 tmp = load_reg(s, rs);
10258 gen_aa32_st8_iss(s, tmp, addr, get_mem_index(s),
10262 gen_aa32_st16_iss(s, tmp, addr, get_mem_index(s),
10266 gen_aa32_st32_iss(s, tmp, addr, get_mem_index(s),
10272 tcg_temp_free_i32(tmp);
10274 } else if (insn & (1 << 20)) {
10275 gen_load_exclusive(s, rs, rd, addr, op);
10277 gen_store_exclusive(s, rm, rs, rd, addr, op);
10279 tcg_temp_free_i32(addr);
10282 /* Load/store multiple, RFE, SRS. */
10283 if (((insn >> 23) & 1) == ((insn >> 24) & 1)) {
10284 /* RFE, SRS: not available in user mode or on M profile */
10285 if (IS_USER(s) || arm_dc_feature(s, ARM_FEATURE_M)) {
10288 if (insn & (1 << 20)) {
10290 addr = load_reg(s, rn);
10291 if ((insn & (1 << 24)) == 0)
10292 tcg_gen_addi_i32(addr, addr, -8);
10293 /* Load PC into tmp and CPSR into tmp2. */
10294 tmp = tcg_temp_new_i32();
10295 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
10296 tcg_gen_addi_i32(addr, addr, 4);
10297 tmp2 = tcg_temp_new_i32();
10298 gen_aa32_ld32u(s, tmp2, addr, get_mem_index(s));
10299 if (insn & (1 << 21)) {
10300 /* Base writeback. */
10301 if (insn & (1 << 24)) {
10302 tcg_gen_addi_i32(addr, addr, 4);
10304 tcg_gen_addi_i32(addr, addr, -4);
10306 store_reg(s, rn, addr);
10308 tcg_temp_free_i32(addr);
10310 gen_rfe(s, tmp, tmp2);
10313 gen_srs(s, (insn & 0x1f), (insn & (1 << 24)) ? 1 : 2,
10317 int i, loaded_base = 0;
10318 TCGv_i32 loaded_var;
10319 /* Load/store multiple. */
10320 addr = load_reg(s, rn);
10322 for (i = 0; i < 16; i++) {
10323 if (insn & (1 << i))
10326 if (insn & (1 << 24)) {
10327 tcg_gen_addi_i32(addr, addr, -offset);
10331 for (i = 0; i < 16; i++) {
10332 if ((insn & (1 << i)) == 0)
10334 if (insn & (1 << 20)) {
10336 tmp = tcg_temp_new_i32();
10337 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
10339 gen_bx_excret(s, tmp);
10340 } else if (i == rn) {
10344 store_reg(s, i, tmp);
10348 tmp = load_reg(s, i);
10349 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
10350 tcg_temp_free_i32(tmp);
10352 tcg_gen_addi_i32(addr, addr, 4);
10355 store_reg(s, rn, loaded_var);
10357 if (insn & (1 << 21)) {
10358 /* Base register writeback. */
10359 if (insn & (1 << 24)) {
10360 tcg_gen_addi_i32(addr, addr, -offset);
10362 /* Fault if writeback register is in register list. */
10363 if (insn & (1 << rn))
10365 store_reg(s, rn, addr);
10367 tcg_temp_free_i32(addr);
10374 op = (insn >> 21) & 0xf;
10376 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
10379 /* Halfword pack. */
10380 tmp = load_reg(s, rn);
10381 tmp2 = load_reg(s, rm);
10382 shift = ((insn >> 10) & 0x1c) | ((insn >> 6) & 0x3);
10383 if (insn & (1 << 5)) {
10387 tcg_gen_sari_i32(tmp2, tmp2, shift);
10388 tcg_gen_andi_i32(tmp, tmp, 0xffff0000);
10389 tcg_gen_ext16u_i32(tmp2, tmp2);
10393 tcg_gen_shli_i32(tmp2, tmp2, shift);
10394 tcg_gen_ext16u_i32(tmp, tmp);
10395 tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
10397 tcg_gen_or_i32(tmp, tmp, tmp2);
10398 tcg_temp_free_i32(tmp2);
10399 store_reg(s, rd, tmp);
10401 /* Data processing register constant shift. */
10403 tmp = tcg_temp_new_i32();
10404 tcg_gen_movi_i32(tmp, 0);
10406 tmp = load_reg(s, rn);
10408 tmp2 = load_reg(s, rm);
10410 shiftop = (insn >> 4) & 3;
10411 shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
10412 conds = (insn & (1 << 20)) != 0;
10413 logic_cc = (conds && thumb2_logic_op(op));
10414 gen_arm_shift_im(tmp2, shiftop, shift, logic_cc);
10415 if (gen_thumb2_data_op(s, op, conds, 0, tmp, tmp2))
10417 tcg_temp_free_i32(tmp2);
10419 store_reg(s, rd, tmp);
10421 tcg_temp_free_i32(tmp);
10425 case 13: /* Misc data processing. */
10426 op = ((insn >> 22) & 6) | ((insn >> 7) & 1);
10427 if (op < 4 && (insn & 0xf000) != 0xf000)
10430 case 0: /* Register controlled shift. */
10431 tmp = load_reg(s, rn);
10432 tmp2 = load_reg(s, rm);
10433 if ((insn & 0x70) != 0)
10435 op = (insn >> 21) & 3;
10436 logic_cc = (insn & (1 << 20)) != 0;
10437 gen_arm_shift_reg(tmp, op, tmp2, logic_cc);
10440 store_reg(s, rd, tmp);
10442 case 1: /* Sign/zero extend. */
10443 op = (insn >> 20) & 7;
10445 case 0: /* SXTAH, SXTH */
10446 case 1: /* UXTAH, UXTH */
10447 case 4: /* SXTAB, SXTB */
10448 case 5: /* UXTAB, UXTB */
10450 case 2: /* SXTAB16, SXTB16 */
10451 case 3: /* UXTAB16, UXTB16 */
10452 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
10460 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
10464 tmp = load_reg(s, rm);
10465 shift = (insn >> 4) & 3;
10466 /* ??? In many cases it's not necessary to do a
10467 rotate, a shift is sufficient. */
10469 tcg_gen_rotri_i32(tmp, tmp, shift * 8);
10470 op = (insn >> 20) & 7;
10472 case 0: gen_sxth(tmp); break;
10473 case 1: gen_uxth(tmp); break;
10474 case 2: gen_sxtb16(tmp); break;
10475 case 3: gen_uxtb16(tmp); break;
10476 case 4: gen_sxtb(tmp); break;
10477 case 5: gen_uxtb(tmp); break;
10479 g_assert_not_reached();
10482 tmp2 = load_reg(s, rn);
10483 if ((op >> 1) == 1) {
10484 gen_add16(tmp, tmp2);
10486 tcg_gen_add_i32(tmp, tmp, tmp2);
10487 tcg_temp_free_i32(tmp2);
10490 store_reg(s, rd, tmp);
10492 case 2: /* SIMD add/subtract. */
10493 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
10496 op = (insn >> 20) & 7;
10497 shift = (insn >> 4) & 7;
10498 if ((op & 3) == 3 || (shift & 3) == 3)
10500 tmp = load_reg(s, rn);
10501 tmp2 = load_reg(s, rm);
10502 gen_thumb2_parallel_addsub(op, shift, tmp, tmp2);
10503 tcg_temp_free_i32(tmp2);
10504 store_reg(s, rd, tmp);
10506 case 3: /* Other data processing. */
10507 op = ((insn >> 17) & 0x38) | ((insn >> 4) & 7);
10509 /* Saturating add/subtract. */
10510 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
10513 tmp = load_reg(s, rn);
10514 tmp2 = load_reg(s, rm);
10516 gen_helper_double_saturate(tmp, cpu_env, tmp);
10518 gen_helper_sub_saturate(tmp, cpu_env, tmp2, tmp);
10520 gen_helper_add_saturate(tmp, cpu_env, tmp, tmp2);
10521 tcg_temp_free_i32(tmp2);
10524 case 0x0a: /* rbit */
10525 case 0x08: /* rev */
10526 case 0x09: /* rev16 */
10527 case 0x0b: /* revsh */
10528 case 0x18: /* clz */
10530 case 0x10: /* sel */
10531 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
10535 case 0x20: /* crc32/crc32c */
10541 if (!arm_dc_feature(s, ARM_FEATURE_CRC)) {
10548 tmp = load_reg(s, rn);
10550 case 0x0a: /* rbit */
10551 gen_helper_rbit(tmp, tmp);
10553 case 0x08: /* rev */
10554 tcg_gen_bswap32_i32(tmp, tmp);
10556 case 0x09: /* rev16 */
10559 case 0x0b: /* revsh */
10562 case 0x10: /* sel */
10563 tmp2 = load_reg(s, rm);
10564 tmp3 = tcg_temp_new_i32();
10565 tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUARMState, GE));
10566 gen_helper_sel_flags(tmp, tmp3, tmp, tmp2);
10567 tcg_temp_free_i32(tmp3);
10568 tcg_temp_free_i32(tmp2);
10570 case 0x18: /* clz */
10571 tcg_gen_clzi_i32(tmp, tmp, 32);
10581 uint32_t sz = op & 0x3;
10582 uint32_t c = op & 0x8;
10584 tmp2 = load_reg(s, rm);
10586 tcg_gen_andi_i32(tmp2, tmp2, 0xff);
10587 } else if (sz == 1) {
10588 tcg_gen_andi_i32(tmp2, tmp2, 0xffff);
10590 tmp3 = tcg_const_i32(1 << sz);
10592 gen_helper_crc32c(tmp, tmp, tmp2, tmp3);
10594 gen_helper_crc32(tmp, tmp, tmp2, tmp3);
10596 tcg_temp_free_i32(tmp2);
10597 tcg_temp_free_i32(tmp3);
10601 g_assert_not_reached();
10604 store_reg(s, rd, tmp);
10606 case 4: case 5: /* 32-bit multiply. Sum of absolute differences. */
10607 switch ((insn >> 20) & 7) {
10608 case 0: /* 32 x 32 -> 32 */
10609 case 7: /* Unsigned sum of absolute differences. */
10611 case 1: /* 16 x 16 -> 32 */
10612 case 2: /* Dual multiply add. */
10613 case 3: /* 32 * 16 -> 32msb */
10614 case 4: /* Dual multiply subtract. */
10615 case 5: case 6: /* 32 * 32 -> 32msb (SMMUL, SMMLA, SMMLS) */
10616 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
10621 op = (insn >> 4) & 0xf;
10622 tmp = load_reg(s, rn);
10623 tmp2 = load_reg(s, rm);
10624 switch ((insn >> 20) & 7) {
10625 case 0: /* 32 x 32 -> 32 */
10626 tcg_gen_mul_i32(tmp, tmp, tmp2);
10627 tcg_temp_free_i32(tmp2);
10629 tmp2 = load_reg(s, rs);
10631 tcg_gen_sub_i32(tmp, tmp2, tmp);
10633 tcg_gen_add_i32(tmp, tmp, tmp2);
10634 tcg_temp_free_i32(tmp2);
10637 case 1: /* 16 x 16 -> 32 */
10638 gen_mulxy(tmp, tmp2, op & 2, op & 1);
10639 tcg_temp_free_i32(tmp2);
10641 tmp2 = load_reg(s, rs);
10642 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
10643 tcg_temp_free_i32(tmp2);
10646 case 2: /* Dual multiply add. */
10647 case 4: /* Dual multiply subtract. */
10649 gen_swap_half(tmp2);
10650 gen_smul_dual(tmp, tmp2);
10651 if (insn & (1 << 22)) {
10652 /* This subtraction cannot overflow. */
10653 tcg_gen_sub_i32(tmp, tmp, tmp2);
/* This addition cannot overflow 32 bits;
 * however it may overflow when considered as a signed
 * operation, in which case we must set the Q flag.
 */
10659 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
10661 tcg_temp_free_i32(tmp2);
10664 tmp2 = load_reg(s, rs);
10665 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
10666 tcg_temp_free_i32(tmp2);
10669 case 3: /* 32 * 16 -> 32msb */
10671 tcg_gen_sari_i32(tmp2, tmp2, 16);
10674 tmp64 = gen_muls_i64_i32(tmp, tmp2);
10675 tcg_gen_shri_i64(tmp64, tmp64, 16);
10676 tmp = tcg_temp_new_i32();
10677 tcg_gen_extrl_i64_i32(tmp, tmp64);
10678 tcg_temp_free_i64(tmp64);
10681 tmp2 = load_reg(s, rs);
10682 gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
10683 tcg_temp_free_i32(tmp2);
10686 case 5: case 6: /* 32 * 32 -> 32msb (SMMUL, SMMLA, SMMLS) */
10687 tmp64 = gen_muls_i64_i32(tmp, tmp2);
10689 tmp = load_reg(s, rs);
10690 if (insn & (1 << 20)) {
10691 tmp64 = gen_addq_msw(tmp64, tmp);
10693 tmp64 = gen_subq_msw(tmp64, tmp);
10696 if (insn & (1 << 4)) {
10697 tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u);
10699 tcg_gen_shri_i64(tmp64, tmp64, 32);
10700 tmp = tcg_temp_new_i32();
10701 tcg_gen_extrl_i64_i32(tmp, tmp64);
10702 tcg_temp_free_i64(tmp64);
10704 case 7: /* Unsigned sum of absolute differences. */
10705 gen_helper_usad8(tmp, tmp, tmp2);
10706 tcg_temp_free_i32(tmp2);
10708 tmp2 = load_reg(s, rs);
10709 tcg_gen_add_i32(tmp, tmp, tmp2);
10710 tcg_temp_free_i32(tmp2);
10714 store_reg(s, rd, tmp);
10716 case 6: case 7: /* 64-bit multiply, Divide. */
10717 op = ((insn >> 4) & 0xf) | ((insn >> 16) & 0x70);
10718 tmp = load_reg(s, rn);
10719 tmp2 = load_reg(s, rm);
10720 if ((op & 0x50) == 0x10) {
10722 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DIV)) {
10726 gen_helper_udiv(tmp, tmp, tmp2);
10728 gen_helper_sdiv(tmp, tmp, tmp2);
10729 tcg_temp_free_i32(tmp2);
10730 store_reg(s, rd, tmp);
10731 } else if ((op & 0xe) == 0xc) {
10732 /* Dual multiply accumulate long. */
10733 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
10734 tcg_temp_free_i32(tmp);
10735 tcg_temp_free_i32(tmp2);
10739 gen_swap_half(tmp2);
10740 gen_smul_dual(tmp, tmp2);
10742 tcg_gen_sub_i32(tmp, tmp, tmp2);
10744 tcg_gen_add_i32(tmp, tmp, tmp2);
10746 tcg_temp_free_i32(tmp2);
10748 tmp64 = tcg_temp_new_i64();
10749 tcg_gen_ext_i32_i64(tmp64, tmp);
10750 tcg_temp_free_i32(tmp);
10751 gen_addq(s, tmp64, rs, rd);
10752 gen_storeq_reg(s, rs, rd, tmp64);
10753 tcg_temp_free_i64(tmp64);
10756 /* Unsigned 64-bit multiply */
10757 tmp64 = gen_mulu_i64_i32(tmp, tmp2);
10761 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
10762 tcg_temp_free_i32(tmp2);
10763 tcg_temp_free_i32(tmp);
10766 gen_mulxy(tmp, tmp2, op & 2, op & 1);
10767 tcg_temp_free_i32(tmp2);
10768 tmp64 = tcg_temp_new_i64();
10769 tcg_gen_ext_i32_i64(tmp64, tmp);
10770 tcg_temp_free_i32(tmp);
10772 /* Signed 64-bit multiply */
10773 tmp64 = gen_muls_i64_i32(tmp, tmp2);
10778 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
10779 tcg_temp_free_i64(tmp64);
10782 gen_addq_lo(s, tmp64, rs);
10783 gen_addq_lo(s, tmp64, rd);
10784 } else if (op & 0x40) {
10785 /* 64-bit accumulate. */
10786 gen_addq(s, tmp64, rs, rd);
10788 gen_storeq_reg(s, rs, rd, tmp64);
10789 tcg_temp_free_i64(tmp64);
10794 case 6: case 7: case 14: case 15:
10796 if (arm_dc_feature(s, ARM_FEATURE_M)) {
/* We don't currently implement M profile FP support,
 * so this entire space should give a NOCP fault, with
 * the exception of the v8M VLLDM and VLSTM insns, which
 * must be NOPs in Secure state and UNDEF in Nonsecure state.
 */
10802 if (arm_dc_feature(s, ARM_FEATURE_V8) &&
10803 (insn & 0xffa00f00) == 0xec200a00) {
/* 0b1110_1100_0x1x_xxxx_xxxx_1010_xxxx_xxxx
 *  - VLLDM, VLSTM
 * We choose to UNDEF if the RAZ bits are non-zero.
 */
10808 if (!s->v8m_secure || (insn & 0x0040f0ff)) {
10811 /* Just NOP since FP support is not implemented */
10814 /* All other insns: NOCP */
10815 gen_exception_insn(s, 4, EXCP_NOCP, syn_uncategorized(),
10816 default_exception_el(s));
10819 if ((insn & 0xfe000a00) == 0xfc000800
10820 && arm_dc_feature(s, ARM_FEATURE_V8)) {
10821 /* The Thumb2 and ARM encodings are identical. */
10822 if (disas_neon_insn_3same_ext(s, insn)) {
10825 } else if ((insn & 0xff000a00) == 0xfe000800
10826 && arm_dc_feature(s, ARM_FEATURE_V8)) {
10827 /* The Thumb2 and ARM encodings are identical. */
10828 if (disas_neon_insn_2reg_scalar_ext(s, insn)) {
10831 } else if (((insn >> 24) & 3) == 3) {
10832 /* Translate into the equivalent ARM encoding. */
10833 insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
10834 if (disas_neon_data_insn(s, insn)) {
10837 } else if (((insn >> 8) & 0xe) == 10) {
10838 if (disas_vfp_insn(s, insn)) {
10842 if (insn & (1 << 28))
10844 if (disas_coproc_insn(s, insn)) {
10849 case 8: case 9: case 10: case 11:
10850 if (insn & (1 << 15)) {
10851 /* Branches, misc control. */
10852 if (insn & 0x5000) {
10853 /* Unconditional branch. */
10854 /* signextend(hw1[10:0]) -> offset[:12]. */
10855 offset = ((int32_t)insn << 5) >> 9 & ~(int32_t)0xfff;
/* hw2[10:0] -> offset[11:1]. */
10857 offset |= (insn & 0x7ff) << 1;
10858 /* (~hw2[13, 11] ^ offset[24]) -> offset[23,22]
10859 offset[24:22] already have the same value because of the
10860 sign extension above. */
10861 offset ^= ((~insn) & (1 << 13)) << 10;
10862 offset ^= ((~insn) & (1 << 11)) << 11;
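/* Worked example (illustrative, not from the original source): for the
 * 32-bit Thumb BL encoding insn = 0xf000f802 the fields are
 *   S = 0, imm10 = 0 (first halfword), J1 = 1, J2 = 1, imm11 = 2.
 * The steps above then compute:
 *   ((int32_t)insn << 5) >> 9 & ~0xfff   -> 0x00000000   (S sign-extended, imm10)
 *   offset |= (insn & 0x7ff) << 1        -> 0x00000004   (imm11)
 *   offset ^= (~insn & (1 << 13)) << 10  -> unchanged, J1 == 1
 *   offset ^= (~insn & (1 << 11)) << 11  -> unchanged, J2 == 1
 * i.e. a displacement of +4 bytes, matching the architectural rule
 * I1 = NOT(J1 XOR S), I2 = NOT(J2 XOR S).
 */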
10864 if (insn & (1 << 14)) {
10865 /* Branch and link. */
10866 tcg_gen_movi_i32(cpu_R[14], s->pc | 1);
10870 if (insn & (1 << 12)) {
10872 gen_jmp(s, offset);
10875 offset &= ~(uint32_t)2;
10876 /* thumb2 bx, no need to check */
10877 gen_bx_im(s, offset);
10879 } else if (((insn >> 23) & 7) == 7) {
10881 if (insn & (1 << 13))
10884 if (insn & (1 << 26)) {
10885 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10888 if (!(insn & (1 << 20))) {
10889 /* Hypervisor call (v7) */
10890 int imm16 = extract32(insn, 16, 4) << 12
10891 | extract32(insn, 0, 12);
10898 /* Secure monitor call (v6+) */
10906 op = (insn >> 20) & 7;
10908 case 0: /* msr cpsr. */
10909 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10910 tmp = load_reg(s, rn);
10911 /* the constant is the mask and SYSm fields */
10912 addr = tcg_const_i32(insn & 0xfff);
10913 gen_helper_v7m_msr(cpu_env, addr, tmp);
10914 tcg_temp_free_i32(addr);
10915 tcg_temp_free_i32(tmp);
10920 case 1: /* msr spsr. */
10921 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10925 if (extract32(insn, 5, 1)) {
10927 int sysm = extract32(insn, 8, 4) |
10928 (extract32(insn, 4, 1) << 4);
10931 gen_msr_banked(s, r, sysm, rm);
10935 /* MSR (for PSRs) */
10936 tmp = load_reg(s, rn);
10938 msr_mask(s, (insn >> 8) & 0xf, op == 1),
10942 case 2: /* cps, nop-hint. */
10943 if (((insn >> 8) & 7) == 0) {
10944 gen_nop_hint(s, insn & 0xff);
10946 /* Implemented as NOP in user mode. */
10951 if (insn & (1 << 10)) {
10952 if (insn & (1 << 7))
10954 if (insn & (1 << 6))
10956 if (insn & (1 << 5))
10958 if (insn & (1 << 9))
10959 imm = CPSR_A | CPSR_I | CPSR_F;
10961 if (insn & (1 << 8)) {
10963 imm |= (insn & 0x1f);
10966 gen_set_psr_im(s, offset, 0, imm);
10969 case 3: /* Special control operations. */
10971 op = (insn >> 4) & 0xf;
10973 case 2: /* clrex */
10978 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
/* We need to break the TB after this insn
 * to execute self-modifying code correctly
 * and also to take any pending interrupts
 * immediately.
 */
10986 gen_goto_tb(s, 0, s->pc & ~1);
/* Trivial implementation equivalent to bx.
 * This instruction doesn't exist at all for M-profile.
 */
10996 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10999 tmp = load_reg(s, rn);
11002 case 5: /* Exception return. */
11006 if (rn != 14 || rd != 15) {
11009 tmp = load_reg(s, rn);
11010 tcg_gen_subi_i32(tmp, tmp, insn & 0xff);
11011 gen_exception_return(s, tmp);
11014 if (extract32(insn, 5, 1) &&
11015 !arm_dc_feature(s, ARM_FEATURE_M)) {
11017 int sysm = extract32(insn, 16, 4) |
11018 (extract32(insn, 4, 1) << 4);
11020 gen_mrs_banked(s, 0, sysm, rd);
11024 if (extract32(insn, 16, 4) != 0xf) {
11027 if (!arm_dc_feature(s, ARM_FEATURE_M) &&
11028 extract32(insn, 0, 8) != 0) {
11033 tmp = tcg_temp_new_i32();
11034 if (arm_dc_feature(s, ARM_FEATURE_M)) {
11035 addr = tcg_const_i32(insn & 0xff);
11036 gen_helper_v7m_mrs(tmp, cpu_env, addr);
11037 tcg_temp_free_i32(addr);
11039 gen_helper_cpsr_read(tmp, cpu_env);
11041 store_reg(s, rd, tmp);
11044 if (extract32(insn, 5, 1) &&
11045 !arm_dc_feature(s, ARM_FEATURE_M)) {
11047 int sysm = extract32(insn, 16, 4) |
11048 (extract32(insn, 4, 1) << 4);
11050 gen_mrs_banked(s, 1, sysm, rd);
11055 /* Not accessible in user mode. */
11056 if (IS_USER(s) || arm_dc_feature(s, ARM_FEATURE_M)) {
11060 if (extract32(insn, 16, 4) != 0xf ||
11061 extract32(insn, 0, 8) != 0) {
11065 tmp = load_cpu_field(spsr);
11066 store_reg(s, rd, tmp);
11071 /* Conditional branch. */
11072 op = (insn >> 22) & 0xf;
11073 /* Generate a conditional jump to next instruction. */
11074 s->condlabel = gen_new_label();
11075 arm_gen_test_cc(op ^ 1, s->condlabel);
11078 /* offset[11:1] = insn[10:0] */
11079 offset = (insn & 0x7ff) << 1;
11080 /* offset[17:12] = insn[21:16]. */
11081 offset |= (insn & 0x003f0000) >> 4;
11082 /* offset[31:20] = insn[26]. */
11083 offset |= ((int32_t)((insn << 5) & 0x80000000)) >> 11;
11084 /* offset[18] = insn[13]. */
11085 offset |= (insn & (1 << 13)) << 5;
11086 /* offset[19] = insn[11]. */
11087 offset |= (insn & (1 << 11)) << 8;
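/* Worked example (illustrative, not from the original source): for
 * insn = 0xf43faffe (BEQ.W with S = 1, imm6 = 0x3f, J1 = 1, J2 = 1,
 * imm11 = 0x7fe) the assembly above produces
 *   (insn & 0x7ff) << 1                  -> 0x00000ffc
 *   | (insn & 0x003f0000) >> 4           -> 0x0003fffc
 *   | ((insn << 5) & 0x80000000) >> 11   -> 0xfff3fffc   (sign)
 *   | (insn & (1 << 13)) << 5            -> 0xfff7fffc   (J1)
 *   | (insn & (1 << 11)) << 8            -> 0xfffffffc   (J2)
 * i.e. offset = -4, so the branch target is s->pc - 4, the address of
 * the branch insn itself, since s->pc already points past it.
 */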
11089 /* jump to the offset */
11090 gen_jmp(s, s->pc + offset);
11093 /* Data processing immediate. */
11094 if (insn & (1 << 25)) {
11095 if (insn & (1 << 24)) {
11096 if (insn & (1 << 20))
11098 /* Bitfield/Saturate. */
11099 op = (insn >> 21) & 7;
11101 shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
11103 tmp = tcg_temp_new_i32();
11104 tcg_gen_movi_i32(tmp, 0);
11106 tmp = load_reg(s, rn);
11109 case 2: /* Signed bitfield extract. */
11111 if (shift + imm > 32)
11114 tcg_gen_sextract_i32(tmp, tmp, shift, imm);
11117 case 6: /* Unsigned bitfield extract. */
11119 if (shift + imm > 32)
11122 tcg_gen_extract_i32(tmp, tmp, shift, imm);
11125 case 3: /* Bitfield insert/clear. */
11128 imm = imm + 1 - shift;
11130 tmp2 = load_reg(s, rd);
11131 tcg_gen_deposit_i32(tmp, tmp2, tmp, shift, imm);
11132 tcg_temp_free_i32(tmp2);
11137 default: /* Saturate. */
11140 tcg_gen_sari_i32(tmp, tmp, shift);
11142 tcg_gen_shli_i32(tmp, tmp, shift);
11144 tmp2 = tcg_const_i32(imm);
11147 if ((op & 1) && shift == 0) {
11148 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
11149 tcg_temp_free_i32(tmp);
11150 tcg_temp_free_i32(tmp2);
11153 gen_helper_usat16(tmp, cpu_env, tmp, tmp2);
11155 gen_helper_usat(tmp, cpu_env, tmp, tmp2);
11159 if ((op & 1) && shift == 0) {
11160 if (!arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
11161 tcg_temp_free_i32(tmp);
11162 tcg_temp_free_i32(tmp2);
11165 gen_helper_ssat16(tmp, cpu_env, tmp, tmp2);
11167 gen_helper_ssat(tmp, cpu_env, tmp, tmp2);
11170 tcg_temp_free_i32(tmp2);
11173 store_reg(s, rd, tmp);
11175 imm = ((insn & 0x04000000) >> 15)
11176 | ((insn & 0x7000) >> 4) | (insn & 0xff);
11177 if (insn & (1 << 22)) {
11178 /* 16-bit immediate. */
11179 imm |= (insn >> 4) & 0xf000;
11180 if (insn & (1 << 23)) {
11182 tmp = load_reg(s, rd);
11183 tcg_gen_ext16u_i32(tmp, tmp);
11184 tcg_gen_ori_i32(tmp, tmp, imm << 16);
11187 tmp = tcg_temp_new_i32();
11188 tcg_gen_movi_i32(tmp, imm);
11191 /* Add/sub 12-bit immediate. */
11193 offset = s->pc & ~(uint32_t)3;
11194 if (insn & (1 << 23))
11198 tmp = tcg_temp_new_i32();
11199 tcg_gen_movi_i32(tmp, offset);
11201 tmp = load_reg(s, rn);
11202 if (insn & (1 << 23))
11203 tcg_gen_subi_i32(tmp, tmp, imm);
11205 tcg_gen_addi_i32(tmp, tmp, imm);
11208 store_reg(s, rd, tmp);
11211 int shifter_out = 0;
11212 /* modified 12-bit immediate. */
11213 shift = ((insn & 0x04000000) >> 23) | ((insn & 0x7000) >> 12);
11214 imm = (insn & 0xff);
11217 /* Nothing to do. */
11219 case 1: /* 00XY00XY */
11222 case 2: /* XY00XY00 */
11226 case 3: /* XYXYXYXY */
11230 default: /* Rotated constant. */
shift = (shift << 1) | (imm >> 7);
imm |= 0x80;
imm = imm << (32 - shift);
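/* Worked example (illustrative, not from the original source): for a
 * modified immediate with i:imm3:imm8 = 0x4ab (i = 0, imm3 = 0b100,
 * imm8 = 0xab) we start with shift = 0b0100 and imm = 0xab, then
 *   shift = (0b0100 << 1) | (0xab >> 7) = 9
 *   imm  |= 0x80                        (forced leading one)
 *   imm   = 0xab << (32 - 9)            = 0x55800000
 * which equals ROR(0x000000ab, 9); the plain left shift is enough here
 * because for rotated constants shift is always >= 8, so no bits wrap
 * back into the low end.
 */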
11237 tmp2 = tcg_temp_new_i32();
11238 tcg_gen_movi_i32(tmp2, imm);
11239 rn = (insn >> 16) & 0xf;
11241 tmp = tcg_temp_new_i32();
11242 tcg_gen_movi_i32(tmp, 0);
11244 tmp = load_reg(s, rn);
11246 op = (insn >> 21) & 0xf;
11247 if (gen_thumb2_data_op(s, op, (insn & (1 << 20)) != 0,
11248 shifter_out, tmp, tmp2))
11250 tcg_temp_free_i32(tmp2);
11251 rd = (insn >> 8) & 0xf;
11253 store_reg(s, rd, tmp);
11255 tcg_temp_free_i32(tmp);
11260 case 12: /* Load/store single data item. */
11267 if ((insn & 0x01100000) == 0x01000000) {
11268 if (disas_neon_ls_insn(s, insn)) {
11273 op = ((insn >> 21) & 3) | ((insn >> 22) & 4);
11275 if (!(insn & (1 << 20))) {
/* Byte or halfword load space with dest == r15 : memory hints.
 * Catch them early so we don't emit pointless addressing code.
 * This space is a mix of:
 *  PLD/PLDW/PLI,  which we implement as NOPs (note that unlike
 *     the ARM encodings, PLDW space doesn't UNDEF for non-v7MP
 *     cores)
 *  unallocated hints, which must be treated as NOPs
 *  UNPREDICTABLE space, which we NOP or UNDEF depending on
 *     which is easiest for the decoding logic
 *  Some space which must UNDEF
 */
11290 int op1 = (insn >> 23) & 3;
11291 int op2 = (insn >> 6) & 0x3f;
/* UNPREDICTABLE, unallocated hint or
 * PLD/PLDW/PLI (literal)
 */
11302 return; /* PLD/PLDW/PLI or unallocated hint */
11304 if ((op2 == 0) || ((op2 & 0x3c) == 0x30)) {
11305 return; /* PLD/PLDW/PLI or unallocated hint */
11307 /* UNDEF space, or an UNPREDICTABLE */
11311 memidx = get_mem_index(s);
11313 addr = tcg_temp_new_i32();
11315 /* s->pc has already been incremented by 4. */
11316 imm = s->pc & 0xfffffffc;
11317 if (insn & (1 << 23))
11318 imm += insn & 0xfff;
11320 imm -= insn & 0xfff;
11321 tcg_gen_movi_i32(addr, imm);
11323 addr = load_reg(s, rn);
11324 if (insn & (1 << 23)) {
11325 /* Positive offset. */
11326 imm = insn & 0xfff;
11327 tcg_gen_addi_i32(addr, addr, imm);
11330 switch ((insn >> 8) & 0xf) {
11331 case 0x0: /* Shifted Register. */
11332 shift = (insn >> 4) & 0xf;
11334 tcg_temp_free_i32(addr);
11337 tmp = load_reg(s, rm);
11339 tcg_gen_shli_i32(tmp, tmp, shift);
11340 tcg_gen_add_i32(addr, addr, tmp);
11341 tcg_temp_free_i32(tmp);
11343 case 0xc: /* Negative offset. */
11344 tcg_gen_addi_i32(addr, addr, -imm);
11346 case 0xe: /* User privilege. */
11347 tcg_gen_addi_i32(addr, addr, imm);
11348 memidx = get_a32_user_mem_index(s);
11350 case 0x9: /* Post-decrement. */
11352 /* Fall through. */
11353 case 0xb: /* Post-increment. */
11357 case 0xd: /* Pre-decrement. */
11359 /* Fall through. */
11360 case 0xf: /* Pre-increment. */
11361 tcg_gen_addi_i32(addr, addr, imm);
11365 tcg_temp_free_i32(addr);
11371 issinfo = writeback ? ISSInvalid : rs;
11373 if (insn & (1 << 20)) {
11375 tmp = tcg_temp_new_i32();
11378 gen_aa32_ld8u_iss(s, tmp, addr, memidx, issinfo);
11381 gen_aa32_ld8s_iss(s, tmp, addr, memidx, issinfo);
11384 gen_aa32_ld16u_iss(s, tmp, addr, memidx, issinfo);
11387 gen_aa32_ld16s_iss(s, tmp, addr, memidx, issinfo);
11390 gen_aa32_ld32u_iss(s, tmp, addr, memidx, issinfo);
11393 tcg_temp_free_i32(tmp);
11394 tcg_temp_free_i32(addr);
11398 gen_bx_excret(s, tmp);
11400 store_reg(s, rs, tmp);
11404 tmp = load_reg(s, rs);
11407 gen_aa32_st8_iss(s, tmp, addr, memidx, issinfo);
11410 gen_aa32_st16_iss(s, tmp, addr, memidx, issinfo);
11413 gen_aa32_st32_iss(s, tmp, addr, memidx, issinfo);
11416 tcg_temp_free_i32(tmp);
11417 tcg_temp_free_i32(addr);
11420 tcg_temp_free_i32(tmp);
11423 tcg_gen_addi_i32(addr, addr, imm);
11425 store_reg(s, rn, addr);
11427 tcg_temp_free_i32(addr);
11436 gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
11437 default_exception_el(s));
11440 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
11442 uint32_t val, op, rm, rn, rd, shift, cond;
11449 switch (insn >> 12) {
11453 op = (insn >> 11) & 3;
11456 rn = (insn >> 3) & 7;
11457 tmp = load_reg(s, rn);
11458 if (insn & (1 << 10)) {
11460 tmp2 = tcg_temp_new_i32();
11461 tcg_gen_movi_i32(tmp2, (insn >> 6) & 7);
11464 rm = (insn >> 6) & 7;
11465 tmp2 = load_reg(s, rm);
11467 if (insn & (1 << 9)) {
11468 if (s->condexec_mask)
11469 tcg_gen_sub_i32(tmp, tmp, tmp2);
11471 gen_sub_CC(tmp, tmp, tmp2);
11473 if (s->condexec_mask)
11474 tcg_gen_add_i32(tmp, tmp, tmp2);
11476 gen_add_CC(tmp, tmp, tmp2);
11478 tcg_temp_free_i32(tmp2);
11479 store_reg(s, rd, tmp);
11481 /* shift immediate */
11482 rm = (insn >> 3) & 7;
11483 shift = (insn >> 6) & 0x1f;
11484 tmp = load_reg(s, rm);
11485 gen_arm_shift_im(tmp, op, shift, s->condexec_mask == 0);
11486 if (!s->condexec_mask)
11488 store_reg(s, rd, tmp);
11492 /* arithmetic large immediate */
11493 op = (insn >> 11) & 3;
11494 rd = (insn >> 8) & 0x7;
11495 if (op == 0) { /* mov */
11496 tmp = tcg_temp_new_i32();
11497 tcg_gen_movi_i32(tmp, insn & 0xff);
11498 if (!s->condexec_mask)
11500 store_reg(s, rd, tmp);
11502 tmp = load_reg(s, rd);
11503 tmp2 = tcg_temp_new_i32();
11504 tcg_gen_movi_i32(tmp2, insn & 0xff);
11507 gen_sub_CC(tmp, tmp, tmp2);
11508 tcg_temp_free_i32(tmp);
11509 tcg_temp_free_i32(tmp2);
11512 if (s->condexec_mask)
11513 tcg_gen_add_i32(tmp, tmp, tmp2);
11515 gen_add_CC(tmp, tmp, tmp2);
11516 tcg_temp_free_i32(tmp2);
11517 store_reg(s, rd, tmp);
11520 if (s->condexec_mask)
11521 tcg_gen_sub_i32(tmp, tmp, tmp2);
11523 gen_sub_CC(tmp, tmp, tmp2);
11524 tcg_temp_free_i32(tmp2);
11525 store_reg(s, rd, tmp);
11531 if (insn & (1 << 11)) {
11532 rd = (insn >> 8) & 7;
11533 /* load pc-relative. Bit 1 of PC is ignored. */
11534 val = s->pc + 2 + ((insn & 0xff) * 4);
11535 val &= ~(uint32_t)2;
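/* Illustrative example (not from the original source): for
 * "LDR r0, [pc, #4]" (insn 0x4801) at address 0x1002, s->pc is already
 * 0x1004, so val = 0x1004 + 2 + 4 = 0x100a, and the & ~2 aligns it to
 * 0x1008, i.e. Align(insn_addr + 4, 4) + imm8*4 as the architecture
 * requires.
 */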
11536 addr = tcg_temp_new_i32();
11537 tcg_gen_movi_i32(addr, val);
11538 tmp = tcg_temp_new_i32();
11539 gen_aa32_ld32u_iss(s, tmp, addr, get_mem_index(s),
11541 tcg_temp_free_i32(addr);
11542 store_reg(s, rd, tmp);
11545 if (insn & (1 << 10)) {
/* 0b0100_01xx_xxxx_xxxx
 * - data processing extended, branch and exchange
 */
11549 rd = (insn & 7) | ((insn >> 4) & 8);
11550 rm = (insn >> 3) & 0xf;
11551 op = (insn >> 8) & 3;
11554 tmp = load_reg(s, rd);
11555 tmp2 = load_reg(s, rm);
11556 tcg_gen_add_i32(tmp, tmp, tmp2);
11557 tcg_temp_free_i32(tmp2);
11558 store_reg(s, rd, tmp);
11561 tmp = load_reg(s, rd);
11562 tmp2 = load_reg(s, rm);
11563 gen_sub_CC(tmp, tmp, tmp2);
11564 tcg_temp_free_i32(tmp2);
11565 tcg_temp_free_i32(tmp);
11567 case 2: /* mov/cpy */
11568 tmp = load_reg(s, rm);
11569 store_reg(s, rd, tmp);
/* 0b0100_0111_xxxx_xxxx
 * - branch [and link] exchange thumb register
 */
11576 bool link = insn & (1 << 7);
/* BXNS/BLXNS: only exists for v8M with the
 * security extensions, and always UNDEF if NonSecure.
 * We don't implement these in the user-only mode
 * either (in theory you can use them from Secure User
 * mode but they are too tied in to system emulation.)
 */
11591 if (!s->v8m_secure || IS_USER_ONLY) {
11602 tmp = load_reg(s, rm);
11604 val = (uint32_t)s->pc | 1;
11605 tmp2 = tcg_temp_new_i32();
11606 tcg_gen_movi_i32(tmp2, val);
11607 store_reg(s, 14, tmp2);
11610 /* Only BX works as exception-return, not BLX */
11611 gen_bx_excret(s, tmp);
11619 /* data processing register */
11621 rm = (insn >> 3) & 7;
11622 op = (insn >> 6) & 0xf;
11623 if (op == 2 || op == 3 || op == 4 || op == 7) {
11624 /* the shift/rotate ops want the operands backwards */
11633 if (op == 9) { /* neg */
11634 tmp = tcg_temp_new_i32();
11635 tcg_gen_movi_i32(tmp, 0);
11636 } else if (op != 0xf) { /* mvn doesn't read its first operand */
11637 tmp = load_reg(s, rd);
11642 tmp2 = load_reg(s, rm);
11644 case 0x0: /* and */
11645 tcg_gen_and_i32(tmp, tmp, tmp2);
11646 if (!s->condexec_mask)
11649 case 0x1: /* eor */
11650 tcg_gen_xor_i32(tmp, tmp, tmp2);
11651 if (!s->condexec_mask)
11654 case 0x2: /* lsl */
11655 if (s->condexec_mask) {
11656 gen_shl(tmp2, tmp2, tmp);
11658 gen_helper_shl_cc(tmp2, cpu_env, tmp2, tmp);
11659 gen_logic_CC(tmp2);
11662 case 0x3: /* lsr */
11663 if (s->condexec_mask) {
11664 gen_shr(tmp2, tmp2, tmp);
11666 gen_helper_shr_cc(tmp2, cpu_env, tmp2, tmp);
11667 gen_logic_CC(tmp2);
11670 case 0x4: /* asr */
11671 if (s->condexec_mask) {
11672 gen_sar(tmp2, tmp2, tmp);
11674 gen_helper_sar_cc(tmp2, cpu_env, tmp2, tmp);
11675 gen_logic_CC(tmp2);
11678 case 0x5: /* adc */
11679 if (s->condexec_mask) {
11680 gen_adc(tmp, tmp2);
11682 gen_adc_CC(tmp, tmp, tmp2);
11685 case 0x6: /* sbc */
11686 if (s->condexec_mask) {
11687 gen_sub_carry(tmp, tmp, tmp2);
11689 gen_sbc_CC(tmp, tmp, tmp2);
11692 case 0x7: /* ror */
11693 if (s->condexec_mask) {
11694 tcg_gen_andi_i32(tmp, tmp, 0x1f);
11695 tcg_gen_rotr_i32(tmp2, tmp2, tmp);
11697 gen_helper_ror_cc(tmp2, cpu_env, tmp2, tmp);
11698 gen_logic_CC(tmp2);
11701 case 0x8: /* tst */
11702 tcg_gen_and_i32(tmp, tmp, tmp2);
11706 case 0x9: /* neg */
11707 if (s->condexec_mask)
11708 tcg_gen_neg_i32(tmp, tmp2);
11710 gen_sub_CC(tmp, tmp, tmp2);
11712 case 0xa: /* cmp */
11713 gen_sub_CC(tmp, tmp, tmp2);
11716 case 0xb: /* cmn */
11717 gen_add_CC(tmp, tmp, tmp2);
11720 case 0xc: /* orr */
11721 tcg_gen_or_i32(tmp, tmp, tmp2);
11722 if (!s->condexec_mask)
11725 case 0xd: /* mul */
11726 tcg_gen_mul_i32(tmp, tmp, tmp2);
11727 if (!s->condexec_mask)
11730 case 0xe: /* bic */
11731 tcg_gen_andc_i32(tmp, tmp, tmp2);
11732 if (!s->condexec_mask)
11735 case 0xf: /* mvn */
11736 tcg_gen_not_i32(tmp2, tmp2);
11737 if (!s->condexec_mask)
11738 gen_logic_CC(tmp2);
11745 store_reg(s, rm, tmp2);
11747 tcg_temp_free_i32(tmp);
11749 store_reg(s, rd, tmp);
11750 tcg_temp_free_i32(tmp2);
11753 tcg_temp_free_i32(tmp);
11754 tcg_temp_free_i32(tmp2);
11759 /* load/store register offset. */
11761 rn = (insn >> 3) & 7;
11762 rm = (insn >> 6) & 7;
11763 op = (insn >> 9) & 7;
11764 addr = load_reg(s, rn);
11765 tmp = load_reg(s, rm);
11766 tcg_gen_add_i32(addr, addr, tmp);
11767 tcg_temp_free_i32(tmp);
11769 if (op < 3) { /* store */
11770 tmp = load_reg(s, rd);
11772 tmp = tcg_temp_new_i32();
11777 gen_aa32_st32_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
11780 gen_aa32_st16_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
11783 gen_aa32_st8_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
11785 case 3: /* ldrsb */
11786 gen_aa32_ld8s_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
11789 gen_aa32_ld32u_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
11792 gen_aa32_ld16u_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
11795 gen_aa32_ld8u_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
11797 case 7: /* ldrsh */
11798 gen_aa32_ld16s_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
11801 if (op >= 3) { /* load */
11802 store_reg(s, rd, tmp);
11804 tcg_temp_free_i32(tmp);
11806 tcg_temp_free_i32(addr);
11810 /* load/store word immediate offset */
11812 rn = (insn >> 3) & 7;
11813 addr = load_reg(s, rn);
11814 val = (insn >> 4) & 0x7c;
11815 tcg_gen_addi_i32(addr, addr, val);
11817 if (insn & (1 << 11)) {
11819 tmp = tcg_temp_new_i32();
11820 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
11821 store_reg(s, rd, tmp);
11824 tmp = load_reg(s, rd);
11825 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
11826 tcg_temp_free_i32(tmp);
11828 tcg_temp_free_i32(addr);
11832 /* load/store byte immediate offset */
11834 rn = (insn >> 3) & 7;
11835 addr = load_reg(s, rn);
11836 val = (insn >> 6) & 0x1f;
11837 tcg_gen_addi_i32(addr, addr, val);
11839 if (insn & (1 << 11)) {
11841 tmp = tcg_temp_new_i32();
11842 gen_aa32_ld8u_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
11843 store_reg(s, rd, tmp);
11846 tmp = load_reg(s, rd);
11847 gen_aa32_st8_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
11848 tcg_temp_free_i32(tmp);
11850 tcg_temp_free_i32(addr);
11854 /* load/store halfword immediate offset */
11856 rn = (insn >> 3) & 7;
11857 addr = load_reg(s, rn);
11858 val = (insn >> 5) & 0x3e;
11859 tcg_gen_addi_i32(addr, addr, val);
11861 if (insn & (1 << 11)) {
11863 tmp = tcg_temp_new_i32();
11864 gen_aa32_ld16u_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
11865 store_reg(s, rd, tmp);
11868 tmp = load_reg(s, rd);
11869 gen_aa32_st16_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
11870 tcg_temp_free_i32(tmp);
11872 tcg_temp_free_i32(addr);
11876 /* load/store from stack */
11877 rd = (insn >> 8) & 7;
11878 addr = load_reg(s, 13);
11879 val = (insn & 0xff) * 4;
11880 tcg_gen_addi_i32(addr, addr, val);
11882 if (insn & (1 << 11)) {
11884 tmp = tcg_temp_new_i32();
11885 gen_aa32_ld32u_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
11886 store_reg(s, rd, tmp);
11889 tmp = load_reg(s, rd);
11890 gen_aa32_st32_iss(s, tmp, addr, get_mem_index(s), rd | ISSIs16Bit);
11891 tcg_temp_free_i32(tmp);
11893 tcg_temp_free_i32(addr);
11897 /* add to high reg */
11898 rd = (insn >> 8) & 7;
11899 if (insn & (1 << 11)) {
11901 tmp = load_reg(s, 13);
11903 /* PC. bit 1 is ignored. */
11904 tmp = tcg_temp_new_i32();
11905 tcg_gen_movi_i32(tmp, (s->pc + 2) & ~(uint32_t)2);
11907 val = (insn & 0xff) * 4;
11908 tcg_gen_addi_i32(tmp, tmp, val);
11909 store_reg(s, rd, tmp);
11914 op = (insn >> 8) & 0xf;
11917 /* adjust stack pointer */
11918 tmp = load_reg(s, 13);
11919 val = (insn & 0x7f) * 4;
11920 if (insn & (1 << 7))
11921 val = -(int32_t)val;
11922 tcg_gen_addi_i32(tmp, tmp, val);
11923 store_reg(s, 13, tmp);
11926 case 2: /* sign/zero extend. */
11929 rm = (insn >> 3) & 7;
11930 tmp = load_reg(s, rm);
11931 switch ((insn >> 6) & 3) {
11932 case 0: gen_sxth(tmp); break;
11933 case 1: gen_sxtb(tmp); break;
11934 case 2: gen_uxth(tmp); break;
11935 case 3: gen_uxtb(tmp); break;
11937 store_reg(s, rd, tmp);
11939 case 4: case 5: case 0xc: case 0xd:
11941 addr = load_reg(s, 13);
11942 if (insn & (1 << 8))
11946 for (i = 0; i < 8; i++) {
11947 if (insn & (1 << i))
11950 if ((insn & (1 << 11)) == 0) {
11951 tcg_gen_addi_i32(addr, addr, -offset);
11953 for (i = 0; i < 8; i++) {
11954 if (insn & (1 << i)) {
11955 if (insn & (1 << 11)) {
11957 tmp = tcg_temp_new_i32();
11958 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
11959 store_reg(s, i, tmp);
11962 tmp = load_reg(s, i);
11963 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
11964 tcg_temp_free_i32(tmp);
11966 /* advance to the next address. */
11967 tcg_gen_addi_i32(addr, addr, 4);
11971 if (insn & (1 << 8)) {
11972 if (insn & (1 << 11)) {
11974 tmp = tcg_temp_new_i32();
11975 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
/* don't set the pc until the rest of the instruction
   has deallocated the stack */
11980 tmp = load_reg(s, 14);
11981 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
11982 tcg_temp_free_i32(tmp);
11984 tcg_gen_addi_i32(addr, addr, 4);
11986 if ((insn & (1 << 11)) == 0) {
11987 tcg_gen_addi_i32(addr, addr, -offset);
11989 /* write back the new stack pointer */
11990 store_reg(s, 13, addr);
11991 /* set the new PC value */
11992 if ((insn & 0x0900) == 0x0900) {
11993 store_reg_from_load(s, 15, tmp);
11997 case 1: case 3: case 9: case 11: /* czb */
11999 tmp = load_reg(s, rm);
12000 s->condlabel = gen_new_label();
12002 if (insn & (1 << 11))
12003 tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, s->condlabel);
12005 tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, s->condlabel);
12006 tcg_temp_free_i32(tmp);
12007 offset = ((insn & 0xf8) >> 2) | (insn & 0x200) >> 3;
12008 val = (uint32_t)s->pc + 2;
12013 case 15: /* IT, nop-hint. */
12014 if ((insn & 0xf) == 0) {
12015 gen_nop_hint(s, (insn >> 4) & 0xf);
12019 s->condexec_cond = (insn >> 4) & 0xe;
12020 s->condexec_mask = insn & 0x1f;
12021 /* No actual code generated for this insn, just setup state. */
12024 case 0xe: /* bkpt */
12026 int imm8 = extract32(insn, 0, 8);
12028 gen_exception_bkpt_insn(s, 2, syn_aa32_bkpt(imm8, true));
12032 case 0xa: /* rev, and hlt */
12034 int op1 = extract32(insn, 6, 2);
12038 int imm6 = extract32(insn, 0, 6);
12044 /* Otherwise this is rev */
12046 rn = (insn >> 3) & 0x7;
12048 tmp = load_reg(s, rn);
12050 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
12051 case 1: gen_rev16(tmp); break;
12052 case 3: gen_revsh(tmp); break;
12054 g_assert_not_reached();
12056 store_reg(s, rd, tmp);
12061 switch ((insn >> 5) & 7) {
12065 if (((insn >> 3) & 1) != !!(s->be_data == MO_BE)) {
12066 gen_helper_setend(cpu_env);
12067 s->base.is_jmp = DISAS_UPDATE;
12076 if (arm_dc_feature(s, ARM_FEATURE_M)) {
12077 tmp = tcg_const_i32((insn & (1 << 4)) != 0);
12080 addr = tcg_const_i32(19);
12081 gen_helper_v7m_msr(cpu_env, addr, tmp);
12082 tcg_temp_free_i32(addr);
12086 addr = tcg_const_i32(16);
12087 gen_helper_v7m_msr(cpu_env, addr, tmp);
12088 tcg_temp_free_i32(addr);
12090 tcg_temp_free_i32(tmp);
12093 if (insn & (1 << 4)) {
12094 shift = CPSR_A | CPSR_I | CPSR_F;
12098 gen_set_psr_im(s, ((insn & 7) << 6), 0, shift);
12113 /* load/store multiple */
12114 TCGv_i32 loaded_var = NULL;
12115 rn = (insn >> 8) & 0x7;
12116 addr = load_reg(s, rn);
12117 for (i = 0; i < 8; i++) {
12118 if (insn & (1 << i)) {
12119 if (insn & (1 << 11)) {
12121 tmp = tcg_temp_new_i32();
12122 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
12126 store_reg(s, i, tmp);
12130 tmp = load_reg(s, i);
12131 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
12132 tcg_temp_free_i32(tmp);
12134 /* advance to the next address */
12135 tcg_gen_addi_i32(addr, addr, 4);
12138 if ((insn & (1 << rn)) == 0) {
12139 /* base reg not in list: base register writeback */
12140 store_reg(s, rn, addr);
12142 /* base reg in list: if load, complete it now */
12143 if (insn & (1 << 11)) {
12144 store_reg(s, rn, loaded_var);
12146 tcg_temp_free_i32(addr);
12151 /* conditional branch or swi */
12152 cond = (insn >> 8) & 0xf;
12158 gen_set_pc_im(s, s->pc);
12159 s->svc_imm = extract32(insn, 0, 8);
12160 s->base.is_jmp = DISAS_SWI;
12163 /* generate a conditional jump to next instruction */
12164 s->condlabel = gen_new_label();
12165 arm_gen_test_cc(cond ^ 1, s->condlabel);
12168 /* jump to the offset */
12169 val = (uint32_t)s->pc + 2;
12170 offset = ((int32_t)insn << 24) >> 24;
12171 val += offset << 1;
12176 if (insn & (1 << 11)) {
/* thumb_insn_is_16bit() ensures we can't get here for
 * a Thumb2 CPU, so this must be a thumb1 split BL/BLX:
 * 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF)
 */
12181 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
12183 offset = ((insn & 0x7ff) << 1);
12184 tmp = load_reg(s, 14);
12185 tcg_gen_addi_i32(tmp, tmp, offset);
12186 tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
12188 tmp2 = tcg_temp_new_i32();
12189 tcg_gen_movi_i32(tmp2, s->pc | 1);
12190 store_reg(s, 14, tmp2);
12194 /* unconditional branch */
12195 val = (uint32_t)s->pc;
12196 offset = ((int32_t)insn << 21) >> 21;
12197 val += (offset << 1) + 2;
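/* Illustrative example (not from the original source): insn = 0xe7fe
 * (the classic "b ." idle loop) has imm11 = 0x7fe, so
 *   offset = ((int32_t)0xe7fe << 21) >> 21 = -2
 *   val    = s->pc + (-2 << 1) + 2 = s->pc - 2
 * which is the address of the branch itself (s->pc points 2 bytes past
 * a 16-bit insn), so the generated TB jumps back to this instruction.
 */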
/* thumb_insn_is_16bit() ensures we can't get here for
 * a Thumb2 CPU, so this must be a thumb1 split BL/BLX.
 */
12205 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
12207 if (insn & (1 << 11)) {
12208 /* 0b1111_1xxx_xxxx_xxxx : BL suffix */
12209 offset = ((insn & 0x7ff) << 1) | 1;
12210 tmp = load_reg(s, 14);
12211 tcg_gen_addi_i32(tmp, tmp, offset);
12213 tmp2 = tcg_temp_new_i32();
12214 tcg_gen_movi_i32(tmp2, s->pc | 1);
12215 store_reg(s, 14, tmp2);
12218 /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix */
12219 uint32_t uoffset = ((int32_t)insn << 21) >> 9;
12221 tcg_gen_movi_i32(cpu_R[14], s->pc + 2 + uoffset);
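/* Illustrative example (not from the original source): the Thumb-1 BL
 * is split over two 16-bit insns.  For a call whose displacement from
 * (prefix address + 4) is +0x1234, the prefix is 0xf001 (imm11 = 1)
 * and the suffix is 0xf91a (imm11 = 0x11a).  Here the prefix stashes
 *   LR = s->pc + 2 + (1 << 12) = prefix_addr + 4 + 0x1000
 * and the suffix case above then adds (0x11a << 1) | 1 = 0x235 to LR,
 * giving the target prefix_addr + 4 + 0x1234 with the Thumb bit set,
 * before writing the real return address back into LR.
 */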
12228 gen_exception_insn(s, 2, EXCP_UDEF, syn_uncategorized(),
12229 default_exception_el(s));
12232 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
/* Return true if the insn at dc->pc might cross a page boundary.
 * (False positives are OK, false negatives are not.)
 * We know this is a Thumb insn, and our caller ensures we are
 * only called if dc->pc is less than 4 bytes from the page
 * boundary, so we cross the page if the first 16 bits indicate
 * that this is a 32 bit insn.
 */
12241 uint16_t insn = arm_lduw_code(env, s->pc, s->sctlr_b);
12243 return !thumb_insn_is_16bit(s, insn);
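/* Illustrative example (not from the original source): only the first
 * halfword is needed for the decision, because on a Thumb-2 core an
 * insn is 32-bit exactly when bits [15:11] of its first halfword are
 * 0b111xx with xx != 00.  So at two bytes before a page boundary,
 * 0xf7ff (first half of a BL) reports a crossing, while 0x4770
 * (BX lr) does not.
 */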
12246 static int arm_tr_init_disas_context(DisasContextBase *dcbase,
12247 CPUState *cs, int max_insns)
12249 DisasContext *dc = container_of(dcbase, DisasContext, base);
12250 CPUARMState *env = cs->env_ptr;
12251 ARMCPU *cpu = arm_env_get_cpu(env);
12253 dc->pc = dc->base.pc_first;
12257 /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
 * there is no secure EL1, so we route exceptions to EL3.
 */
12260 dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
12261 !arm_el_is_aa64(env, 3);
12262 dc->thumb = ARM_TBFLAG_THUMB(dc->base.tb->flags);
12263 dc->sctlr_b = ARM_TBFLAG_SCTLR_B(dc->base.tb->flags);
12264 dc->be_data = ARM_TBFLAG_BE_DATA(dc->base.tb->flags) ? MO_BE : MO_LE;
12265 dc->condexec_mask = (ARM_TBFLAG_CONDEXEC(dc->base.tb->flags) & 0xf) << 1;
12266 dc->condexec_cond = ARM_TBFLAG_CONDEXEC(dc->base.tb->flags) >> 4;
12267 dc->mmu_idx = core_to_arm_mmu_idx(env, ARM_TBFLAG_MMUIDX(dc->base.tb->flags));
12268 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
12269 #if !defined(CONFIG_USER_ONLY)
12270 dc->user = (dc->current_el == 0);
12272 dc->ns = ARM_TBFLAG_NS(dc->base.tb->flags);
12273 dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(dc->base.tb->flags);
12274 dc->vfp_enabled = ARM_TBFLAG_VFPEN(dc->base.tb->flags);
12275 dc->vec_len = ARM_TBFLAG_VECLEN(dc->base.tb->flags);
12276 dc->vec_stride = ARM_TBFLAG_VECSTRIDE(dc->base.tb->flags);
12277 dc->c15_cpar = ARM_TBFLAG_XSCALE_CPAR(dc->base.tb->flags);
12278 dc->v7m_handler_mode = ARM_TBFLAG_HANDLER(dc->base.tb->flags);
12279 dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
12280 regime_is_secure(env, dc->mmu_idx);
12281 dc->cp_regs = cpu->cp_regs;
12282 dc->features = env->features;
/* Single step state. The code-generation logic here is:
 *  SS_ACTIVE == 0:
 *   generate code with no special handling for single-stepping (except
 *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
 *   this happens anyway because those changes are all system register or
 *   PSTATE writes).
 *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
 *   emit code for one insn
 *   emit code to clear PSTATE.SS
 *   emit code to generate software step exception for completed step
 *   end TB (as usual for having generated an exception)
 *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
 *   emit code to generate a software step exception
 *   end the TB
 */
12299 dc->ss_active = ARM_TBFLAG_SS_ACTIVE(dc->base.tb->flags);
12300 dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(dc->base.tb->flags);
12301 dc->is_ldex = false;
12302 dc->ss_same_el = false; /* Can't be true since EL_d must be AArch64 */
12304 dc->next_page_start =
12305 (dc->base.pc_first & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
12307 /* If architectural single step active, limit to 1. */
12308 if (is_singlestepping(dc)) {
12312 /* ARM is a fixed-length ISA. Bound the number of insns to execute
12313 to those left on the page. */
12315 int bound = (dc->next_page_start - dc->base.pc_first) / 4;
12316 max_insns = MIN(max_insns, bound);
12319 cpu_F0s = tcg_temp_new_i32();
12320 cpu_F1s = tcg_temp_new_i32();
12321 cpu_F0d = tcg_temp_new_i64();
12322 cpu_F1d = tcg_temp_new_i64();
12325 /* FIXME: cpu_M0 can probably be the same as cpu_V0. */
12326 cpu_M0 = tcg_temp_new_i64();
12331 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
12333 DisasContext *dc = container_of(dcbase, DisasContext, base);
12335 /* A note on handling of the condexec (IT) bits:
12337 * We want to avoid the overhead of having to write the updated condexec
12338 * bits back to the CPUARMState for every instruction in an IT block. So:
12339 * (1) if the condexec bits are not already zero then we write
12340 * zero back into the CPUARMState now. This avoids complications trying
12341 * to do it at the end of the block. (For example if we don't do this
12342 * it's hard to identify whether we can safely skip writing condexec
12343 * at the end of the TB, which we definitely want to do for the case
12344 * where a TB doesn't do anything with the IT state at all.)
12345 * (2) if we are going to leave the TB then we call gen_set_condexec()
12346 * which will write the correct value into CPUARMState if zero is wrong.
12347 * This is done both for leaving the TB at the end, and for leaving
12348 * it because of an exception we know will happen, which is done in
12349 * gen_exception_insn(). The latter is necessary because we need to
12350 * leave the TB with the PC/IT state just prior to execution of the
12351 * instruction which caused the exception.
12352 * (3) if we leave the TB unexpectedly (eg a data abort on a load)
12353 * then the CPUARMState will be wrong and we need to reset it.
12354 * This is handled in the same way as restoration of the
12355 * PC in these situations; we save the value of the condexec bits
12356 * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
12357 * then uses this to restore them after an exception.
12359 * Note that there are no instructions which can read the condexec
12360 * bits, and none which can write non-static values to them, so
 * we don't need to care about whether CPUARMState is correct in the
 * middle of a TB.
 */
12365 /* Reset the conditional execution bits immediately. This avoids
12366 complications trying to do it at the end of the block. */
12367 if (dc->condexec_mask || dc->condexec_cond) {
12368 TCGv_i32 tmp = tcg_temp_new_i32();
12369 tcg_gen_movi_i32(tmp, 0);
12370 store_cpu_field(tmp, condexec_bits);
12372 tcg_clear_temp_count();
12375 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
12377 DisasContext *dc = container_of(dcbase, DisasContext, base);
12379 tcg_gen_insn_start(dc->pc,
12380 (dc->condexec_cond << 4) | (dc->condexec_mask >> 1),
12382 dc->insn_start = tcg_last_op();
12385 static bool arm_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
12386 const CPUBreakpoint *bp)
12388 DisasContext *dc = container_of(dcbase, DisasContext, base);
12390 if (bp->flags & BP_CPU) {
12391 gen_set_condexec(dc);
12392 gen_set_pc_im(dc, dc->pc);
12393 gen_helper_check_breakpoints(cpu_env);
12394 /* End the TB early; it's likely not going to be executed */
12395 dc->base.is_jmp = DISAS_TOO_MANY;
12397 gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
12398 /* The address covered by the breakpoint must be
12399 included in [tb->pc, tb->pc + tb->size) in order
for it to be properly cleared -- thus we
12401 increment the PC here so that the logic setting
12402 tb->size below does the right thing. */
12403 /* TODO: Advance PC by correct instruction length to
12404 * avoid disassembler error messages */
12406 dc->base.is_jmp = DISAS_NORETURN;
12412 static bool arm_pre_translate_insn(DisasContext *dc)
12414 #ifdef CONFIG_USER_ONLY
12415 /* Intercept jump to the magic kernel page. */
12416 if (dc->pc >= 0xffff0000) {
/* We always get here via a jump, so we know we are not in a
   conditional execution block. */
12419 gen_exception_internal(EXCP_KERNEL_TRAP);
12420 dc->base.is_jmp = DISAS_NORETURN;
12425 if (dc->ss_active && !dc->pstate_ss) {
12426 /* Singlestep state is Active-pending.
12427 * If we're in this state at the start of a TB then either
12428 * a) we just took an exception to an EL which is being debugged
12429 * and this is the first insn in the exception handler
12430 * b) debug exceptions were masked and we just unmasked them
12431 * without changing EL (eg by clearing PSTATE.D)
12432 * In either case we're going to take a swstep exception in the
12433 * "did not step an insn" case, and so the syndrome ISV and EX
 * bits should be zero.
 */
12436 assert(dc->base.num_insns == 1);
12437 gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
12438 default_exception_el(dc));
12439 dc->base.is_jmp = DISAS_NORETURN;
12446 static void arm_post_translate_insn(DisasContext *dc)
12448 if (dc->condjmp && !dc->base.is_jmp) {
12449 gen_set_label(dc->condlabel);
12452 dc->base.pc_next = dc->pc;
12453 translator_loop_temp_check(&dc->base);
12456 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
12458 DisasContext *dc = container_of(dcbase, DisasContext, base);
12459 CPUARMState *env = cpu->env_ptr;
12462 if (arm_pre_translate_insn(dc)) {
12466 insn = arm_ldl_code(env, dc->pc, dc->sctlr_b);
12469 disas_arm_insn(dc, insn);
12471 arm_post_translate_insn(dc);
12473 /* ARM is a fixed-length ISA. We performed the cross-page check
12474 in init_disas_context by adjusting max_insns. */
12477 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
12479 /* Return true if this Thumb insn is always unconditional,
12480 * even inside an IT block. This is true of only a very few
12481 * instructions: BKPT, HLT, and SG.
12483 * A larger class of instructions are UNPREDICTABLE if used
12484 * inside an IT block; we do not need to detect those here, because
12485 * what we do by default (perform the cc check and update the IT
12486 * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
12487 * choice for those situations.
12489 * insn is either a 16-bit or a 32-bit instruction; the two are
12490 * distinguishable because for the 16-bit case the top 16 bits
 * are zeroes, and that isn't a valid 32-bit encoding.
 */
12493 if ((insn & 0xffffff00) == 0xbe00) {
12498 if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
12499 !arm_dc_feature(s, ARM_FEATURE_M)) {
12500 /* HLT: v8A only. This is unconditional even when it is going to
12501 * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
12502 * For v7 cores this was a plain old undefined encoding and so
12503 * honours its cc check. (We might be using the encoding as
12504 * a semihosting trap, but we don't change the cc check behaviour
12505 * on that account, because a debugger connected to a real v7A
12506 * core and emulating semihosting traps by catching the UNDEF
12507 * exception would also only see cases where the cc check passed.
12508 * No guest code should be trying to do a HLT semihosting trap
 * in an IT block anyway.
 */
12514 if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
12515 arm_dc_feature(s, ARM_FEATURE_M)) {
12523 static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
12525 DisasContext *dc = container_of(dcbase, DisasContext, base);
12526 CPUARMState *env = cpu->env_ptr;
12530 if (arm_pre_translate_insn(dc)) {
12534 insn = arm_lduw_code(env, dc->pc, dc->sctlr_b);
12535 is_16bit = thumb_insn_is_16bit(dc, insn);
12538 uint32_t insn2 = arm_lduw_code(env, dc->pc, dc->sctlr_b);
12540 insn = insn << 16 | insn2;
12545 if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
12546 uint32_t cond = dc->condexec_cond;
12548 if (cond != 0x0e) { /* Skip conditional when condition is AL. */
12549 dc->condlabel = gen_new_label();
12550 arm_gen_test_cc(cond ^ 1, dc->condlabel);
12556 disas_thumb_insn(dc, insn);
12558 disas_thumb2_insn(dc, insn);
12561 /* Advance the Thumb condexec condition. */
12562 if (dc->condexec_mask) {
12563 dc->condexec_cond = ((dc->condexec_cond & 0xe) |
12564 ((dc->condexec_mask >> 4) & 1));
12565 dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
12566 if (dc->condexec_mask == 0) {
12567 dc->condexec_cond = 0;
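/* Illustrative walk-through (not from the original source): for
 * "ITE EQ" (insn 0xbf0c) the decoder sets condexec_cond = 0x0 and
 * condexec_mask = 0x0c.  The advance above then yields, insn by insn:
 *   after IT itself :  cond = 0x0 (EQ), mask = 0x18
 *   after 1st insn  :  cond = 0x1 (NE), mask = 0x10
 *   after 2nd insn  :  mask becomes 0, so cond is reset to 0
 * i.e. the first insn in the block executes as EQ, the second as NE,
 * and the IT state is clear again afterwards.
 */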
12571 arm_post_translate_insn(dc);
12573 /* Thumb is a variable-length ISA. Stop translation when the next insn
 * will touch a new page. This ensures that prefetch aborts occur at
 * the right place.
 *
12577 * We want to stop the TB if the next insn starts in a new page,
12578 * or if it spans between this page and the next. This means that
12579 * if we're looking at the last halfword in the page we need to
12580 * see if it's a 16-bit Thumb insn (which will fit in this TB)
12581 * or a 32-bit Thumb insn (which won't).
12582 * This is to avoid generating a silly TB with a single 16-bit insn
12583 * in it at the end of this page (which would execute correctly
 * but isn't very efficient).
 */
12586 if (dc->base.is_jmp == DISAS_NEXT
12587 && (dc->pc >= dc->next_page_start
12588 || (dc->pc >= dc->next_page_start - 3
12589 && insn_crosses_page(env, dc)))) {
12590 dc->base.is_jmp = DISAS_TOO_MANY;
12594 static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
12596 DisasContext *dc = container_of(dcbase, DisasContext, base);
12598 if (tb_cflags(dc->base.tb) & CF_LAST_IO && dc->condjmp) {
12599 /* FIXME: This can theoretically happen with self-modifying code. */
12600 cpu_abort(cpu, "IO on conditional branch instruction");
12603 /* At this stage dc->condjmp will only be set when the skipped
12604 instruction was a conditional branch or trap, and the PC has
12605 already been written. */
12606 gen_set_condexec(dc);
12607 if (dc->base.is_jmp == DISAS_BX_EXCRET) {
12608 /* Exception return branches need some special case code at the
12609 * end of the TB, which is complex enough that it has to
12610 * handle the single-step vs not and the condition-failed
 * insn codepath itself.
 */
12613 gen_bx_excret_final_code(dc);
12614 } else if (unlikely(is_singlestepping(dc))) {
12615 /* Unconditional and "condition passed" instruction codepath. */
12616 switch (dc->base.is_jmp) {
12618 gen_ss_advance(dc);
12619 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
12620 default_exception_el(dc));
12623 gen_ss_advance(dc);
12624 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
12627 gen_ss_advance(dc);
12628 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
12631 case DISAS_TOO_MANY:
12633 gen_set_pc_im(dc, dc->pc);
12636 /* FIXME: Single stepping a WFI insn will not halt the CPU. */
12637 gen_singlestep_exception(dc);
12639 case DISAS_NORETURN:
/* While branches must always occur at the end of an IT block,
   there are a few other things that can cause us to terminate
   the TB in the middle of an IT block:
    - Exception generating instructions (bkpt, swi, undefined).
    - Page boundaries.
    - Hardware watchpoints.
   Hardware breakpoints have already been handled and skip this code.
 */
12651 switch(dc->base.is_jmp) {
12653 case DISAS_TOO_MANY:
12654 gen_goto_tb(dc, 1, dc->pc);
12660 gen_set_pc_im(dc, dc->pc);
12663 /* indicate that the hash table must be used to find the next TB */
12664 tcg_gen_exit_tb(0);
12666 case DISAS_NORETURN:
12667 /* nothing more to generate */
12671 TCGv_i32 tmp = tcg_const_i32((dc->thumb &&
12672 !(dc->insn & (1U << 31))) ? 2 : 4);
12674 gen_helper_wfi(cpu_env, tmp);
12675 tcg_temp_free_i32(tmp);
/* The helper doesn't necessarily throw an exception, but we
 * must go back to the main loop to check for interrupts anyway.
 */
12679 tcg_gen_exit_tb(0);
12683 gen_helper_wfe(cpu_env);
12686 gen_helper_yield(cpu_env);
12689 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
12690 default_exception_el(dc));
12693 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
12696 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
12702 /* "Condition failed" instruction codepath for the branch/trap insn */
12703 gen_set_label(dc->condlabel);
12704 gen_set_condexec(dc);
12705 if (unlikely(is_singlestepping(dc))) {
12706 gen_set_pc_im(dc, dc->pc);
12707 gen_singlestep_exception(dc);
12709 gen_goto_tb(dc, 1, dc->pc);
12713 /* Functions above can change dc->pc, so re-align db->pc_next */
12714 dc->base.pc_next = dc->pc;
12717 static void arm_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu)
12719 DisasContext *dc = container_of(dcbase, DisasContext, base);
12721 qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
12722 log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
12725 static const TranslatorOps arm_translator_ops = {
12726 .init_disas_context = arm_tr_init_disas_context,
12727 .tb_start = arm_tr_tb_start,
12728 .insn_start = arm_tr_insn_start,
12729 .breakpoint_check = arm_tr_breakpoint_check,
12730 .translate_insn = arm_tr_translate_insn,
12731 .tb_stop = arm_tr_tb_stop,
12732 .disas_log = arm_tr_disas_log,
12735 static const TranslatorOps thumb_translator_ops = {
12736 .init_disas_context = arm_tr_init_disas_context,
12737 .tb_start = arm_tr_tb_start,
12738 .insn_start = arm_tr_insn_start,
12739 .breakpoint_check = arm_tr_breakpoint_check,
12740 .translate_insn = thumb_tr_translate_insn,
12741 .tb_stop = arm_tr_tb_stop,
12742 .disas_log = arm_tr_disas_log,
12745 /* generate intermediate code for basic block 'tb'. */
12746 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb)
12749 const TranslatorOps *ops = &arm_translator_ops;
12751 if (ARM_TBFLAG_THUMB(tb->flags)) {
12752 ops = &thumb_translator_ops;
12754 #ifdef TARGET_AARCH64
12755 if (ARM_TBFLAG_AARCH64_STATE(tb->flags)) {
12756 ops = &aarch64_translator_ops;
12760 translator_loop(ops, &dc.base, cpu, tb);
12763 static const char *cpu_mode_names[16] = {
12764 "usr", "fiq", "irq", "svc", "???", "???", "mon", "abt",
12765 "???", "???", "hyp", "und", "???", "???", "???", "sys"
12768 void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
12771 ARMCPU *cpu = ARM_CPU(cs);
12772 CPUARMState *env = &cpu->env;
12776 aarch64_cpu_dump_state(cs, f, cpu_fprintf, flags);
for (i = 0; i < 16; i++) {
12781 cpu_fprintf(f, "R%02d=%08x", i, env->regs[i]);
12783 cpu_fprintf(f, "\n");
12785 cpu_fprintf(f, " ");
12788 if (arm_feature(env, ARM_FEATURE_M)) {
12789 uint32_t xpsr = xpsr_read(env);
12791 const char *ns_status = "";
12793 if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
12794 ns_status = env->v7m.secure ? "S " : "NS ";
12797 if (xpsr & XPSR_EXCP) {
12800 if (env->v7m.control[env->v7m.secure] & R_V7M_CONTROL_NPRIV_MASK) {
12801 mode = "unpriv-thread";
12803 mode = "priv-thread";
12807 cpu_fprintf(f, "XPSR=%08x %c%c%c%c %c %s%s\n",
12809 xpsr & XPSR_N ? 'N' : '-',
12810 xpsr & XPSR_Z ? 'Z' : '-',
12811 xpsr & XPSR_C ? 'C' : '-',
12812 xpsr & XPSR_V ? 'V' : '-',
12813 xpsr & XPSR_T ? 'T' : 'A',
12817 uint32_t psr = cpsr_read(env);
12818 const char *ns_status = "";
12820 if (arm_feature(env, ARM_FEATURE_EL3) &&
12821 (psr & CPSR_M) != ARM_CPU_MODE_MON) {
12822 ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
12825 cpu_fprintf(f, "PSR=%08x %c%c%c%c %c %s%s%d\n",
12827 psr & CPSR_N ? 'N' : '-',
12828 psr & CPSR_Z ? 'Z' : '-',
12829 psr & CPSR_C ? 'C' : '-',
12830 psr & CPSR_V ? 'V' : '-',
12831 psr & CPSR_T ? 'T' : 'A',
12833 cpu_mode_names[psr & 0xf], (psr & 0x10) ? 32 : 26);
12836 if (flags & CPU_DUMP_FPU) {
12837 int numvfpregs = 0;
12838 if (arm_feature(env, ARM_FEATURE_VFP)) {
12841 if (arm_feature(env, ARM_FEATURE_VFP3)) {
12844 for (i = 0; i < numvfpregs; i++) {
12845 uint64_t v = *aa32_vfp_dreg(env, i);
12846 cpu_fprintf(f, "s%02d=%08x s%02d=%08x d%02d=%016" PRIx64 "\n",
12847 i * 2, (uint32_t)v,
12848 i * 2 + 1, (uint32_t)(v >> 32),
12851 cpu_fprintf(f, "FPSCR: %08x\n", (int)env->vfp.xregs[ARM_VFP_FPSCR]);
12855 void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
12856 target_ulong *data)
12860 env->condexec_bits = 0;
12861 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
12863 env->regs[15] = data[0];
12864 env->condexec_bits = data[1];
12865 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;