]> Git Repo - qemu.git/blame - target-arm/translate-a64.c
target-arm: Implement AArch64 MPIDR
[qemu.git] / target-arm / translate-a64.c
CommitLineData
14ade10f
AG
1/*
2 * AArch64 translation
3 *
4 * Copyright (c) 2013 Alexander Graf <[email protected]>
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19#include <stdarg.h>
20#include <stdlib.h>
21#include <stdio.h>
22#include <string.h>
23#include <inttypes.h>
24
25#include "cpu.h"
26#include "tcg-op.h"
27#include "qemu/log.h"
28#include "translate.h"
29#include "qemu/host-utils.h"
30
40f860cd
PM
31#include "exec/gen-icount.h"
32
14ade10f
AG
33#include "helper.h"
34#define GEN_HELPER 1
35#include "helper.h"
36
37static TCGv_i64 cpu_X[32];
38static TCGv_i64 cpu_pc;
832ffa1c 39static TCGv_i32 cpu_NF, cpu_ZF, cpu_CF, cpu_VF;
14ade10f 40
fa2ef212
MM
41/* Load/store exclusive handling */
42static TCGv_i64 cpu_exclusive_addr;
43static TCGv_i64 cpu_exclusive_val;
44static TCGv_i64 cpu_exclusive_high;
45#ifdef CONFIG_USER_ONLY
46static TCGv_i64 cpu_exclusive_test;
47static TCGv_i32 cpu_exclusive_info;
48#endif
49
14ade10f
AG
/* Names for the 32 cpu_X[] TCG globals, indexed by register number.
 * Slot 30 is named "lr" and slot 31 "sp".
 */
static const char *regnames[] = {
    "x0",  "x1",  "x2",  "x3",
    "x4",  "x5",  "x6",  "x7",
    "x8",  "x9",  "x10", "x11",
    "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19",
    "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27",
    "x28", "x29", "lr",  "sp"
};
56
832ffa1c
AG
/* Shift kinds, numbered 0..3 in the order LSL, LSR, ASR, ROR. */
enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR,     /* == 1 */
    A64_SHIFT_TYPE_ASR,     /* == 2 */
    A64_SHIFT_TYPE_ROR      /* == 3 */
};
63
384b26fb
AB
64/* Table based decoder typedefs - used when the relevant bits for decode
65 * are too awkwardly scattered across the instruction (eg SIMD).
66 */
67typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);
68
69typedef struct AArch64DecodeTable {
70 uint32_t pattern;
71 uint32_t mask;
72 AArch64DecodeFn *disas_fn;
73} AArch64DecodeTable;
74
1f8a73af
PM
/*
 * Function types for the gen_ functions that call Neon helpers,
 * grouped by operand shape.
 */

/* 32 bit operands, without and with an env pointer */
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);

/* 64 bit operands */
typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);

/* 64 bit to 32 bit (narrow), without and with an env pointer */
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);

/* 32 bit to 64 bit (widen) */
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);

/* 32 bit and 64 bit operands with a trailing pointer argument */
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
1f8a73af 84
14ade10f
AG
85/* initialize TCG globals. */
86void a64_translate_init(void)
87{
88 int i;
89
90 cpu_pc = tcg_global_mem_new_i64(TCG_AREG0,
91 offsetof(CPUARMState, pc),
92 "pc");
93 for (i = 0; i < 32; i++) {
94 cpu_X[i] = tcg_global_mem_new_i64(TCG_AREG0,
95 offsetof(CPUARMState, xregs[i]),
96 regnames[i]);
97 }
98
832ffa1c
AG
99 cpu_NF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, NF), "NF");
100 cpu_ZF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, ZF), "ZF");
101 cpu_CF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, CF), "CF");
102 cpu_VF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, VF), "VF");
fa2ef212
MM
103
104 cpu_exclusive_addr = tcg_global_mem_new_i64(TCG_AREG0,
105 offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
106 cpu_exclusive_val = tcg_global_mem_new_i64(TCG_AREG0,
107 offsetof(CPUARMState, exclusive_val), "exclusive_val");
108 cpu_exclusive_high = tcg_global_mem_new_i64(TCG_AREG0,
109 offsetof(CPUARMState, exclusive_high), "exclusive_high");
110#ifdef CONFIG_USER_ONLY
111 cpu_exclusive_test = tcg_global_mem_new_i64(TCG_AREG0,
112 offsetof(CPUARMState, exclusive_test), "exclusive_test");
113 cpu_exclusive_info = tcg_global_mem_new_i32(TCG_AREG0,
114 offsetof(CPUARMState, exclusive_info), "exclusive_info");
115#endif
14ade10f
AG
116}
117
118void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
119 fprintf_function cpu_fprintf, int flags)
120{
121 ARMCPU *cpu = ARM_CPU(cs);
122 CPUARMState *env = &cpu->env;
d356312f 123 uint32_t psr = pstate_read(env);
14ade10f
AG
124 int i;
125
126 cpu_fprintf(f, "PC=%016"PRIx64" SP=%016"PRIx64"\n",
127 env->pc, env->xregs[31]);
128 for (i = 0; i < 31; i++) {
129 cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
130 if ((i % 4) == 3) {
131 cpu_fprintf(f, "\n");
132 } else {
133 cpu_fprintf(f, " ");
134 }
135 }
d356312f
PM
136 cpu_fprintf(f, "PSTATE=%08x (flags %c%c%c%c)\n",
137 psr,
138 psr & PSTATE_N ? 'N' : '-',
139 psr & PSTATE_Z ? 'Z' : '-',
140 psr & PSTATE_C ? 'C' : '-',
141 psr & PSTATE_V ? 'V' : '-');
14ade10f 142 cpu_fprintf(f, "\n");
f6d8a314
AG
143
144 if (flags & CPU_DUMP_FPU) {
145 int numvfpregs = 32;
146 for (i = 0; i < numvfpregs; i += 2) {
147 uint64_t vlo = float64_val(env->vfp.regs[i * 2]);
148 uint64_t vhi = float64_val(env->vfp.regs[(i * 2) + 1]);
149 cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 " ",
150 i, vhi, vlo);
151 vlo = float64_val(env->vfp.regs[(i + 1) * 2]);
152 vhi = float64_val(env->vfp.regs[((i + 1) * 2) + 1]);
153 cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "\n",
154 i + 1, vhi, vlo);
155 }
156 cpu_fprintf(f, "FPCR: %08x FPSR: %08x\n",
157 vfp_get_fpcr(env), vfp_get_fpsr(env));
158 }
14ade10f
AG
159}
160
4a08d475
PM
161static int get_mem_index(DisasContext *s)
162{
163#ifdef CONFIG_USER_ONLY
164 return 1;
165#else
166 return s->user;
167#endif
168}
169
14ade10f
AG
170void gen_a64_set_pc_im(uint64_t val)
171{
172 tcg_gen_movi_i64(cpu_pc, val);
173}
174
175static void gen_exception(int excp)
176{
177 TCGv_i32 tmp = tcg_temp_new_i32();
178 tcg_gen_movi_i32(tmp, excp);
179 gen_helper_exception(cpu_env, tmp);
180 tcg_temp_free_i32(tmp);
181}
182
183static void gen_exception_insn(DisasContext *s, int offset, int excp)
184{
185 gen_a64_set_pc_im(s->pc - offset);
186 gen_exception(excp);
40f860cd
PM
187 s->is_jmp = DISAS_EXC;
188}
189
190static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
191{
192 /* No direct tb linking with singlestep or deterministic io */
193 if (s->singlestep_enabled || (s->tb->cflags & CF_LAST_IO)) {
194 return false;
195 }
196
197 /* Only link tbs from inside the same guest page */
198 if ((s->tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
199 return false;
200 }
201
202 return true;
203}
204
205static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
206{
207 TranslationBlock *tb;
208
209 tb = s->tb;
210 if (use_goto_tb(s, n, dest)) {
211 tcg_gen_goto_tb(n);
212 gen_a64_set_pc_im(dest);
213 tcg_gen_exit_tb((tcg_target_long)tb + n);
214 s->is_jmp = DISAS_TB_JUMP;
215 } else {
216 gen_a64_set_pc_im(dest);
217 if (s->singlestep_enabled) {
218 gen_exception(EXCP_DEBUG);
219 }
220 tcg_gen_exit_tb(0);
221 s->is_jmp = DISAS_JUMP;
222 }
14ade10f
AG
223}
224
ad7ee8a2 225static void unallocated_encoding(DisasContext *s)
14ade10f 226{
14ade10f
AG
227 gen_exception_insn(s, 4, EXCP_UDEF);
228}
229
ad7ee8a2
CF
/*
 * Report an instruction encoding that the translator does not handle:
 * log it under LOG_UNIMP (with the source file/line of the use site,
 * the instruction word @insn and the guest pc s->pc - 4), then treat it
 * as an unallocated encoding.
 *
 * The expansion is a single do { } while (0) statement WITHOUT a
 * trailing semicolon; the caller supplies it, as in
 *     unsupported_encoding(s, insn);
 * so the macro behaves like a function call, including in an unbraced
 * if/else.  Macro arguments are parenthesized where they are used in
 * expressions.
 */
#define unsupported_encoding(s, insn)                                    \
    do {                                                                 \
        qemu_log_mask(LOG_UNIMP,                                         \
                      "%s:%d: unsupported instruction encoding 0x%08x "  \
                      "at pc=%016" PRIx64 "\n",                          \
                      __FILE__, __LINE__, (insn), (s)->pc - 4);          \
        unallocated_encoding(s);                                         \
    } while (0)
14ade10f 238
11e169de
AG
239static void init_tmp_a64_array(DisasContext *s)
240{
241#ifdef CONFIG_DEBUG_TCG
242 int i;
243 for (i = 0; i < ARRAY_SIZE(s->tmp_a64); i++) {
244 TCGV_UNUSED_I64(s->tmp_a64[i]);
245 }
246#endif
247 s->tmp_a64_count = 0;
248}
249
250static void free_tmp_a64(DisasContext *s)
251{
252 int i;
253 for (i = 0; i < s->tmp_a64_count; i++) {
254 tcg_temp_free_i64(s->tmp_a64[i]);
255 }
256 init_tmp_a64_array(s);
257}
258
259static TCGv_i64 new_tmp_a64(DisasContext *s)
260{
261 assert(s->tmp_a64_count < TMP_A64_MAX);
262 return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
263}
264
265static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
266{
267 TCGv_i64 t = new_tmp_a64(s);
268 tcg_gen_movi_i64(t, 0);
269 return t;
270}
271
71b46089
AG
272/*
273 * Register access functions
274 *
275 * These functions are used for directly accessing a register in where
276 * changes to the final register value are likely to be made. If you
277 * need to use a register for temporary calculation (e.g. index type
278 * operations) use the read_* form.
279 *
280 * B1.2.1 Register mappings
281 *
282 * In instruction register encoding 31 can refer to ZR (zero register) or
283 * the SP (stack pointer) depending on context. In QEMU's case we map SP
284 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
285 * This is the point of the _sp forms.
286 */
11e169de
AG
287static TCGv_i64 cpu_reg(DisasContext *s, int reg)
288{
289 if (reg == 31) {
290 return new_tmp_a64_zero(s);
291 } else {
292 return cpu_X[reg];
293 }
294}
295
71b46089
AG
296/* register access for when 31 == SP */
297static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
298{
299 return cpu_X[reg];
300}
301
60e53388
AG
302/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
303 * representing the register contents. This TCGv is an auto-freed
304 * temporary so it need not be explicitly freed, and may be modified.
305 */
306static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
307{
308 TCGv_i64 v = new_tmp_a64(s);
309 if (reg != 31) {
310 if (sf) {
311 tcg_gen_mov_i64(v, cpu_X[reg]);
312 } else {
313 tcg_gen_ext32u_i64(v, cpu_X[reg]);
314 }
315 } else {
316 tcg_gen_movi_i64(v, 0);
317 }
318 return v;
319}
320
4a08d475
PM
321static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
322{
323 TCGv_i64 v = new_tmp_a64(s);
324 if (sf) {
325 tcg_gen_mov_i64(v, cpu_X[reg]);
326 } else {
327 tcg_gen_ext32u_i64(v, cpu_X[reg]);
328 }
329 return v;
330}
331
72430bf5
AB
332/* Return the offset into CPUARMState of an element of specified
333 * size, 'element' places in from the least significant end of
334 * the FP/vector register Qn.
335 */
336static inline int vec_reg_offset(int regno, int element, TCGMemOp size)
337{
338 int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
339#ifdef HOST_WORDS_BIGENDIAN
340 /* This is complicated slightly because vfp.regs[2n] is
341 * still the low half and vfp.regs[2n+1] the high half
342 * of the 128 bit vector, even on big endian systems.
343 * Calculate the offset assuming a fully bigendian 128 bits,
344 * then XOR to account for the order of the two 64 bit halves.
345 */
346 offs += (16 - ((element + 1) * (1 << size)));
347 offs ^= 8;
348#else
349 offs += element * (1 << size);
350#endif
351 return offs;
352}
353
e2f90565
PM
354/* Return the offset into CPUARMState of a slice (from
355 * the least significant end) of FP register Qn (ie
356 * Dn, Sn, Hn or Bn).
357 * (Note that this is not the same mapping as for A32; see cpu.h)
358 */
359static inline int fp_reg_offset(int regno, TCGMemOp size)
360{
361 int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
362#ifdef HOST_WORDS_BIGENDIAN
363 offs += (8 - (1 << size));
364#endif
365 return offs;
366}
367
368/* Offset of the high half of the 128 bit vector Qn */
369static inline int fp_reg_hi_offset(int regno)
370{
371 return offsetof(CPUARMState, vfp.regs[regno * 2 + 1]);
372}
373
ec73d2e0
AG
374/* Convenience accessors for reading and writing single and double
375 * FP registers. Writing clears the upper parts of the associated
376 * 128 bit vector register, as required by the architecture.
377 * Note that unlike the GP register accessors, the values returned
378 * by the read functions must be manually freed.
379 */
380static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
381{
382 TCGv_i64 v = tcg_temp_new_i64();
383
384 tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(reg, MO_64));
385 return v;
386}
387
388static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
389{
390 TCGv_i32 v = tcg_temp_new_i32();
391
392 tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(reg, MO_32));
393 return v;
394}
395
396static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
397{
398 TCGv_i64 tcg_zero = tcg_const_i64(0);
399
400 tcg_gen_st_i64(v, cpu_env, fp_reg_offset(reg, MO_64));
401 tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(reg));
402 tcg_temp_free_i64(tcg_zero);
403}
404
405static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
406{
407 TCGv_i64 tmp = tcg_temp_new_i64();
408
409 tcg_gen_extu_i32_i64(tmp, v);
410 write_fp_dreg(s, reg, tmp);
411 tcg_temp_free_i64(tmp);
412}
413
414static TCGv_ptr get_fpstatus_ptr(void)
415{
416 TCGv_ptr statusptr = tcg_temp_new_ptr();
417 int offset;
418
419 /* In A64 all instructions (both FP and Neon) use the FPCR;
420 * there is no equivalent of the A32 Neon "standard FPSCR value"
421 * and all operations use vfp.fp_status.
422 */
423 offset = offsetof(CPUARMState, vfp.fp_status);
424 tcg_gen_addi_ptr(statusptr, cpu_env, offset);
425 return statusptr;
426}
427
832ffa1c
AG
428/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
429 * than the 32 bit equivalent.
430 */
431static inline void gen_set_NZ64(TCGv_i64 result)
432{
433 TCGv_i64 flag = tcg_temp_new_i64();
434
435 tcg_gen_setcondi_i64(TCG_COND_NE, flag, result, 0);
436 tcg_gen_trunc_i64_i32(cpu_ZF, flag);
437 tcg_gen_shri_i64(flag, result, 32);
438 tcg_gen_trunc_i64_i32(cpu_NF, flag);
439 tcg_temp_free_i64(flag);
440}
441
442/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
443static inline void gen_logic_CC(int sf, TCGv_i64 result)
444{
445 if (sf) {
446 gen_set_NZ64(result);
447 } else {
448 tcg_gen_trunc_i64_i32(cpu_ZF, result);
449 tcg_gen_trunc_i64_i32(cpu_NF, result);
450 }
451 tcg_gen_movi_i32(cpu_CF, 0);
452 tcg_gen_movi_i32(cpu_VF, 0);
453}
454
b0ff21b4
AB
455/* dest = T0 + T1; compute C, N, V and Z flags */
456static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
457{
458 if (sf) {
459 TCGv_i64 result, flag, tmp;
460 result = tcg_temp_new_i64();
461 flag = tcg_temp_new_i64();
462 tmp = tcg_temp_new_i64();
463
464 tcg_gen_movi_i64(tmp, 0);
465 tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
466
467 tcg_gen_trunc_i64_i32(cpu_CF, flag);
468
469 gen_set_NZ64(result);
470
471 tcg_gen_xor_i64(flag, result, t0);
472 tcg_gen_xor_i64(tmp, t0, t1);
473 tcg_gen_andc_i64(flag, flag, tmp);
474 tcg_temp_free_i64(tmp);
475 tcg_gen_shri_i64(flag, flag, 32);
476 tcg_gen_trunc_i64_i32(cpu_VF, flag);
477
478 tcg_gen_mov_i64(dest, result);
479 tcg_temp_free_i64(result);
480 tcg_temp_free_i64(flag);
481 } else {
482 /* 32 bit arithmetic */
483 TCGv_i32 t0_32 = tcg_temp_new_i32();
484 TCGv_i32 t1_32 = tcg_temp_new_i32();
485 TCGv_i32 tmp = tcg_temp_new_i32();
486
487 tcg_gen_movi_i32(tmp, 0);
488 tcg_gen_trunc_i64_i32(t0_32, t0);
489 tcg_gen_trunc_i64_i32(t1_32, t1);
490 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
491 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
492 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
493 tcg_gen_xor_i32(tmp, t0_32, t1_32);
494 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
495 tcg_gen_extu_i32_i64(dest, cpu_NF);
496
497 tcg_temp_free_i32(tmp);
498 tcg_temp_free_i32(t0_32);
499 tcg_temp_free_i32(t1_32);
500 }
501}
502
503/* dest = T0 - T1; compute C, N, V and Z flags */
504static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
505{
506 if (sf) {
507 /* 64 bit arithmetic */
508 TCGv_i64 result, flag, tmp;
509
510 result = tcg_temp_new_i64();
511 flag = tcg_temp_new_i64();
512 tcg_gen_sub_i64(result, t0, t1);
513
514 gen_set_NZ64(result);
515
516 tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
517 tcg_gen_trunc_i64_i32(cpu_CF, flag);
518
519 tcg_gen_xor_i64(flag, result, t0);
520 tmp = tcg_temp_new_i64();
521 tcg_gen_xor_i64(tmp, t0, t1);
522 tcg_gen_and_i64(flag, flag, tmp);
523 tcg_temp_free_i64(tmp);
524 tcg_gen_shri_i64(flag, flag, 32);
525 tcg_gen_trunc_i64_i32(cpu_VF, flag);
526 tcg_gen_mov_i64(dest, result);
527 tcg_temp_free_i64(flag);
528 tcg_temp_free_i64(result);
529 } else {
530 /* 32 bit arithmetic */
531 TCGv_i32 t0_32 = tcg_temp_new_i32();
532 TCGv_i32 t1_32 = tcg_temp_new_i32();
533 TCGv_i32 tmp;
534
535 tcg_gen_trunc_i64_i32(t0_32, t0);
536 tcg_gen_trunc_i64_i32(t1_32, t1);
537 tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
538 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
539 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
540 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
541 tmp = tcg_temp_new_i32();
542 tcg_gen_xor_i32(tmp, t0_32, t1_32);
543 tcg_temp_free_i32(t0_32);
544 tcg_temp_free_i32(t1_32);
545 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
546 tcg_temp_free_i32(tmp);
547 tcg_gen_extu_i32_i64(dest, cpu_NF);
548 }
549}
550
643dbb07
CF
551/* dest = T0 + T1 + CF; do not compute flags. */
552static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
553{
554 TCGv_i64 flag = tcg_temp_new_i64();
555 tcg_gen_extu_i32_i64(flag, cpu_CF);
556 tcg_gen_add_i64(dest, t0, t1);
557 tcg_gen_add_i64(dest, dest, flag);
558 tcg_temp_free_i64(flag);
559
560 if (!sf) {
561 tcg_gen_ext32u_i64(dest, dest);
562 }
563}
564
565/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
566static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
567{
568 if (sf) {
569 TCGv_i64 result, cf_64, vf_64, tmp;
570 result = tcg_temp_new_i64();
571 cf_64 = tcg_temp_new_i64();
572 vf_64 = tcg_temp_new_i64();
573 tmp = tcg_const_i64(0);
574
575 tcg_gen_extu_i32_i64(cf_64, cpu_CF);
576 tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
577 tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
578 tcg_gen_trunc_i64_i32(cpu_CF, cf_64);
579 gen_set_NZ64(result);
580
581 tcg_gen_xor_i64(vf_64, result, t0);
582 tcg_gen_xor_i64(tmp, t0, t1);
583 tcg_gen_andc_i64(vf_64, vf_64, tmp);
584 tcg_gen_shri_i64(vf_64, vf_64, 32);
585 tcg_gen_trunc_i64_i32(cpu_VF, vf_64);
586
587 tcg_gen_mov_i64(dest, result);
588
589 tcg_temp_free_i64(tmp);
590 tcg_temp_free_i64(vf_64);
591 tcg_temp_free_i64(cf_64);
592 tcg_temp_free_i64(result);
593 } else {
594 TCGv_i32 t0_32, t1_32, tmp;
595 t0_32 = tcg_temp_new_i32();
596 t1_32 = tcg_temp_new_i32();
597 tmp = tcg_const_i32(0);
598
599 tcg_gen_trunc_i64_i32(t0_32, t0);
600 tcg_gen_trunc_i64_i32(t1_32, t1);
601 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
602 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);
603
604 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
605 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
606 tcg_gen_xor_i32(tmp, t0_32, t1_32);
607 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
608 tcg_gen_extu_i32_i64(dest, cpu_NF);
609
610 tcg_temp_free_i32(tmp);
611 tcg_temp_free_i32(t1_32);
612 tcg_temp_free_i32(t0_32);
613 }
614}
615
4a08d475
PM
616/*
617 * Load/Store generators
618 */
619
620/*
60510aed 621 * Store from GPR register to memory.
4a08d475 622 */
60510aed
PM
623static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
624 TCGv_i64 tcg_addr, int size, int memidx)
625{
626 g_assert(size <= 3);
627 tcg_gen_qemu_st_i64(source, tcg_addr, memidx, MO_TE + size);
628}
629
4a08d475
PM
630static void do_gpr_st(DisasContext *s, TCGv_i64 source,
631 TCGv_i64 tcg_addr, int size)
632{
60510aed 633 do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s));
4a08d475
PM
634}
635
636/*
637 * Load from memory to GPR register
638 */
60510aed
PM
639static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
640 int size, bool is_signed, bool extend, int memidx)
4a08d475
PM
641{
642 TCGMemOp memop = MO_TE + size;
643
644 g_assert(size <= 3);
645
646 if (is_signed) {
647 memop += MO_SIGN;
648 }
649
60510aed 650 tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
4a08d475
PM
651
652 if (extend && is_signed) {
653 g_assert(size < 3);
654 tcg_gen_ext32u_i64(dest, dest);
655 }
656}
657
60510aed
PM
658static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
659 int size, bool is_signed, bool extend)
660{
661 do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
662 get_mem_index(s));
663}
664
4a08d475
PM
665/*
666 * Store from FP register to memory
667 */
668static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
669{
670 /* This writes the bottom N bits of a 128 bit wide vector to memory */
4a08d475 671 TCGv_i64 tmp = tcg_temp_new_i64();
e2f90565 672 tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(srcidx, MO_64));
4a08d475 673 if (size < 4) {
4a08d475
PM
674 tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TE + size);
675 } else {
676 TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
4a08d475
PM
677 tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TEQ);
678 tcg_gen_qemu_st64(tmp, tcg_addr, get_mem_index(s));
e2f90565 679 tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(srcidx));
4a08d475
PM
680 tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
681 tcg_gen_qemu_st_i64(tmp, tcg_hiaddr, get_mem_index(s), MO_TEQ);
682 tcg_temp_free_i64(tcg_hiaddr);
683 }
684
685 tcg_temp_free_i64(tmp);
686}
687
688/*
689 * Load from memory to FP register
690 */
691static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
692{
693 /* This always zero-extends and writes to a full 128 bit wide vector */
4a08d475
PM
694 TCGv_i64 tmplo = tcg_temp_new_i64();
695 TCGv_i64 tmphi;
696
697 if (size < 4) {
698 TCGMemOp memop = MO_TE + size;
699 tmphi = tcg_const_i64(0);
700 tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
701 } else {
702 TCGv_i64 tcg_hiaddr;
703 tmphi = tcg_temp_new_i64();
704 tcg_hiaddr = tcg_temp_new_i64();
705
706 tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), MO_TEQ);
707 tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
708 tcg_gen_qemu_ld_i64(tmphi, tcg_hiaddr, get_mem_index(s), MO_TEQ);
709 tcg_temp_free_i64(tcg_hiaddr);
710 }
711
e2f90565
PM
712 tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(destidx, MO_64));
713 tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(destidx));
4a08d475
PM
714
715 tcg_temp_free_i64(tmplo);
716 tcg_temp_free_i64(tmphi);
717}
718
72430bf5
AB
719/*
720 * Vector load/store helpers.
721 *
722 * The principal difference between this and a FP load is that we don't
723 * zero extend as we are filling a partial chunk of the vector register.
724 * These functions don't support 128 bit loads/stores, which would be
725 * normal load/store operations.
a08582f4
PM
726 *
727 * The _i32 versions are useful when operating on 32 bit quantities
728 * (eg for floating point single or using Neon helper functions).
72430bf5
AB
729 */
730
731/* Get value of an element within a vector register */
732static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
733 int element, TCGMemOp memop)
734{
735 int vect_off = vec_reg_offset(srcidx, element, memop & MO_SIZE);
736 switch (memop) {
737 case MO_8:
738 tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
739 break;
740 case MO_16:
741 tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
742 break;
743 case MO_32:
744 tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
745 break;
746 case MO_8|MO_SIGN:
747 tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
748 break;
749 case MO_16|MO_SIGN:
750 tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
751 break;
752 case MO_32|MO_SIGN:
753 tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
754 break;
755 case MO_64:
756 case MO_64|MO_SIGN:
757 tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
758 break;
759 default:
760 g_assert_not_reached();
761 }
762}
763
a08582f4
PM
764static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
765 int element, TCGMemOp memop)
766{
767 int vect_off = vec_reg_offset(srcidx, element, memop & MO_SIZE);
768 switch (memop) {
769 case MO_8:
770 tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
771 break;
772 case MO_16:
773 tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
774 break;
775 case MO_8|MO_SIGN:
776 tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
777 break;
778 case MO_16|MO_SIGN:
779 tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
780 break;
781 case MO_32:
782 case MO_32|MO_SIGN:
783 tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
784 break;
785 default:
786 g_assert_not_reached();
787 }
788}
789
72430bf5
AB
790/* Set value of an element within a vector register */
791static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
792 int element, TCGMemOp memop)
793{
794 int vect_off = vec_reg_offset(destidx, element, memop & MO_SIZE);
795 switch (memop) {
796 case MO_8:
797 tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
798 break;
799 case MO_16:
800 tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
801 break;
802 case MO_32:
803 tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
804 break;
805 case MO_64:
806 tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
807 break;
808 default:
809 g_assert_not_reached();
810 }
811}
812
1f8a73af
PM
813static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
814 int destidx, int element, TCGMemOp memop)
815{
816 int vect_off = vec_reg_offset(destidx, element, memop & MO_SIZE);
817 switch (memop) {
818 case MO_8:
819 tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
820 break;
821 case MO_16:
822 tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
823 break;
824 case MO_32:
825 tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
826 break;
827 default:
828 g_assert_not_reached();
829 }
830}
831
72430bf5
AB
832/* Clear the high 64 bits of a 128 bit vector (in general non-quad
833 * vector ops all need to do this).
834 */
835static void clear_vec_high(DisasContext *s, int rd)
836{
837 TCGv_i64 tcg_zero = tcg_const_i64(0);
838
839 write_vec_element(s, tcg_zero, rd, 1, MO_64);
840 tcg_temp_free_i64(tcg_zero);
841}
842
843/* Store from vector register to memory */
844static void do_vec_st(DisasContext *s, int srcidx, int element,
845 TCGv_i64 tcg_addr, int size)
846{
847 TCGMemOp memop = MO_TE + size;
848 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
849
850 read_vec_element(s, tcg_tmp, srcidx, element, size);
851 tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
852
853 tcg_temp_free_i64(tcg_tmp);
854}
855
856/* Load from memory to vector register */
857static void do_vec_ld(DisasContext *s, int destidx, int element,
858 TCGv_i64 tcg_addr, int size)
859{
860 TCGMemOp memop = MO_TE + size;
861 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
862
863 tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
864 write_vec_element(s, tcg_tmp, destidx, element, size);
865
866 tcg_temp_free_i64(tcg_tmp);
867}
868
229b7a05
AB
869/*
870 * This utility function is for doing register extension with an
871 * optional shift. You will likely want to pass a temporary for the
872 * destination register. See DecodeRegExtend() in the ARM ARM.
873 */
874static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
875 int option, unsigned int shift)
876{
877 int extsize = extract32(option, 0, 2);
878 bool is_signed = extract32(option, 2, 1);
879
880 if (is_signed) {
881 switch (extsize) {
882 case 0:
883 tcg_gen_ext8s_i64(tcg_out, tcg_in);
884 break;
885 case 1:
886 tcg_gen_ext16s_i64(tcg_out, tcg_in);
887 break;
888 case 2:
889 tcg_gen_ext32s_i64(tcg_out, tcg_in);
890 break;
891 case 3:
892 tcg_gen_mov_i64(tcg_out, tcg_in);
893 break;
894 }
895 } else {
896 switch (extsize) {
897 case 0:
898 tcg_gen_ext8u_i64(tcg_out, tcg_in);
899 break;
900 case 1:
901 tcg_gen_ext16u_i64(tcg_out, tcg_in);
902 break;
903 case 2:
904 tcg_gen_ext32u_i64(tcg_out, tcg_in);
905 break;
906 case 3:
907 tcg_gen_mov_i64(tcg_out, tcg_in);
908 break;
909 }
910 }
911
912 if (shift) {
913 tcg_gen_shli_i64(tcg_out, tcg_out, shift);
914 }
915}
916
4a08d475
PM
917static inline void gen_check_sp_alignment(DisasContext *s)
918{
919 /* The AArch64 architecture mandates that (if enabled via PSTATE
920 * or SCTLR bits) there is a check that SP is 16-aligned on every
921 * SP-relative load or store (with an exception generated if it is not).
922 * In line with general QEMU practice regarding misaligned accesses,
923 * we omit these checks for the sake of guest program performance.
924 * This function is provided as a hook so we can more easily add these
925 * checks in future (possibly as a "favour catching guest program bugs
926 * over speed" user selectable option).
927 */
928}
929
384b26fb
AB
930/*
931 * This provides a simple table based table lookup decoder. It is
932 * intended to be used when the relevant bits for decode are too
933 * awkwardly placed and switch/if based logic would be confusing and
934 * deeply nested. Since it's a linear search through the table, tables
935 * should be kept small.
936 *
937 * It returns the first handler where insn & mask == pattern, or
938 * NULL if there is no match.
939 * The table is terminated by an empty mask (i.e. 0)
940 */
941static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
942 uint32_t insn)
943{
944 const AArch64DecodeTable *tptr = table;
945
946 while (tptr->mask) {
947 if ((insn & tptr->mask) == tptr->pattern) {
948 return tptr->disas_fn;
949 }
950 tptr++;
951 }
952 return NULL;
953}
954
ad7ee8a2
CF
955/*
956 * the instruction disassembly implemented here matches
957 * the instruction encoding classifications in chapter 3 (C3)
958 * of the ARM Architecture Reference Manual (DDI0487A_a)
959 */
960
11e169de
AG
961/* C3.2.7 Unconditional branch (immediate)
962 * 31 30 26 25 0
963 * +----+-----------+-------------------------------------+
964 * | op | 0 0 1 0 1 | imm26 |
965 * +----+-----------+-------------------------------------+
966 */
ad7ee8a2
CF
967static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
968{
11e169de
AG
969 uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;
970
971 if (insn & (1 << 31)) {
972 /* C5.6.26 BL Branch with link */
973 tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
974 }
975
976 /* C5.6.20 B Branch / C5.6.26 BL Branch with link */
977 gen_goto_tb(s, 0, addr);
ad7ee8a2
CF
978}
979
60e53388
AG
980/* C3.2.1 Compare & branch (immediate)
981 * 31 30 25 24 23 5 4 0
982 * +----+-------------+----+---------------------+--------+
983 * | sf | 0 1 1 0 1 0 | op | imm19 | Rt |
984 * +----+-------------+----+---------------------+--------+
985 */
ad7ee8a2
CF
986static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
987{
60e53388
AG
988 unsigned int sf, op, rt;
989 uint64_t addr;
990 int label_match;
991 TCGv_i64 tcg_cmp;
992
993 sf = extract32(insn, 31, 1);
994 op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
995 rt = extract32(insn, 0, 5);
996 addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
997
998 tcg_cmp = read_cpu_reg(s, rt, sf);
999 label_match = gen_new_label();
1000
1001 tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1002 tcg_cmp, 0, label_match);
1003
1004 gen_goto_tb(s, 0, s->pc);
1005 gen_set_label(label_match);
1006 gen_goto_tb(s, 1, addr);
ad7ee8a2
CF
1007}
1008
db0f7958
AG
1009/* C3.2.5 Test & branch (immediate)
1010 * 31 30 25 24 23 19 18 5 4 0
1011 * +----+-------------+----+-------+-------------+------+
1012 * | b5 | 0 1 1 0 1 1 | op | b40 | imm14 | Rt |
1013 * +----+-------------+----+-------+-------------+------+
1014 */
ad7ee8a2
CF
1015static void disas_test_b_imm(DisasContext *s, uint32_t insn)
1016{
db0f7958
AG
1017 unsigned int bit_pos, op, rt;
1018 uint64_t addr;
1019 int label_match;
1020 TCGv_i64 tcg_cmp;
1021
1022 bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
1023 op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
1024 addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
1025 rt = extract32(insn, 0, 5);
1026
1027 tcg_cmp = tcg_temp_new_i64();
1028 tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
1029 label_match = gen_new_label();
1030 tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1031 tcg_cmp, 0, label_match);
1032 tcg_temp_free_i64(tcg_cmp);
1033 gen_goto_tb(s, 0, s->pc);
1034 gen_set_label(label_match);
1035 gen_goto_tb(s, 1, addr);
ad7ee8a2
CF
1036}
1037
39fb730a
AG
1038/* C3.2.2 / C5.6.19 Conditional branch (immediate)
1039 * 31 25 24 23 5 4 3 0
1040 * +---------------+----+---------------------+----+------+
1041 * | 0 1 0 1 0 1 0 | o1 | imm19 | o0 | cond |
1042 * +---------------+----+---------------------+----+------+
1043 */
ad7ee8a2
CF
1044static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
1045{
39fb730a
AG
1046 unsigned int cond;
1047 uint64_t addr;
1048
1049 if ((insn & (1 << 4)) || (insn & (1 << 24))) {
1050 unallocated_encoding(s);
1051 return;
1052 }
1053 addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1054 cond = extract32(insn, 0, 4);
1055
1056 if (cond < 0x0e) {
1057 /* genuinely conditional branches */
1058 int label_match = gen_new_label();
1059 arm_gen_test_cc(cond, label_match);
1060 gen_goto_tb(s, 0, s->pc);
1061 gen_set_label(label_match);
1062 gen_goto_tb(s, 1, addr);
1063 } else {
1064 /* 0xe and 0xf are both "always" conditions */
1065 gen_goto_tb(s, 0, addr);
1066 }
ad7ee8a2
CF
1067}
1068
87462e0f
CF
1069/* C5.6.68 HINT */
1070static void handle_hint(DisasContext *s, uint32_t insn,
1071 unsigned int op1, unsigned int op2, unsigned int crm)
1072{
1073 unsigned int selector = crm << 3 | op2;
1074
1075 if (op1 != 3) {
1076 unallocated_encoding(s);
1077 return;
1078 }
1079
1080 switch (selector) {
1081 case 0: /* NOP */
1082 return;
1083 case 1: /* YIELD */
1084 case 2: /* WFE */
1085 case 3: /* WFI */
1086 case 4: /* SEV */
1087 case 5: /* SEVL */
1088 /* we treat all as NOP at least for now */
1089 return;
1090 default:
1091 /* default specified as NOP equivalent */
1092 return;
1093 }
1094}
1095
fa2ef212
MM
1096static void gen_clrex(DisasContext *s, uint32_t insn)
1097{
1098 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1099}
1100
87462e0f
CF
1101/* CLREX, DSB, DMB, ISB */
1102static void handle_sync(DisasContext *s, uint32_t insn,
1103 unsigned int op1, unsigned int op2, unsigned int crm)
1104{
1105 if (op1 != 3) {
1106 unallocated_encoding(s);
1107 return;
1108 }
1109
1110 switch (op2) {
1111 case 2: /* CLREX */
fa2ef212 1112 gen_clrex(s, insn);
87462e0f
CF
1113 return;
1114 case 4: /* DSB */
1115 case 5: /* DMB */
1116 case 6: /* ISB */
1117 /* We don't emulate caches so barriers are no-ops */
1118 return;
1119 default:
1120 unallocated_encoding(s);
1121 return;
1122 }
1123}
1124
1125/* C5.6.130 MSR (immediate) - move immediate to processor state field */
1126static void handle_msr_i(DisasContext *s, uint32_t insn,
1127 unsigned int op1, unsigned int op2, unsigned int crm)
1128{
1129 unsupported_encoding(s, insn);
1130}
1131
b0d2b7d0
PM
1132static void gen_get_nzcv(TCGv_i64 tcg_rt)
1133{
1134 TCGv_i32 tmp = tcg_temp_new_i32();
1135 TCGv_i32 nzcv = tcg_temp_new_i32();
1136
1137 /* build bit 31, N */
1138 tcg_gen_andi_i32(nzcv, cpu_NF, (1 << 31));
1139 /* build bit 30, Z */
1140 tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1141 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1142 /* build bit 29, C */
1143 tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1144 /* build bit 28, V */
1145 tcg_gen_shri_i32(tmp, cpu_VF, 31);
1146 tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1147 /* generate result */
1148 tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1149
1150 tcg_temp_free_i32(nzcv);
1151 tcg_temp_free_i32(tmp);
1152}
1153
1154static void gen_set_nzcv(TCGv_i64 tcg_rt)
1155
1156{
1157 TCGv_i32 nzcv = tcg_temp_new_i32();
1158
1159 /* take NZCV from R[t] */
1160 tcg_gen_trunc_i64_i32(nzcv, tcg_rt);
1161
1162 /* bit 31, N */
1163 tcg_gen_andi_i32(cpu_NF, nzcv, (1 << 31));
1164 /* bit 30, Z */
1165 tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1166 tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1167 /* bit 29, C */
1168 tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1169 tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1170 /* bit 28, V */
1171 tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1172 tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1173 tcg_temp_free_i32(nzcv);
1174}
1175
fea50522
PM
1176/* C5.6.129 MRS - move from system register
1177 * C5.6.131 MSR (register) - move to system register
1178 * C5.6.204 SYS
1179 * C5.6.205 SYSL
1180 * These are all essentially the same insn in 'read' and 'write'
1181 * versions, with varying op0 fields.
1182 */
1183static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
1184 unsigned int op0, unsigned int op1, unsigned int op2,
87462e0f
CF
1185 unsigned int crn, unsigned int crm, unsigned int rt)
1186{
fea50522
PM
1187 const ARMCPRegInfo *ri;
1188 TCGv_i64 tcg_rt;
87462e0f 1189
fea50522
PM
1190 ri = get_arm_cp_reginfo(s->cp_regs,
1191 ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
1192 crn, crm, op0, op1, op2));
87462e0f 1193
fea50522 1194 if (!ri) {
626187d8
PM
1195 /* Unknown register; this might be a guest error or a QEMU
1196 * unimplemented feature.
1197 */
1198 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
1199 "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
1200 isread ? "read" : "write", op0, op1, crn, crm, op2);
fea50522
PM
1201 unallocated_encoding(s);
1202 return;
1203 }
1204
1205 /* Check access permissions */
1206 if (!cp_access_ok(s->current_pl, ri, isread)) {
1207 unallocated_encoding(s);
1208 return;
1209 }
1210
f59df3f2
PM
1211 if (ri->accessfn) {
1212 /* Emit code to perform further access permissions checks at
1213 * runtime; this may result in an exception.
1214 */
1215 TCGv_ptr tmpptr;
1216 gen_a64_set_pc_im(s->pc - 4);
1217 tmpptr = tcg_const_ptr(ri);
1218 gen_helper_access_check_cp_reg(cpu_env, tmpptr);
1219 tcg_temp_free_ptr(tmpptr);
1220 }
1221
fea50522
PM
1222 /* Handle special cases first */
1223 switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
1224 case ARM_CP_NOP:
1225 return;
b0d2b7d0
PM
1226 case ARM_CP_NZCV:
1227 tcg_rt = cpu_reg(s, rt);
1228 if (isread) {
1229 gen_get_nzcv(tcg_rt);
1230 } else {
1231 gen_set_nzcv(tcg_rt);
1232 }
1233 return;
0eef9d98
PM
1234 case ARM_CP_CURRENTEL:
1235 /* Reads as current EL value from pstate, which is
1236 * guaranteed to be constant by the tb flags.
1237 */
1238 tcg_rt = cpu_reg(s, rt);
1239 tcg_gen_movi_i64(tcg_rt, s->current_pl << 2);
1240 return;
fea50522
PM
1241 default:
1242 break;
1243 }
1244
1245 if (use_icount && (ri->type & ARM_CP_IO)) {
1246 gen_io_start();
1247 }
1248
1249 tcg_rt = cpu_reg(s, rt);
1250
1251 if (isread) {
1252 if (ri->type & ARM_CP_CONST) {
1253 tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
1254 } else if (ri->readfn) {
1255 TCGv_ptr tmpptr;
fea50522
PM
1256 tmpptr = tcg_const_ptr(ri);
1257 gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
1258 tcg_temp_free_ptr(tmpptr);
1259 } else {
1260 tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
1261 }
1262 } else {
1263 if (ri->type & ARM_CP_CONST) {
1264 /* If not forbidden by access permissions, treat as WI */
1265 return;
1266 } else if (ri->writefn) {
1267 TCGv_ptr tmpptr;
fea50522
PM
1268 tmpptr = tcg_const_ptr(ri);
1269 gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
1270 tcg_temp_free_ptr(tmpptr);
1271 } else {
1272 tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
1273 }
1274 }
1275
1276 if (use_icount && (ri->type & ARM_CP_IO)) {
1277 /* I/O operations must end the TB here (whether read or write) */
1278 gen_io_end();
1279 s->is_jmp = DISAS_UPDATE;
1280 } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
1281 /* We default to ending the TB on a coprocessor register write,
1282 * but allow this to be suppressed by the register definition
1283 * (usually only necessary to work around guest bugs).
1284 */
1285 s->is_jmp = DISAS_UPDATE;
1286 }
ad7ee8a2
CF
1287}
1288
87462e0f
CF
1289/* C3.2.4 System
1290 * 31 22 21 20 19 18 16 15 12 11 8 7 5 4 0
1291 * +---------------------+---+-----+-----+-------+-------+-----+------+
1292 * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 | CRn | CRm | op2 | Rt |
1293 * +---------------------+---+-----+-----+-------+-------+-----+------+
1294 */
1295static void disas_system(DisasContext *s, uint32_t insn)
1296{
1297 unsigned int l, op0, op1, crn, crm, op2, rt;
1298 l = extract32(insn, 21, 1);
1299 op0 = extract32(insn, 19, 2);
1300 op1 = extract32(insn, 16, 3);
1301 crn = extract32(insn, 12, 4);
1302 crm = extract32(insn, 8, 4);
1303 op2 = extract32(insn, 5, 3);
1304 rt = extract32(insn, 0, 5);
1305
1306 if (op0 == 0) {
1307 if (l || rt != 31) {
1308 unallocated_encoding(s);
1309 return;
1310 }
1311 switch (crn) {
1312 case 2: /* C5.6.68 HINT */
1313 handle_hint(s, insn, op1, op2, crm);
1314 break;
1315 case 3: /* CLREX, DSB, DMB, ISB */
1316 handle_sync(s, insn, op1, op2, crm);
1317 break;
1318 case 4: /* C5.6.130 MSR (immediate) */
1319 handle_msr_i(s, insn, op1, op2, crm);
1320 break;
1321 default:
1322 unallocated_encoding(s);
1323 break;
1324 }
1325 return;
1326 }
fea50522 1327 handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
87462e0f
CF
1328}
1329
9618e809
AG
1330/* C3.2.3 Exception generation
1331 *
1332 * 31 24 23 21 20 5 4 2 1 0
1333 * +-----------------+-----+------------------------+-----+----+
1334 * | 1 1 0 1 0 1 0 0 | opc | imm16 | op2 | LL |
1335 * +-----------------------+------------------------+----------+
1336 */
ad7ee8a2
CF
1337static void disas_exc(DisasContext *s, uint32_t insn)
1338{
9618e809
AG
1339 int opc = extract32(insn, 21, 3);
1340 int op2_ll = extract32(insn, 0, 5);
1341
1342 switch (opc) {
1343 case 0:
1344 /* SVC, HVC, SMC; since we don't support the Virtualization
1345 * or TrustZone extensions these all UNDEF except SVC.
1346 */
1347 if (op2_ll != 1) {
1348 unallocated_encoding(s);
1349 break;
1350 }
1351 gen_exception_insn(s, 0, EXCP_SWI);
1352 break;
1353 case 1:
1354 if (op2_ll != 0) {
1355 unallocated_encoding(s);
1356 break;
1357 }
1358 /* BRK */
1359 gen_exception_insn(s, 0, EXCP_BKPT);
1360 break;
1361 case 2:
1362 if (op2_ll != 0) {
1363 unallocated_encoding(s);
1364 break;
1365 }
1366 /* HLT */
1367 unsupported_encoding(s, insn);
1368 break;
1369 case 5:
1370 if (op2_ll < 1 || op2_ll > 3) {
1371 unallocated_encoding(s);
1372 break;
1373 }
1374 /* DCPS1, DCPS2, DCPS3 */
1375 unsupported_encoding(s, insn);
1376 break;
1377 default:
1378 unallocated_encoding(s);
1379 break;
1380 }
ad7ee8a2
CF
1381}
1382
b001c8c3
AG
1383/* C3.2.7 Unconditional branch (register)
1384 * 31 25 24 21 20 16 15 10 9 5 4 0
1385 * +---------------+-------+-------+-------+------+-------+
1386 * | 1 1 0 1 0 1 1 | opc | op2 | op3 | Rn | op4 |
1387 * +---------------+-------+-------+-------+------+-------+
1388 */
ad7ee8a2
CF
1389static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1390{
b001c8c3
AG
1391 unsigned int opc, op2, op3, rn, op4;
1392
1393 opc = extract32(insn, 21, 4);
1394 op2 = extract32(insn, 16, 5);
1395 op3 = extract32(insn, 10, 6);
1396 rn = extract32(insn, 5, 5);
1397 op4 = extract32(insn, 0, 5);
1398
1399 if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
1400 unallocated_encoding(s);
1401 return;
1402 }
1403
1404 switch (opc) {
1405 case 0: /* BR */
1406 case 2: /* RET */
1407 break;
1408 case 1: /* BLR */
1409 tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1410 break;
1411 case 4: /* ERET */
1412 case 5: /* DRPS */
1413 if (rn != 0x1f) {
1414 unallocated_encoding(s);
1415 } else {
1416 unsupported_encoding(s, insn);
1417 }
1418 return;
1419 default:
1420 unallocated_encoding(s);
1421 return;
1422 }
1423
1424 tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
1425 s->is_jmp = DISAS_JUMP;
ad7ee8a2
CF
1426}
1427
1428/* C3.2 Branches, exception generating and system instructions */
1429static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
1430{
1431 switch (extract32(insn, 25, 7)) {
1432 case 0x0a: case 0x0b:
1433 case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
1434 disas_uncond_b_imm(s, insn);
1435 break;
1436 case 0x1a: case 0x5a: /* Compare & branch (immediate) */
1437 disas_comp_b_imm(s, insn);
1438 break;
1439 case 0x1b: case 0x5b: /* Test & branch (immediate) */
1440 disas_test_b_imm(s, insn);
1441 break;
1442 case 0x2a: /* Conditional branch (immediate) */
1443 disas_cond_b_imm(s, insn);
1444 break;
1445 case 0x6a: /* Exception generation / System */
1446 if (insn & (1 << 24)) {
1447 disas_system(s, insn);
1448 } else {
1449 disas_exc(s, insn);
1450 }
1451 break;
1452 case 0x6b: /* Unconditional branch (register) */
1453 disas_uncond_b_reg(s, insn);
1454 break;
1455 default:
1456 unallocated_encoding(s);
1457 break;
1458 }
1459}
1460
fa2ef212
MM
1461/*
1462 * Load/Store exclusive instructions are implemented by remembering
1463 * the value/address loaded, and seeing if these are the same
1464 * when the store is performed. This is not actually the architecturally
1465 * mandated semantics, but it works for typical guest code sequences
1466 * and avoids having to monitor regular stores.
1467 *
1468 * In system emulation mode only one CPU will be running at once, so
1469 * this sequence is effectively atomic. In user emulation mode we
1470 * throw an exception and handle the atomic operation elsewhere.
1471 */
1472static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
1473 TCGv_i64 addr, int size, bool is_pair)
1474{
1475 TCGv_i64 tmp = tcg_temp_new_i64();
1476 TCGMemOp memop = MO_TE + size;
1477
1478 g_assert(size <= 3);
1479 tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), memop);
1480
1481 if (is_pair) {
1482 TCGv_i64 addr2 = tcg_temp_new_i64();
1483 TCGv_i64 hitmp = tcg_temp_new_i64();
1484
1485 g_assert(size >= 2);
1486 tcg_gen_addi_i64(addr2, addr, 1 << size);
1487 tcg_gen_qemu_ld_i64(hitmp, addr2, get_mem_index(s), memop);
1488 tcg_temp_free_i64(addr2);
1489 tcg_gen_mov_i64(cpu_exclusive_high, hitmp);
1490 tcg_gen_mov_i64(cpu_reg(s, rt2), hitmp);
1491 tcg_temp_free_i64(hitmp);
1492 }
1493
1494 tcg_gen_mov_i64(cpu_exclusive_val, tmp);
1495 tcg_gen_mov_i64(cpu_reg(s, rt), tmp);
1496
1497 tcg_temp_free_i64(tmp);
1498 tcg_gen_mov_i64(cpu_exclusive_addr, addr);
1499}
1500
1501#ifdef CONFIG_USER_ONLY
1502static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
1503 TCGv_i64 addr, int size, int is_pair)
1504{
1505 tcg_gen_mov_i64(cpu_exclusive_test, addr);
1506 tcg_gen_movi_i32(cpu_exclusive_info,
1507 size | is_pair << 2 | (rd << 4) | (rt << 9) | (rt2 << 14));
1508 gen_exception_insn(s, 4, EXCP_STREX);
1509}
1510#else
1511static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
d324b36a 1512 TCGv_i64 inaddr, int size, int is_pair)
fa2ef212 1513{
d324b36a
PM
1514 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
1515 * && (!is_pair || env->exclusive_high == [addr + datasize])) {
1516 * [addr] = {Rt};
1517 * if (is_pair) {
1518 * [addr + datasize] = {Rt2};
1519 * }
1520 * {Rd} = 0;
1521 * } else {
1522 * {Rd} = 1;
1523 * }
1524 * env->exclusive_addr = -1;
1525 */
1526 int fail_label = gen_new_label();
1527 int done_label = gen_new_label();
1528 TCGv_i64 addr = tcg_temp_local_new_i64();
1529 TCGv_i64 tmp;
1530
1531 /* Copy input into a local temp so it is not trashed when the
1532 * basic block ends at the branch insn.
1533 */
1534 tcg_gen_mov_i64(addr, inaddr);
1535 tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
1536
1537 tmp = tcg_temp_new_i64();
1538 tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), MO_TE + size);
1539 tcg_gen_brcond_i64(TCG_COND_NE, tmp, cpu_exclusive_val, fail_label);
1540 tcg_temp_free_i64(tmp);
1541
1542 if (is_pair) {
1543 TCGv_i64 addrhi = tcg_temp_new_i64();
1544 TCGv_i64 tmphi = tcg_temp_new_i64();
1545
1546 tcg_gen_addi_i64(addrhi, addr, 1 << size);
1547 tcg_gen_qemu_ld_i64(tmphi, addrhi, get_mem_index(s), MO_TE + size);
1548 tcg_gen_brcond_i64(TCG_COND_NE, tmphi, cpu_exclusive_high, fail_label);
1549
1550 tcg_temp_free_i64(tmphi);
1551 tcg_temp_free_i64(addrhi);
1552 }
1553
1554 /* We seem to still have the exclusive monitor, so do the store */
1555 tcg_gen_qemu_st_i64(cpu_reg(s, rt), addr, get_mem_index(s), MO_TE + size);
1556 if (is_pair) {
1557 TCGv_i64 addrhi = tcg_temp_new_i64();
1558
1559 tcg_gen_addi_i64(addrhi, addr, 1 << size);
1560 tcg_gen_qemu_st_i64(cpu_reg(s, rt2), addrhi,
1561 get_mem_index(s), MO_TE + size);
1562 tcg_temp_free_i64(addrhi);
1563 }
1564
1565 tcg_temp_free_i64(addr);
1566
1567 tcg_gen_movi_i64(cpu_reg(s, rd), 0);
1568 tcg_gen_br(done_label);
1569 gen_set_label(fail_label);
1570 tcg_gen_movi_i64(cpu_reg(s, rd), 1);
1571 gen_set_label(done_label);
1572 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1573
fa2ef212
MM
1574}
1575#endif
1576
1577/* C3.3.6 Load/store exclusive
1578 *
1579 * 31 30 29 24 23 22 21 20 16 15 14 10 9 5 4 0
1580 * +-----+-------------+----+---+----+------+----+-------+------+------+
1581 * | sz | 0 0 1 0 0 0 | o2 | L | o1 | Rs | o0 | Rt2 | Rn | Rt |
1582 * +-----+-------------+----+---+----+------+----+-------+------+------+
1583 *
1584 * sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
1585 * L: 0 -> store, 1 -> load
1586 * o2: 0 -> exclusive, 1 -> not
1587 * o1: 0 -> single register, 1 -> register pair
1588 * o0: 1 -> load-acquire/store-release, 0 -> not
1589 *
1590 * o0 == 0 AND o2 == 1 is un-allocated
1591 * o1 == 1 is un-allocated except for 32 and 64 bit sizes
1592 */
ad7ee8a2
CF
1593static void disas_ldst_excl(DisasContext *s, uint32_t insn)
1594{
fa2ef212
MM
1595 int rt = extract32(insn, 0, 5);
1596 int rn = extract32(insn, 5, 5);
1597 int rt2 = extract32(insn, 10, 5);
1598 int is_lasr = extract32(insn, 15, 1);
1599 int rs = extract32(insn, 16, 5);
1600 int is_pair = extract32(insn, 21, 1);
1601 int is_store = !extract32(insn, 22, 1);
1602 int is_excl = !extract32(insn, 23, 1);
1603 int size = extract32(insn, 30, 2);
1604 TCGv_i64 tcg_addr;
1605
1606 if ((!is_excl && !is_lasr) ||
1607 (is_pair && size < 2)) {
1608 unallocated_encoding(s);
1609 return;
1610 }
1611
1612 if (rn == 31) {
1613 gen_check_sp_alignment(s);
1614 }
1615 tcg_addr = read_cpu_reg_sp(s, rn, 1);
1616
1617 /* Note that since TCG is single threaded load-acquire/store-release
1618 * semantics require no extra if (is_lasr) { ... } handling.
1619 */
1620
1621 if (is_excl) {
1622 if (!is_store) {
1623 gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
1624 } else {
1625 gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
1626 }
1627 } else {
1628 TCGv_i64 tcg_rt = cpu_reg(s, rt);
1629 if (is_store) {
1630 do_gpr_st(s, tcg_rt, tcg_addr, size);
1631 } else {
1632 do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false);
1633 }
1634 if (is_pair) {
1635 TCGv_i64 tcg_rt2 = cpu_reg(s, rt);
1636 tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
1637 if (is_store) {
1638 do_gpr_st(s, tcg_rt2, tcg_addr, size);
1639 } else {
1640 do_gpr_ld(s, tcg_rt2, tcg_addr, size, false, false);
1641 }
1642 }
1643 }
ad7ee8a2
CF
1644}
1645
32b64e86
AG
1646/*
1647 * C3.3.5 Load register (literal)
1648 *
1649 * 31 30 29 27 26 25 24 23 5 4 0
1650 * +-----+-------+---+-----+-------------------+-------+
1651 * | opc | 0 1 1 | V | 0 0 | imm19 | Rt |
1652 * +-----+-------+---+-----+-------------------+-------+
1653 *
1654 * V: 1 -> vector (simd/fp)
1655 * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
1656 * 10-> 32 bit signed, 11 -> prefetch
1657 * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
1658 */
ad7ee8a2
CF
1659static void disas_ld_lit(DisasContext *s, uint32_t insn)
1660{
32b64e86
AG
1661 int rt = extract32(insn, 0, 5);
1662 int64_t imm = sextract32(insn, 5, 19) << 2;
1663 bool is_vector = extract32(insn, 26, 1);
1664 int opc = extract32(insn, 30, 2);
1665 bool is_signed = false;
1666 int size = 2;
1667 TCGv_i64 tcg_rt, tcg_addr;
1668
1669 if (is_vector) {
1670 if (opc == 3) {
1671 unallocated_encoding(s);
1672 return;
1673 }
1674 size = 2 + opc;
1675 } else {
1676 if (opc == 3) {
1677 /* PRFM (literal) : prefetch */
1678 return;
1679 }
1680 size = 2 + extract32(opc, 0, 1);
1681 is_signed = extract32(opc, 1, 1);
1682 }
1683
1684 tcg_rt = cpu_reg(s, rt);
1685
1686 tcg_addr = tcg_const_i64((s->pc - 4) + imm);
1687 if (is_vector) {
1688 do_fp_ld(s, rt, tcg_addr, size);
1689 } else {
1690 do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
1691 }
1692 tcg_temp_free_i64(tcg_addr);
ad7ee8a2
CF
1693}
1694
4a08d475
PM
1695/*
1696 * C5.6.80 LDNP (Load Pair - non-temporal hint)
1697 * C5.6.81 LDP (Load Pair - non vector)
1698 * C5.6.82 LDPSW (Load Pair Signed Word - non vector)
1699 * C5.6.176 STNP (Store Pair - non-temporal hint)
1700 * C5.6.177 STP (Store Pair - non vector)
1701 * C6.3.165 LDNP (Load Pair of SIMD&FP - non-temporal hint)
1702 * C6.3.165 LDP (Load Pair of SIMD&FP)
1703 * C6.3.284 STNP (Store Pair of SIMD&FP - non-temporal hint)
1704 * C6.3.284 STP (Store Pair of SIMD&FP)
1705 *
1706 * 31 30 29 27 26 25 24 23 22 21 15 14 10 9 5 4 0
1707 * +-----+-------+---+---+-------+---+-----------------------------+
1708 * | opc | 1 0 1 | V | 0 | index | L | imm7 | Rt2 | Rn | Rt |
1709 * +-----+-------+---+---+-------+---+-------+-------+------+------+
1710 *
1711 * opc: LDP/STP/LDNP/STNP 00 -> 32 bit, 10 -> 64 bit
1712 * LDPSW 01
1713 * LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
1714 * V: 0 -> GPR, 1 -> Vector
1715 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
1716 * 10 -> signed offset, 11 -> pre-index
1717 * L: 0 -> Store 1 -> Load
1718 *
1719 * Rt, Rt2 = GPR or SIMD registers to be stored
1720 * Rn = general purpose register containing address
1721 * imm7 = signed offset (multiple of 4 or 8 depending on size)
1722 */
ad7ee8a2
CF
1723static void disas_ldst_pair(DisasContext *s, uint32_t insn)
1724{
4a08d475
PM
1725 int rt = extract32(insn, 0, 5);
1726 int rn = extract32(insn, 5, 5);
1727 int rt2 = extract32(insn, 10, 5);
1728 int64_t offset = sextract32(insn, 15, 7);
1729 int index = extract32(insn, 23, 2);
1730 bool is_vector = extract32(insn, 26, 1);
1731 bool is_load = extract32(insn, 22, 1);
1732 int opc = extract32(insn, 30, 2);
1733
1734 bool is_signed = false;
1735 bool postindex = false;
1736 bool wback = false;
1737
1738 TCGv_i64 tcg_addr; /* calculated address */
1739 int size;
1740
1741 if (opc == 3) {
1742 unallocated_encoding(s);
1743 return;
1744 }
1745
1746 if (is_vector) {
1747 size = 2 + opc;
1748 } else {
1749 size = 2 + extract32(opc, 1, 1);
1750 is_signed = extract32(opc, 0, 1);
1751 if (!is_load && is_signed) {
1752 unallocated_encoding(s);
1753 return;
1754 }
1755 }
1756
1757 switch (index) {
1758 case 1: /* post-index */
1759 postindex = true;
1760 wback = true;
1761 break;
1762 case 0:
1763 /* signed offset with "non-temporal" hint. Since we don't emulate
1764 * caches we don't care about hints to the cache system about
1765 * data access patterns, and handle this identically to plain
1766 * signed offset.
1767 */
1768 if (is_signed) {
1769 /* There is no non-temporal-hint version of LDPSW */
1770 unallocated_encoding(s);
1771 return;
1772 }
1773 postindex = false;
1774 break;
1775 case 2: /* signed offset, rn not updated */
1776 postindex = false;
1777 break;
1778 case 3: /* pre-index */
1779 postindex = false;
1780 wback = true;
1781 break;
1782 }
1783
1784 offset <<= size;
1785
1786 if (rn == 31) {
1787 gen_check_sp_alignment(s);
1788 }
1789
1790 tcg_addr = read_cpu_reg_sp(s, rn, 1);
1791
1792 if (!postindex) {
1793 tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
1794 }
1795
1796 if (is_vector) {
1797 if (is_load) {
1798 do_fp_ld(s, rt, tcg_addr, size);
1799 } else {
1800 do_fp_st(s, rt, tcg_addr, size);
1801 }
1802 } else {
1803 TCGv_i64 tcg_rt = cpu_reg(s, rt);
1804 if (is_load) {
1805 do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
1806 } else {
1807 do_gpr_st(s, tcg_rt, tcg_addr, size);
1808 }
1809 }
1810 tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
1811 if (is_vector) {
1812 if (is_load) {
1813 do_fp_ld(s, rt2, tcg_addr, size);
1814 } else {
1815 do_fp_st(s, rt2, tcg_addr, size);
1816 }
1817 } else {
1818 TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
1819 if (is_load) {
1820 do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false);
1821 } else {
1822 do_gpr_st(s, tcg_rt2, tcg_addr, size);
1823 }
1824 }
1825
1826 if (wback) {
1827 if (postindex) {
1828 tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
1829 } else {
1830 tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
1831 }
1832 tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
1833 }
ad7ee8a2
CF
1834}
1835
a5e94a9d
AB
1836/*
1837 * C3.3.8 Load/store (immediate post-indexed)
1838 * C3.3.9 Load/store (immediate pre-indexed)
1839 * C3.3.12 Load/store (unscaled immediate)
1840 *
1841 * 31 30 29 27 26 25 24 23 22 21 20 12 11 10 9 5 4 0
1842 * +----+-------+---+-----+-----+---+--------+-----+------+------+
1843 * |size| 1 1 1 | V | 0 0 | opc | 0 | imm9 | idx | Rn | Rt |
1844 * +----+-------+---+-----+-----+---+--------+-----+------+------+
1845 *
1846 * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
60510aed 1847 10 -> unprivileged
a5e94a9d
AB
1848 * V = 0 -> non-vector
1849 * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
1850 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
1851 */
1852static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
1853{
1854 int rt = extract32(insn, 0, 5);
1855 int rn = extract32(insn, 5, 5);
1856 int imm9 = sextract32(insn, 12, 9);
1857 int opc = extract32(insn, 22, 2);
1858 int size = extract32(insn, 30, 2);
1859 int idx = extract32(insn, 10, 2);
1860 bool is_signed = false;
1861 bool is_store = false;
1862 bool is_extended = false;
60510aed 1863 bool is_unpriv = (idx == 2);
a5e94a9d
AB
1864 bool is_vector = extract32(insn, 26, 1);
1865 bool post_index;
1866 bool writeback;
1867
1868 TCGv_i64 tcg_addr;
1869
1870 if (is_vector) {
1871 size |= (opc & 2) << 1;
60510aed 1872 if (size > 4 || is_unpriv) {
a5e94a9d
AB
1873 unallocated_encoding(s);
1874 return;
1875 }
1876 is_store = ((opc & 1) == 0);
1877 } else {
1878 if (size == 3 && opc == 2) {
1879 /* PRFM - prefetch */
60510aed
PM
1880 if (is_unpriv) {
1881 unallocated_encoding(s);
1882 return;
1883 }
a5e94a9d
AB
1884 return;
1885 }
1886 if (opc == 3 && size > 1) {
1887 unallocated_encoding(s);
1888 return;
1889 }
1890 is_store = (opc == 0);
1891 is_signed = opc & (1<<1);
1892 is_extended = (size < 3) && (opc & 1);
1893 }
1894
1895 switch (idx) {
1896 case 0:
60510aed 1897 case 2:
a5e94a9d
AB
1898 post_index = false;
1899 writeback = false;
1900 break;
1901 case 1:
1902 post_index = true;
1903 writeback = true;
1904 break;
1905 case 3:
1906 post_index = false;
1907 writeback = true;
1908 break;
a5e94a9d
AB
1909 }
1910
1911 if (rn == 31) {
1912 gen_check_sp_alignment(s);
1913 }
1914 tcg_addr = read_cpu_reg_sp(s, rn, 1);
1915
1916 if (!post_index) {
1917 tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
1918 }
1919
1920 if (is_vector) {
1921 if (is_store) {
1922 do_fp_st(s, rt, tcg_addr, size);
1923 } else {
1924 do_fp_ld(s, rt, tcg_addr, size);
1925 }
1926 } else {
1927 TCGv_i64 tcg_rt = cpu_reg(s, rt);
60510aed
PM
1928 int memidx = is_unpriv ? 1 : get_mem_index(s);
1929
a5e94a9d 1930 if (is_store) {
60510aed 1931 do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx);
a5e94a9d 1932 } else {
60510aed
PM
1933 do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
1934 is_signed, is_extended, memidx);
a5e94a9d
AB
1935 }
1936 }
1937
1938 if (writeback) {
1939 TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
1940 if (post_index) {
1941 tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
1942 }
1943 tcg_gen_mov_i64(tcg_rn, tcg_addr);
1944 }
1945}
1946
229b7a05
AB
1947/*
1948 * C3.3.10 Load/store (register offset)
1949 *
1950 * 31 30 29 27 26 25 24 23 22 21 20 16 15 13 12 11 10 9 5 4 0
1951 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
1952 * |size| 1 1 1 | V | 0 0 | opc | 1 | Rm | opt | S| 1 0 | Rn | Rt |
1953 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
1954 *
1955 * For non-vector:
1956 * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
1957 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
1958 * For vector:
1959 * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
1960 * opc<0>: 0 -> store, 1 -> load
1961 * V: 1 -> vector/simd
1962 * opt: extend encoding (see DecodeRegExtend)
1963 * S: if S=1 then scale (essentially index by sizeof(size))
1964 * Rt: register to transfer into/out of
1965 * Rn: address register or SP for base
1966 * Rm: offset register or ZR for offset
1967 */
1968static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn)
1969{
1970 int rt = extract32(insn, 0, 5);
1971 int rn = extract32(insn, 5, 5);
1972 int shift = extract32(insn, 12, 1);
1973 int rm = extract32(insn, 16, 5);
1974 int opc = extract32(insn, 22, 2);
1975 int opt = extract32(insn, 13, 3);
1976 int size = extract32(insn, 30, 2);
1977 bool is_signed = false;
1978 bool is_store = false;
1979 bool is_extended = false;
1980 bool is_vector = extract32(insn, 26, 1);
1981
1982 TCGv_i64 tcg_rm;
1983 TCGv_i64 tcg_addr;
1984
1985 if (extract32(opt, 1, 1) == 0) {
1986 unallocated_encoding(s);
1987 return;
1988 }
1989
1990 if (is_vector) {
1991 size |= (opc & 2) << 1;
1992 if (size > 4) {
1993 unallocated_encoding(s);
1994 return;
1995 }
1996 is_store = !extract32(opc, 0, 1);
1997 } else {
1998 if (size == 3 && opc == 2) {
1999 /* PRFM - prefetch */
2000 return;
2001 }
2002 if (opc == 3 && size > 1) {
2003 unallocated_encoding(s);
2004 return;
2005 }
2006 is_store = (opc == 0);
2007 is_signed = extract32(opc, 1, 1);
2008 is_extended = (size < 3) && extract32(opc, 0, 1);
2009 }
2010
2011 if (rn == 31) {
2012 gen_check_sp_alignment(s);
2013 }
2014 tcg_addr = read_cpu_reg_sp(s, rn, 1);
2015
2016 tcg_rm = read_cpu_reg(s, rm, 1);
2017 ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
2018
2019 tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);
2020
2021 if (is_vector) {
2022 if (is_store) {
2023 do_fp_st(s, rt, tcg_addr, size);
2024 } else {
2025 do_fp_ld(s, rt, tcg_addr, size);
2026 }
2027 } else {
2028 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2029 if (is_store) {
2030 do_gpr_st(s, tcg_rt, tcg_addr, size);
2031 } else {
2032 do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
2033 }
2034 }
2035}
2036
d5612f10
AB
2037/*
2038 * C3.3.13 Load/store (unsigned immediate)
2039 *
2040 * 31 30 29 27 26 25 24 23 22 21 10 9 5
2041 * +----+-------+---+-----+-----+------------+-------+------+
2042 * |size| 1 1 1 | V | 0 1 | opc | imm12 | Rn | Rt |
2043 * +----+-------+---+-----+-----+------------+-------+------+
2044 *
2045 * For non-vector:
2046 * size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2047 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2048 * For vector:
2049 * size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2050 * opc<0>: 0 -> store, 1 -> load
2051 * Rn: base address register (inc SP)
2052 * Rt: target register
2053 */
2054static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn)
2055{
2056 int rt = extract32(insn, 0, 5);
2057 int rn = extract32(insn, 5, 5);
2058 unsigned int imm12 = extract32(insn, 10, 12);
2059 bool is_vector = extract32(insn, 26, 1);
2060 int size = extract32(insn, 30, 2);
2061 int opc = extract32(insn, 22, 2);
2062 unsigned int offset;
2063
2064 TCGv_i64 tcg_addr;
2065
2066 bool is_store;
2067 bool is_signed = false;
2068 bool is_extended = false;
2069
2070 if (is_vector) {
2071 size |= (opc & 2) << 1;
2072 if (size > 4) {
2073 unallocated_encoding(s);
2074 return;
2075 }
2076 is_store = !extract32(opc, 0, 1);
2077 } else {
2078 if (size == 3 && opc == 2) {
2079 /* PRFM - prefetch */
2080 return;
2081 }
2082 if (opc == 3 && size > 1) {
2083 unallocated_encoding(s);
2084 return;
2085 }
2086 is_store = (opc == 0);
2087 is_signed = extract32(opc, 1, 1);
2088 is_extended = (size < 3) && extract32(opc, 0, 1);
2089 }
2090
2091 if (rn == 31) {
2092 gen_check_sp_alignment(s);
2093 }
2094 tcg_addr = read_cpu_reg_sp(s, rn, 1);
2095 offset = imm12 << size;
2096 tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2097
2098 if (is_vector) {
2099 if (is_store) {
2100 do_fp_st(s, rt, tcg_addr, size);
2101 } else {
2102 do_fp_ld(s, rt, tcg_addr, size);
2103 }
2104 } else {
2105 TCGv_i64 tcg_rt = cpu_reg(s, rt);
2106 if (is_store) {
2107 do_gpr_st(s, tcg_rt, tcg_addr, size);
2108 } else {
2109 do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
2110 }
2111 }
2112}
2113
ad7ee8a2
CF
2114/* Load/store register (all forms) */
2115static void disas_ldst_reg(DisasContext *s, uint32_t insn)
2116{
d5612f10
AB
2117 switch (extract32(insn, 24, 2)) {
2118 case 0:
229b7a05
AB
2119 if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
2120 disas_ldst_reg_roffset(s, insn);
2121 } else {
60510aed
PM
2122 /* Load/store register (unscaled immediate)
2123 * Load/store immediate pre/post-indexed
2124 * Load/store register unprivileged
2125 */
2126 disas_ldst_reg_imm9(s, insn);
229b7a05 2127 }
d5612f10
AB
2128 break;
2129 case 1:
2130 disas_ldst_reg_unsigned_imm(s, insn);
2131 break;
2132 default:
2133 unallocated_encoding(s);
2134 break;
2135 }
ad7ee8a2
CF
2136}
2137
72430bf5
AB
2138/* C3.3.1 AdvSIMD load/store multiple structures
2139 *
2140 * 31 30 29 23 22 21 16 15 12 11 10 9 5 4 0
2141 * +---+---+---------------+---+-------------+--------+------+------+------+
2142 * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size | Rn | Rt |
2143 * +---+---+---------------+---+-------------+--------+------+------+------+
2144 *
2145 * C3.3.2 AdvSIMD load/store multiple structures (post-indexed)
2146 *
2147 * 31 30 29 23 22 21 20 16 15 12 11 10 9 5 4 0
2148 * +---+---+---------------+---+---+---------+--------+------+------+------+
2149 * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 | Rm | opcode | size | Rn | Rt |
2150 * +---+---+---------------+---+---+---------+--------+------+------+------+
2151 *
2152 * Rt: first (or only) SIMD&FP register to be transferred
2153 * Rn: base address or SP
2154 * Rm (post-index only): post-index register (when !31) or size dependent #imm
2155 */
ad7ee8a2
CF
2156static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
2157{
72430bf5
AB
2158 int rt = extract32(insn, 0, 5);
2159 int rn = extract32(insn, 5, 5);
2160 int size = extract32(insn, 10, 2);
2161 int opcode = extract32(insn, 12, 4);
2162 bool is_store = !extract32(insn, 22, 1);
2163 bool is_postidx = extract32(insn, 23, 1);
2164 bool is_q = extract32(insn, 30, 1);
2165 TCGv_i64 tcg_addr, tcg_rn;
2166
2167 int ebytes = 1 << size;
2168 int elements = (is_q ? 128 : 64) / (8 << size);
2169 int rpt; /* num iterations */
2170 int selem; /* structure elements */
2171 int r;
2172
2173 if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
2174 unallocated_encoding(s);
2175 return;
2176 }
2177
2178 /* From the shared decode logic */
2179 switch (opcode) {
2180 case 0x0:
2181 rpt = 1;
2182 selem = 4;
2183 break;
2184 case 0x2:
2185 rpt = 4;
2186 selem = 1;
2187 break;
2188 case 0x4:
2189 rpt = 1;
2190 selem = 3;
2191 break;
2192 case 0x6:
2193 rpt = 3;
2194 selem = 1;
2195 break;
2196 case 0x7:
2197 rpt = 1;
2198 selem = 1;
2199 break;
2200 case 0x8:
2201 rpt = 1;
2202 selem = 2;
2203 break;
2204 case 0xa:
2205 rpt = 2;
2206 selem = 1;
2207 break;
2208 default:
2209 unallocated_encoding(s);
2210 return;
2211 }
2212
2213 if (size == 3 && !is_q && selem != 1) {
2214 /* reserved */
2215 unallocated_encoding(s);
2216 return;
2217 }
2218
2219 if (rn == 31) {
2220 gen_check_sp_alignment(s);
2221 }
2222
2223 tcg_rn = cpu_reg_sp(s, rn);
2224 tcg_addr = tcg_temp_new_i64();
2225 tcg_gen_mov_i64(tcg_addr, tcg_rn);
2226
2227 for (r = 0; r < rpt; r++) {
2228 int e;
2229 for (e = 0; e < elements; e++) {
2230 int tt = (rt + r) % 32;
2231 int xs;
2232 for (xs = 0; xs < selem; xs++) {
2233 if (is_store) {
2234 do_vec_st(s, tt, e, tcg_addr, size);
2235 } else {
2236 do_vec_ld(s, tt, e, tcg_addr, size);
2237
2238 /* For non-quad operations, setting a slice of the low
2239 * 64 bits of the register clears the high 64 bits (in
2240 * the ARM ARM pseudocode this is implicit in the fact
2241 * that 'rval' is a 64 bit wide variable). We optimize
2242 * by noticing that we only need to do this the first
2243 * time we touch a register.
2244 */
2245 if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) {
2246 clear_vec_high(s, tt);
2247 }
2248 }
2249 tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2250 tt = (tt + 1) % 32;
2251 }
2252 }
2253 }
2254
2255 if (is_postidx) {
2256 int rm = extract32(insn, 16, 5);
2257 if (rm == 31) {
2258 tcg_gen_mov_i64(tcg_rn, tcg_addr);
2259 } else {
2260 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2261 }
2262 }
2263 tcg_temp_free_i64(tcg_addr);
ad7ee8a2
CF
2264}
2265
df54e47d
PM
2266/* C3.3.3 AdvSIMD load/store single structure
2267 *
2268 * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0
2269 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2270 * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size | Rn | Rt |
2271 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2272 *
2273 * C3.3.4 AdvSIMD load/store single structure (post-indexed)
2274 *
2275 * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0
2276 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2277 * | 0 | Q | 0 0 1 1 0 1 1 | L R | Rm | opc | S | size | Rn | Rt |
2278 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2279 *
2280 * Rt: first (or only) SIMD&FP register to be transferred
2281 * Rn: base address or SP
2282 * Rm (post-index only): post-index register (when !31) or size dependent #imm
2283 * index = encoded in Q:S:size dependent on size
2284 *
2285 * lane_size = encoded in R, opc
2286 * transfer width = encoded in opc, S, size
2287 */
ad7ee8a2
CF
2288static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
2289{
df54e47d
PM
2290 int rt = extract32(insn, 0, 5);
2291 int rn = extract32(insn, 5, 5);
2292 int size = extract32(insn, 10, 2);
2293 int S = extract32(insn, 12, 1);
2294 int opc = extract32(insn, 13, 3);
2295 int R = extract32(insn, 21, 1);
2296 int is_load = extract32(insn, 22, 1);
2297 int is_postidx = extract32(insn, 23, 1);
2298 int is_q = extract32(insn, 30, 1);
2299
2300 int scale = extract32(opc, 1, 2);
2301 int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
2302 bool replicate = false;
2303 int index = is_q << 3 | S << 2 | size;
2304 int ebytes, xs;
2305 TCGv_i64 tcg_addr, tcg_rn;
2306
2307 switch (scale) {
2308 case 3:
2309 if (!is_load || S) {
2310 unallocated_encoding(s);
2311 return;
2312 }
2313 scale = size;
2314 replicate = true;
2315 break;
2316 case 0:
2317 break;
2318 case 1:
2319 if (extract32(size, 0, 1)) {
2320 unallocated_encoding(s);
2321 return;
2322 }
2323 index >>= 1;
2324 break;
2325 case 2:
2326 if (extract32(size, 1, 1)) {
2327 unallocated_encoding(s);
2328 return;
2329 }
2330 if (!extract32(size, 0, 1)) {
2331 index >>= 2;
2332 } else {
2333 if (S) {
2334 unallocated_encoding(s);
2335 return;
2336 }
2337 index >>= 3;
2338 scale = 3;
2339 }
2340 break;
2341 default:
2342 g_assert_not_reached();
2343 }
2344
2345 ebytes = 1 << scale;
2346
2347 if (rn == 31) {
2348 gen_check_sp_alignment(s);
2349 }
2350
2351 tcg_rn = cpu_reg_sp(s, rn);
2352 tcg_addr = tcg_temp_new_i64();
2353 tcg_gen_mov_i64(tcg_addr, tcg_rn);
2354
2355 for (xs = 0; xs < selem; xs++) {
2356 if (replicate) {
2357 /* Load and replicate to all elements */
2358 uint64_t mulconst;
2359 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
2360
2361 tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
2362 get_mem_index(s), MO_TE + scale);
2363 switch (scale) {
2364 case 0:
2365 mulconst = 0x0101010101010101ULL;
2366 break;
2367 case 1:
2368 mulconst = 0x0001000100010001ULL;
2369 break;
2370 case 2:
2371 mulconst = 0x0000000100000001ULL;
2372 break;
2373 case 3:
2374 mulconst = 0;
2375 break;
2376 default:
2377 g_assert_not_reached();
2378 }
2379 if (mulconst) {
2380 tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
2381 }
2382 write_vec_element(s, tcg_tmp, rt, 0, MO_64);
2383 if (is_q) {
2384 write_vec_element(s, tcg_tmp, rt, 1, MO_64);
2385 } else {
2386 clear_vec_high(s, rt);
2387 }
2388 tcg_temp_free_i64(tcg_tmp);
2389 } else {
2390 /* Load/store one element per register */
2391 if (is_load) {
2392 do_vec_ld(s, rt, index, tcg_addr, MO_TE + scale);
2393 } else {
2394 do_vec_st(s, rt, index, tcg_addr, MO_TE + scale);
2395 }
2396 }
2397 tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2398 rt = (rt + 1) % 32;
2399 }
2400
2401 if (is_postidx) {
2402 int rm = extract32(insn, 16, 5);
2403 if (rm == 31) {
2404 tcg_gen_mov_i64(tcg_rn, tcg_addr);
2405 } else {
2406 tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2407 }
2408 }
2409 tcg_temp_free_i64(tcg_addr);
ad7ee8a2
CF
2410}
2411
2412/* C3.3 Loads and stores */
2413static void disas_ldst(DisasContext *s, uint32_t insn)
2414{
2415 switch (extract32(insn, 24, 6)) {
2416 case 0x08: /* Load/store exclusive */
2417 disas_ldst_excl(s, insn);
2418 break;
2419 case 0x18: case 0x1c: /* Load register (literal) */
2420 disas_ld_lit(s, insn);
2421 break;
2422 case 0x28: case 0x29:
2423 case 0x2c: case 0x2d: /* Load/store pair (all forms) */
2424 disas_ldst_pair(s, insn);
2425 break;
2426 case 0x38: case 0x39:
2427 case 0x3c: case 0x3d: /* Load/store register (all forms) */
2428 disas_ldst_reg(s, insn);
2429 break;
2430 case 0x0c: /* AdvSIMD load/store multiple structures */
2431 disas_ldst_multiple_struct(s, insn);
2432 break;
2433 case 0x0d: /* AdvSIMD load/store single structure */
2434 disas_ldst_single_struct(s, insn);
2435 break;
2436 default:
2437 unallocated_encoding(s);
2438 break;
2439 }
2440}
2441
15bfe8b6
AG
2442/* C3.4.6 PC-rel. addressing
2443 * 31 30 29 28 24 23 5 4 0
2444 * +----+-------+-----------+-------------------+------+
2445 * | op | immlo | 1 0 0 0 0 | immhi | Rd |
2446 * +----+-------+-----------+-------------------+------+
2447 */
ad7ee8a2
CF
2448static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
2449{
15bfe8b6
AG
2450 unsigned int page, rd;
2451 uint64_t base;
2452 int64_t offset;
2453
2454 page = extract32(insn, 31, 1);
2455 /* SignExtend(immhi:immlo) -> offset */
2456 offset = ((int64_t)sextract32(insn, 5, 19) << 2) | extract32(insn, 29, 2);
2457 rd = extract32(insn, 0, 5);
2458 base = s->pc - 4;
2459
2460 if (page) {
2461 /* ADRP (page based) */
2462 base &= ~0xfff;
2463 offset <<= 12;
2464 }
2465
2466 tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
ad7ee8a2
CF
2467}
2468
b0ff21b4
AB
2469/*
2470 * C3.4.1 Add/subtract (immediate)
2471 *
2472 * 31 30 29 28 24 23 22 21 10 9 5 4 0
2473 * +--+--+--+-----------+-----+-------------+-----+-----+
2474 * |sf|op| S| 1 0 0 0 1 |shift| imm12 | Rn | Rd |
2475 * +--+--+--+-----------+-----+-------------+-----+-----+
2476 *
2477 * sf: 0 -> 32bit, 1 -> 64bit
2478 * op: 0 -> add , 1 -> sub
2479 * S: 1 -> set flags
2480 * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
2481 */
ad7ee8a2
CF
2482static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
2483{
b0ff21b4
AB
2484 int rd = extract32(insn, 0, 5);
2485 int rn = extract32(insn, 5, 5);
2486 uint64_t imm = extract32(insn, 10, 12);
2487 int shift = extract32(insn, 22, 2);
2488 bool setflags = extract32(insn, 29, 1);
2489 bool sub_op = extract32(insn, 30, 1);
2490 bool is_64bit = extract32(insn, 31, 1);
2491
2492 TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2493 TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
2494 TCGv_i64 tcg_result;
2495
2496 switch (shift) {
2497 case 0x0:
2498 break;
2499 case 0x1:
2500 imm <<= 12;
2501 break;
2502 default:
2503 unallocated_encoding(s);
2504 return;
2505 }
2506
2507 tcg_result = tcg_temp_new_i64();
2508 if (!setflags) {
2509 if (sub_op) {
2510 tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
2511 } else {
2512 tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
2513 }
2514 } else {
2515 TCGv_i64 tcg_imm = tcg_const_i64(imm);
2516 if (sub_op) {
2517 gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2518 } else {
2519 gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2520 }
2521 tcg_temp_free_i64(tcg_imm);
2522 }
2523
2524 if (is_64bit) {
2525 tcg_gen_mov_i64(tcg_rd, tcg_result);
2526 } else {
2527 tcg_gen_ext32u_i64(tcg_rd, tcg_result);
2528 }
2529
2530 tcg_temp_free_i64(tcg_result);
ad7ee8a2
CF
2531}
2532
71b46089
AG
/* Replicate an e-bit element across a 64-bit word.
 *
 * mask must hold the element value in its bottom e bits with all higher
 * bits zero; e must be non-zero (asserted).  The copy width is doubled
 * on each pass until it reaches 64 or more.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    unsigned int width;

    assert(e != 0);
    for (width = e; width < 64; width *= 2) {
        mask |= mask << width;
    }
    return mask;
}
2546
/* Build a mask with the low `length` bits set.  length must be in the
 * range 1..64 (asserted), which keeps the shift count below 64.
 */
static inline uint64_t bitmask64(unsigned int length)
{
    unsigned int unused_bits;

    assert(length > 0 && length <= 64);
    unused_bits = 64 - length;
    return ~0ULL >> unused_bits;
}
2553
/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
 * only require the wmask.
 *
 * result: receives the decoded 64-bit pattern; written only on success
 * immn:   the N bit of the instruction (0 or 1)
 * imms:   6-bit field encoding element size and run length
 * immr:   6-bit rotation amount
 *
 * Returns false if the imms/immr/immn are a reserved value (ie should
 * cause a guest UNDEF exception), and true if they are valid.
 */
static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
                                   unsigned int imms, unsigned int immr)
{
    uint64_t mask;
    unsigned e, levels, s, r;
    int len;

    assert(immn < 2 && imms < 64 && immr < 64);

    /* The bit patterns we create here are 64 bit patterns which
     * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
     * 64 bits each. Each element contains the same value: a run
     * of between 1 and e-1 non-zero bits, rotated within the
     * element by between 0 and e-1 bits.
     *
     * The element size and run length are encoded into immn (1 bit)
     * and imms (6 bits) as follows:
     * 64 bit elements: immn = 1, imms = <length of run - 1>
     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
     * Notice that immn = 0, imms = 11111x is the only combination
     * not covered by one of the above options; this is reserved.
     * Further, <length of run - 1> all-ones is a reserved pattern.
     *
     * In all cases the rotation is by immr % e (and immr is 6 bits).
     */

    /* First determine the element size: the position of the highest set
     * bit in immn:NOT(imms) gives log2 of the element width.
     */
    len = 31 - clz32((immn << 6) | (~imms & 0x3f));
    if (len < 1) {
        /* This is the immn == 0, imms == 0b11111x case */
        return false;
    }
    e = 1 << len;

    levels = e - 1;
    s = imms & levels;
    r = immr & levels;

    if (s == levels) {
        /* <length of run - 1> mustn't be all-ones. */
        return false;
    }

    /* Create the value of one element: s+1 set bits rotated
     * by r within the element (which is e bits wide)...
     */
    mask = bitmask64(s + 1);
    if (r) {
        /* Only rotate for a non-zero amount: with r == 0 the left shift
         * would be by e, which is undefined behaviour in C when e == 64.
         * The rotate can also push bits above the element when e < 64,
         * so trim the result back to e bits before replicating.
         */
        mask = (mask >> r) | (mask << (e - r));
        mask &= bitmask64(e);
    }
    /* ...then replicate the element over the whole 64 bit value */
    mask = bitfield_replicate(mask, e);
    *result = mask;
    return true;
}
2616
2617/* C3.4.4 Logical (immediate)
2618 * 31 30 29 28 23 22 21 16 15 10 9 5 4 0
2619 * +----+-----+-------------+---+------+------+------+------+
2620 * | sf | opc | 1 0 0 1 0 0 | N | immr | imms | Rn | Rd |
2621 * +----+-----+-------------+---+------+------+------+------+
2622 */
ad7ee8a2
CF
2623static void disas_logic_imm(DisasContext *s, uint32_t insn)
2624{
71b46089
AG
2625 unsigned int sf, opc, is_n, immr, imms, rn, rd;
2626 TCGv_i64 tcg_rd, tcg_rn;
2627 uint64_t wmask;
2628 bool is_and = false;
2629
2630 sf = extract32(insn, 31, 1);
2631 opc = extract32(insn, 29, 2);
2632 is_n = extract32(insn, 22, 1);
2633 immr = extract32(insn, 16, 6);
2634 imms = extract32(insn, 10, 6);
2635 rn = extract32(insn, 5, 5);
2636 rd = extract32(insn, 0, 5);
2637
2638 if (!sf && is_n) {
2639 unallocated_encoding(s);
2640 return;
2641 }
2642
2643 if (opc == 0x3) { /* ANDS */
2644 tcg_rd = cpu_reg(s, rd);
2645 } else {
2646 tcg_rd = cpu_reg_sp(s, rd);
2647 }
2648 tcg_rn = cpu_reg(s, rn);
2649
2650 if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
2651 /* some immediate field values are reserved */
2652 unallocated_encoding(s);
2653 return;
2654 }
2655
2656 if (!sf) {
2657 wmask &= 0xffffffff;
2658 }
2659
2660 switch (opc) {
2661 case 0x3: /* ANDS */
2662 case 0x0: /* AND */
2663 tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
2664 is_and = true;
2665 break;
2666 case 0x1: /* ORR */
2667 tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
2668 break;
2669 case 0x2: /* EOR */
2670 tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
2671 break;
2672 default:
2673 assert(FALSE); /* must handle all above */
2674 break;
2675 }
2676
2677 if (!sf && !is_and) {
2678 /* zero extend final result; we know we can skip this for AND
2679 * since the immediate had the high 32 bits clear.
2680 */
2681 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2682 }
2683
2684 if (opc == 3) { /* ANDS */
2685 gen_logic_CC(sf, tcg_rd);
2686 }
ad7ee8a2
CF
2687}
2688
ed6ec679
AB
2689/*
2690 * C3.4.5 Move wide (immediate)
2691 *
2692 * 31 30 29 28 23 22 21 20 5 4 0
2693 * +--+-----+-------------+-----+----------------+------+
2694 * |sf| opc | 1 0 0 1 0 1 | hw | imm16 | Rd |
2695 * +--+-----+-------------+-----+----------------+------+
2696 *
2697 * sf: 0 -> 32 bit, 1 -> 64 bit
2698 * opc: 00 -> N, 10 -> Z, 11 -> K
2699 * hw: shift/16 (0,16, and sf only 32, 48)
2700 */
ad7ee8a2
CF
2701static void disas_movw_imm(DisasContext *s, uint32_t insn)
2702{
ed6ec679
AB
2703 int rd = extract32(insn, 0, 5);
2704 uint64_t imm = extract32(insn, 5, 16);
2705 int sf = extract32(insn, 31, 1);
2706 int opc = extract32(insn, 29, 2);
2707 int pos = extract32(insn, 21, 2) << 4;
2708 TCGv_i64 tcg_rd = cpu_reg(s, rd);
2709 TCGv_i64 tcg_imm;
2710
2711 if (!sf && (pos >= 32)) {
2712 unallocated_encoding(s);
2713 return;
2714 }
2715
2716 switch (opc) {
2717 case 0: /* MOVN */
2718 case 2: /* MOVZ */
2719 imm <<= pos;
2720 if (opc == 0) {
2721 imm = ~imm;
2722 }
2723 if (!sf) {
2724 imm &= 0xffffffffu;
2725 }
2726 tcg_gen_movi_i64(tcg_rd, imm);
2727 break;
2728 case 3: /* MOVK */
2729 tcg_imm = tcg_const_i64(imm);
2730 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
2731 tcg_temp_free_i64(tcg_imm);
2732 if (!sf) {
2733 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2734 }
2735 break;
2736 default:
2737 unallocated_encoding(s);
2738 break;
2739 }
ad7ee8a2
CF
2740}
2741
88077742
CF
2742/* C3.4.2 Bitfield
2743 * 31 30 29 28 23 22 21 16 15 10 9 5 4 0
2744 * +----+-----+-------------+---+------+------+------+------+
2745 * | sf | opc | 1 0 0 1 1 0 | N | immr | imms | Rn | Rd |
2746 * +----+-----+-------------+---+------+------+------+------+
2747 */
ad7ee8a2
CF
2748static void disas_bitfield(DisasContext *s, uint32_t insn)
2749{
88077742
CF
2750 unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
2751 TCGv_i64 tcg_rd, tcg_tmp;
2752
2753 sf = extract32(insn, 31, 1);
2754 opc = extract32(insn, 29, 2);
2755 n = extract32(insn, 22, 1);
2756 ri = extract32(insn, 16, 6);
2757 si = extract32(insn, 10, 6);
2758 rn = extract32(insn, 5, 5);
2759 rd = extract32(insn, 0, 5);
2760 bitsize = sf ? 64 : 32;
2761
2762 if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
2763 unallocated_encoding(s);
2764 return;
2765 }
2766
2767 tcg_rd = cpu_reg(s, rd);
2768 tcg_tmp = read_cpu_reg(s, rn, sf);
2769
2770 /* OPTME: probably worth recognizing common cases of ext{8,16,32}{u,s} */
2771
2772 if (opc != 1) { /* SBFM or UBFM */
2773 tcg_gen_movi_i64(tcg_rd, 0);
2774 }
2775
2776 /* do the bit move operation */
2777 if (si >= ri) {
2778 /* Wd<s-r:0> = Wn<s:r> */
2779 tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
2780 pos = 0;
2781 len = (si - ri) + 1;
2782 } else {
2783 /* Wd<32+s-r,32-r> = Wn<s:0> */
2784 pos = bitsize - ri;
2785 len = si + 1;
2786 }
2787
2788 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
2789
2790 if (opc == 0) { /* SBFM - sign extend the destination field */
2791 tcg_gen_shli_i64(tcg_rd, tcg_rd, 64 - (pos + len));
2792 tcg_gen_sari_i64(tcg_rd, tcg_rd, 64 - (pos + len));
2793 }
2794
2795 if (!sf) { /* zero extend final result */
2796 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2797 }
ad7ee8a2
CF
2798}
2799
e801de93
AG
2800/* C3.4.3 Extract
2801 * 31 30 29 28 23 22 21 20 16 15 10 9 5 4 0
2802 * +----+------+-------------+---+----+------+--------+------+------+
2803 * | sf | op21 | 1 0 0 1 1 1 | N | o0 | Rm | imms | Rn | Rd |
2804 * +----+------+-------------+---+----+------+--------+------+------+
2805 */
ad7ee8a2
CF
2806static void disas_extract(DisasContext *s, uint32_t insn)
2807{
e801de93
AG
2808 unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
2809
2810 sf = extract32(insn, 31, 1);
2811 n = extract32(insn, 22, 1);
2812 rm = extract32(insn, 16, 5);
2813 imm = extract32(insn, 10, 6);
2814 rn = extract32(insn, 5, 5);
2815 rd = extract32(insn, 0, 5);
2816 op21 = extract32(insn, 29, 2);
2817 op0 = extract32(insn, 21, 1);
2818 bitsize = sf ? 64 : 32;
2819
2820 if (sf != n || op21 || op0 || imm >= bitsize) {
2821 unallocated_encoding(s);
2822 } else {
2823 TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
2824
2825 tcg_rd = cpu_reg(s, rd);
2826
2827 if (imm) {
2828 /* OPTME: we can special case rm==rn as a rotate */
2829 tcg_rm = read_cpu_reg(s, rm, sf);
2830 tcg_rn = read_cpu_reg(s, rn, sf);
2831 tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
2832 tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
2833 tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
2834 if (!sf) {
2835 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2836 }
2837 } else {
2838 /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
2839 * so an extract from bit 0 is a special case.
2840 */
2841 if (sf) {
2842 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
2843 } else {
2844 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
2845 }
2846 }
2847
2848 }
ad7ee8a2
CF
2849}
2850
2851/* C3.4 Data processing - immediate */
2852static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
2853{
2854 switch (extract32(insn, 23, 6)) {
2855 case 0x20: case 0x21: /* PC-rel. addressing */
2856 disas_pc_rel_adr(s, insn);
2857 break;
2858 case 0x22: case 0x23: /* Add/subtract (immediate) */
2859 disas_add_sub_imm(s, insn);
2860 break;
2861 case 0x24: /* Logical (immediate) */
2862 disas_logic_imm(s, insn);
2863 break;
2864 case 0x25: /* Move wide (immediate) */
2865 disas_movw_imm(s, insn);
2866 break;
2867 case 0x26: /* Bitfield */
2868 disas_bitfield(s, insn);
2869 break;
2870 case 0x27: /* Extract */
2871 disas_extract(s, insn);
2872 break;
2873 default:
2874 unallocated_encoding(s);
2875 break;
2876 }
2877}
2878
832ffa1c
AG
2879/* Shift a TCGv src by TCGv shift_amount, put result in dst.
2880 * Note that it is the caller's responsibility to ensure that the
2881 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
2882 * mandated semantics for out of range shifts.
2883 */
2884static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
2885 enum a64_shift_type shift_type, TCGv_i64 shift_amount)
2886{
2887 switch (shift_type) {
2888 case A64_SHIFT_TYPE_LSL:
2889 tcg_gen_shl_i64(dst, src, shift_amount);
2890 break;
2891 case A64_SHIFT_TYPE_LSR:
2892 tcg_gen_shr_i64(dst, src, shift_amount);
2893 break;
2894 case A64_SHIFT_TYPE_ASR:
2895 if (!sf) {
2896 tcg_gen_ext32s_i64(dst, src);
2897 }
2898 tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
2899 break;
2900 case A64_SHIFT_TYPE_ROR:
2901 if (sf) {
2902 tcg_gen_rotr_i64(dst, src, shift_amount);
2903 } else {
2904 TCGv_i32 t0, t1;
2905 t0 = tcg_temp_new_i32();
2906 t1 = tcg_temp_new_i32();
2907 tcg_gen_trunc_i64_i32(t0, src);
2908 tcg_gen_trunc_i64_i32(t1, shift_amount);
2909 tcg_gen_rotr_i32(t0, t0, t1);
2910 tcg_gen_extu_i32_i64(dst, t0);
2911 tcg_temp_free_i32(t0);
2912 tcg_temp_free_i32(t1);
2913 }
2914 break;
2915 default:
2916 assert(FALSE); /* all shift types should be handled */
2917 break;
2918 }
2919
2920 if (!sf) { /* zero extend final result */
2921 tcg_gen_ext32u_i64(dst, dst);
2922 }
2923}
2924
2925/* Shift a TCGv src by immediate, put result in dst.
2926 * The shift amount must be in range (this should always be true as the
2927 * relevant instructions will UNDEF on bad shift immediates).
2928 */
2929static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
2930 enum a64_shift_type shift_type, unsigned int shift_i)
2931{
2932 assert(shift_i < (sf ? 64 : 32));
2933
2934 if (shift_i == 0) {
2935 tcg_gen_mov_i64(dst, src);
2936 } else {
2937 TCGv_i64 shift_const;
2938
2939 shift_const = tcg_const_i64(shift_i);
2940 shift_reg(dst, src, sf, shift_type, shift_const);
2941 tcg_temp_free_i64(shift_const);
2942 }
2943}
2944
2945/* C3.5.10 Logical (shifted register)
2946 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0
2947 * +----+-----+-----------+-------+---+------+--------+------+------+
2948 * | sf | opc | 0 1 0 1 0 | shift | N | Rm | imm6 | Rn | Rd |
2949 * +----+-----+-----------+-------+---+------+--------+------+------+
2950 */
ad7ee8a2
CF
2951static void disas_logic_reg(DisasContext *s, uint32_t insn)
2952{
832ffa1c
AG
2953 TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
2954 unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
2955
2956 sf = extract32(insn, 31, 1);
2957 opc = extract32(insn, 29, 2);
2958 shift_type = extract32(insn, 22, 2);
2959 invert = extract32(insn, 21, 1);
2960 rm = extract32(insn, 16, 5);
2961 shift_amount = extract32(insn, 10, 6);
2962 rn = extract32(insn, 5, 5);
2963 rd = extract32(insn, 0, 5);
2964
2965 if (!sf && (shift_amount & (1 << 5))) {
2966 unallocated_encoding(s);
2967 return;
2968 }
2969
2970 tcg_rd = cpu_reg(s, rd);
2971
2972 if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
2973 /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
2974 * register-register MOV and MVN, so it is worth special casing.
2975 */
2976 tcg_rm = cpu_reg(s, rm);
2977 if (invert) {
2978 tcg_gen_not_i64(tcg_rd, tcg_rm);
2979 if (!sf) {
2980 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2981 }
2982 } else {
2983 if (sf) {
2984 tcg_gen_mov_i64(tcg_rd, tcg_rm);
2985 } else {
2986 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
2987 }
2988 }
2989 return;
2990 }
2991
2992 tcg_rm = read_cpu_reg(s, rm, sf);
2993
2994 if (shift_amount) {
2995 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
2996 }
2997
2998 tcg_rn = cpu_reg(s, rn);
2999
3000 switch (opc | (invert << 2)) {
3001 case 0: /* AND */
3002 case 3: /* ANDS */
3003 tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
3004 break;
3005 case 1: /* ORR */
3006 tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
3007 break;
3008 case 2: /* EOR */
3009 tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
3010 break;
3011 case 4: /* BIC */
3012 case 7: /* BICS */
3013 tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
3014 break;
3015 case 5: /* ORN */
3016 tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
3017 break;
3018 case 6: /* EON */
3019 tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
3020 break;
3021 default:
3022 assert(FALSE);
3023 break;
3024 }
3025
3026 if (!sf) {
3027 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3028 }
3029
3030 if (opc == 3) {
3031 gen_logic_CC(sf, tcg_rd);
3032 }
ad7ee8a2
CF
3033}
3034
b0ff21b4
AB
3035/*
3036 * C3.5.1 Add/subtract (extended register)
3037 *
3038 * 31|30|29|28 24|23 22|21|20 16|15 13|12 10|9 5|4 0|
3039 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3040 * |sf|op| S| 0 1 0 1 1 | opt | 1| Rm |option| imm3 | Rn | Rd |
3041 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3042 *
3043 * sf: 0 -> 32bit, 1 -> 64bit
3044 * op: 0 -> add , 1 -> sub
3045 * S: 1 -> set flags
3046 * opt: 00
3047 * option: extension type (see DecodeRegExtend)
3048 * imm3: optional shift to Rm
3049 *
3050 * Rd = Rn + LSL(extend(Rm), amount)
3051 */
ad7ee8a2
CF
3052static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
3053{
b0ff21b4
AB
3054 int rd = extract32(insn, 0, 5);
3055 int rn = extract32(insn, 5, 5);
3056 int imm3 = extract32(insn, 10, 3);
3057 int option = extract32(insn, 13, 3);
3058 int rm = extract32(insn, 16, 5);
3059 bool setflags = extract32(insn, 29, 1);
3060 bool sub_op = extract32(insn, 30, 1);
3061 bool sf = extract32(insn, 31, 1);
3062
3063 TCGv_i64 tcg_rm, tcg_rn; /* temps */
3064 TCGv_i64 tcg_rd;
3065 TCGv_i64 tcg_result;
3066
3067 if (imm3 > 4) {
3068 unallocated_encoding(s);
3069 return;
3070 }
3071
3072 /* non-flag setting ops may use SP */
3073 if (!setflags) {
3074 tcg_rn = read_cpu_reg_sp(s, rn, sf);
3075 tcg_rd = cpu_reg_sp(s, rd);
3076 } else {
3077 tcg_rn = read_cpu_reg(s, rn, sf);
3078 tcg_rd = cpu_reg(s, rd);
3079 }
3080
3081 tcg_rm = read_cpu_reg(s, rm, sf);
3082 ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
3083
3084 tcg_result = tcg_temp_new_i64();
3085
3086 if (!setflags) {
3087 if (sub_op) {
3088 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3089 } else {
3090 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3091 }
3092 } else {
3093 if (sub_op) {
3094 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3095 } else {
3096 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3097 }
3098 }
3099
3100 if (sf) {
3101 tcg_gen_mov_i64(tcg_rd, tcg_result);
3102 } else {
3103 tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3104 }
3105
3106 tcg_temp_free_i64(tcg_result);
ad7ee8a2
CF
3107}
3108
b0ff21b4
AB
3109/*
3110 * C3.5.2 Add/subtract (shifted register)
3111 *
3112 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0
3113 * +--+--+--+-----------+-----+--+-------+---------+------+------+
3114 * |sf|op| S| 0 1 0 1 1 |shift| 0| Rm | imm6 | Rn | Rd |
3115 * +--+--+--+-----------+-----+--+-------+---------+------+------+
3116 *
3117 * sf: 0 -> 32bit, 1 -> 64bit
3118 * op: 0 -> add , 1 -> sub
3119 * S: 1 -> set flags
3120 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
3121 * imm6: Shift amount to apply to Rm before the add/sub
3122 */
ad7ee8a2
CF
3123static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
3124{
b0ff21b4
AB
3125 int rd = extract32(insn, 0, 5);
3126 int rn = extract32(insn, 5, 5);
3127 int imm6 = extract32(insn, 10, 6);
3128 int rm = extract32(insn, 16, 5);
3129 int shift_type = extract32(insn, 22, 2);
3130 bool setflags = extract32(insn, 29, 1);
3131 bool sub_op = extract32(insn, 30, 1);
3132 bool sf = extract32(insn, 31, 1);
3133
3134 TCGv_i64 tcg_rd = cpu_reg(s, rd);
3135 TCGv_i64 tcg_rn, tcg_rm;
3136 TCGv_i64 tcg_result;
3137
3138 if ((shift_type == 3) || (!sf && (imm6 > 31))) {
3139 unallocated_encoding(s);
3140 return;
3141 }
3142
3143 tcg_rn = read_cpu_reg(s, rn, sf);
3144 tcg_rm = read_cpu_reg(s, rm, sf);
3145
3146 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
3147
3148 tcg_result = tcg_temp_new_i64();
3149
3150 if (!setflags) {
3151 if (sub_op) {
3152 tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3153 } else {
3154 tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3155 }
3156 } else {
3157 if (sub_op) {
3158 gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3159 } else {
3160 gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3161 }
3162 }
3163
3164 if (sf) {
3165 tcg_gen_mov_i64(tcg_rd, tcg_result);
3166 } else {
3167 tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3168 }
3169
3170 tcg_temp_free_i64(tcg_result);
ad7ee8a2
CF
3171}
3172
52c8b9af
AG
3173/* C3.5.9 Data-processing (3 source)
3174
3175 31 30 29 28 24 23 21 20 16 15 14 10 9 5 4 0
3176 +--+------+-----------+------+------+----+------+------+------+
3177 |sf| op54 | 1 1 0 1 1 | op31 | Rm | o0 | Ra | Rn | Rd |
3178 +--+------+-----------+------+------+----+------+------+------+
3179
3180 */
ad7ee8a2
CF
3181static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
3182{
52c8b9af
AG
3183 int rd = extract32(insn, 0, 5);
3184 int rn = extract32(insn, 5, 5);
3185 int ra = extract32(insn, 10, 5);
3186 int rm = extract32(insn, 16, 5);
3187 int op_id = (extract32(insn, 29, 3) << 4) |
3188 (extract32(insn, 21, 3) << 1) |
3189 extract32(insn, 15, 1);
3190 bool sf = extract32(insn, 31, 1);
3191 bool is_sub = extract32(op_id, 0, 1);
3192 bool is_high = extract32(op_id, 2, 1);
3193 bool is_signed = false;
3194 TCGv_i64 tcg_op1;
3195 TCGv_i64 tcg_op2;
3196 TCGv_i64 tcg_tmp;
3197
3198 /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
3199 switch (op_id) {
3200 case 0x42: /* SMADDL */
3201 case 0x43: /* SMSUBL */
3202 case 0x44: /* SMULH */
3203 is_signed = true;
3204 break;
3205 case 0x0: /* MADD (32bit) */
3206 case 0x1: /* MSUB (32bit) */
3207 case 0x40: /* MADD (64bit) */
3208 case 0x41: /* MSUB (64bit) */
3209 case 0x4a: /* UMADDL */
3210 case 0x4b: /* UMSUBL */
3211 case 0x4c: /* UMULH */
3212 break;
3213 default:
3214 unallocated_encoding(s);
3215 return;
3216 }
3217
3218 if (is_high) {
3219 TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
3220 TCGv_i64 tcg_rd = cpu_reg(s, rd);
3221 TCGv_i64 tcg_rn = cpu_reg(s, rn);
3222 TCGv_i64 tcg_rm = cpu_reg(s, rm);
3223
3224 if (is_signed) {
3225 tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3226 } else {
3227 tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3228 }
3229
3230 tcg_temp_free_i64(low_bits);
3231 return;
3232 }
3233
3234 tcg_op1 = tcg_temp_new_i64();
3235 tcg_op2 = tcg_temp_new_i64();
3236 tcg_tmp = tcg_temp_new_i64();
3237
3238 if (op_id < 0x42) {
3239 tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
3240 tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
3241 } else {
3242 if (is_signed) {
3243 tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
3244 tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
3245 } else {
3246 tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
3247 tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
3248 }
3249 }
3250
3251 if (ra == 31 && !is_sub) {
3252 /* Special-case MADD with rA == XZR; it is the standard MUL alias */
3253 tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
3254 } else {
3255 tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
3256 if (is_sub) {
3257 tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3258 } else {
3259 tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3260 }
3261 }
3262
3263 if (!sf) {
3264 tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
3265 }
3266
3267 tcg_temp_free_i64(tcg_op1);
3268 tcg_temp_free_i64(tcg_op2);
3269 tcg_temp_free_i64(tcg_tmp);
ad7ee8a2
CF
3270}
3271
643dbb07
CF
3272/* C3.5.3 - Add/subtract (with carry)
3273 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 10 9 5 4 0
3274 * +--+--+--+------------------------+------+---------+------+-----+
3275 * |sf|op| S| 1 1 0 1 0 0 0 0 | rm | opcode2 | Rn | Rd |
3276 * +--+--+--+------------------------+------+---------+------+-----+
3277 * [000000]
3278 */
3279
ad7ee8a2
CF
3280static void disas_adc_sbc(DisasContext *s, uint32_t insn)
3281{
643dbb07
CF
3282 unsigned int sf, op, setflags, rm, rn, rd;
3283 TCGv_i64 tcg_y, tcg_rn, tcg_rd;
3284
3285 if (extract32(insn, 10, 6) != 0) {
3286 unallocated_encoding(s);
3287 return;
3288 }
3289
3290 sf = extract32(insn, 31, 1);
3291 op = extract32(insn, 30, 1);
3292 setflags = extract32(insn, 29, 1);
3293 rm = extract32(insn, 16, 5);
3294 rn = extract32(insn, 5, 5);
3295 rd = extract32(insn, 0, 5);
3296
3297 tcg_rd = cpu_reg(s, rd);
3298 tcg_rn = cpu_reg(s, rn);
3299
3300 if (op) {
3301 tcg_y = new_tmp_a64(s);
3302 tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
3303 } else {
3304 tcg_y = cpu_reg(s, rm);
3305 }
3306
3307 if (setflags) {
3308 gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
3309 } else {
3310 gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
3311 }
ad7ee8a2
CF
3312}
3313
750813cf
CF
3314/* C3.5.4 - C3.5.5 Conditional compare (immediate / register)
3315 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0
3316 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3317 * |sf|op| S| 1 1 0 1 0 0 1 0 |imm5/rm | cond |i/r |o2| Rn |o3|nzcv |
3318 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3319 * [1] y [0] [0]
3320 */
3321static void disas_cc(DisasContext *s, uint32_t insn)
ad7ee8a2 3322{
750813cf
CF
3323 unsigned int sf, op, y, cond, rn, nzcv, is_imm;
3324 int label_continue = -1;
3325 TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
ad7ee8a2 3326
750813cf
CF
3327 if (!extract32(insn, 29, 1)) {
3328 unallocated_encoding(s);
3329 return;
3330 }
3331 if (insn & (1 << 10 | 1 << 4)) {
3332 unallocated_encoding(s);
3333 return;
3334 }
3335 sf = extract32(insn, 31, 1);
3336 op = extract32(insn, 30, 1);
3337 is_imm = extract32(insn, 11, 1);
3338 y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
3339 cond = extract32(insn, 12, 4);
3340 rn = extract32(insn, 5, 5);
3341 nzcv = extract32(insn, 0, 4);
3342
3343 if (cond < 0x0e) { /* not always */
3344 int label_match = gen_new_label();
3345 label_continue = gen_new_label();
3346 arm_gen_test_cc(cond, label_match);
3347 /* nomatch: */
3348 tcg_tmp = tcg_temp_new_i64();
3349 tcg_gen_movi_i64(tcg_tmp, nzcv << 28);
3350 gen_set_nzcv(tcg_tmp);
3351 tcg_temp_free_i64(tcg_tmp);
3352 tcg_gen_br(label_continue);
3353 gen_set_label(label_match);
3354 }
3355 /* match, or condition is always */
3356 if (is_imm) {
3357 tcg_y = new_tmp_a64(s);
3358 tcg_gen_movi_i64(tcg_y, y);
3359 } else {
3360 tcg_y = cpu_reg(s, y);
3361 }
3362 tcg_rn = cpu_reg(s, rn);
3363
3364 tcg_tmp = tcg_temp_new_i64();
3365 if (op) {
3366 gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3367 } else {
3368 gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3369 }
3370 tcg_temp_free_i64(tcg_tmp);
3371
3372 if (cond < 0x0e) { /* continue */
3373 gen_set_label(label_continue);
3374 }
ad7ee8a2
CF
3375}
3376
e952d8c7
CF
3377/* C3.5.6 Conditional select
3378 * 31 30 29 28 21 20 16 15 12 11 10 9 5 4 0
3379 * +----+----+---+-----------------+------+------+-----+------+------+
3380 * | sf | op | S | 1 1 0 1 0 1 0 0 | Rm | cond | op2 | Rn | Rd |
3381 * +----+----+---+-----------------+------+------+-----+------+------+
3382 */
ad7ee8a2
CF
3383static void disas_cond_select(DisasContext *s, uint32_t insn)
3384{
e952d8c7
CF
3385 unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
3386 TCGv_i64 tcg_rd, tcg_src;
3387
3388 if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
3389 /* S == 1 or op2<1> == 1 */
3390 unallocated_encoding(s);
3391 return;
3392 }
3393 sf = extract32(insn, 31, 1);
3394 else_inv = extract32(insn, 30, 1);
3395 rm = extract32(insn, 16, 5);
3396 cond = extract32(insn, 12, 4);
3397 else_inc = extract32(insn, 10, 1);
3398 rn = extract32(insn, 5, 5);
3399 rd = extract32(insn, 0, 5);
3400
3401 if (rd == 31) {
3402 /* silly no-op write; until we use movcond we must special-case
3403 * this to avoid a dead temporary across basic blocks.
3404 */
3405 return;
3406 }
3407
3408 tcg_rd = cpu_reg(s, rd);
3409
3410 if (cond >= 0x0e) { /* condition "always" */
3411 tcg_src = read_cpu_reg(s, rn, sf);
3412 tcg_gen_mov_i64(tcg_rd, tcg_src);
3413 } else {
3414 /* OPTME: we could use movcond here, at the cost of duplicating
3415 * a lot of the arm_gen_test_cc() logic.
3416 */
3417 int label_match = gen_new_label();
3418 int label_continue = gen_new_label();
3419
3420 arm_gen_test_cc(cond, label_match);
3421 /* nomatch: */
3422 tcg_src = cpu_reg(s, rm);
3423
3424 if (else_inv && else_inc) {
3425 tcg_gen_neg_i64(tcg_rd, tcg_src);
3426 } else if (else_inv) {
3427 tcg_gen_not_i64(tcg_rd, tcg_src);
3428 } else if (else_inc) {
3429 tcg_gen_addi_i64(tcg_rd, tcg_src, 1);
3430 } else {
3431 tcg_gen_mov_i64(tcg_rd, tcg_src);
3432 }
3433 if (!sf) {
3434 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3435 }
3436 tcg_gen_br(label_continue);
3437 /* match: */
3438 gen_set_label(label_match);
3439 tcg_src = read_cpu_reg(s, rn, sf);
3440 tcg_gen_mov_i64(tcg_rd, tcg_src);
3441 /* continue: */
3442 gen_set_label(label_continue);
3443 }
ad7ee8a2
CF
3444}
3445
680ead21
CF
3446static void handle_clz(DisasContext *s, unsigned int sf,
3447 unsigned int rn, unsigned int rd)
3448{
3449 TCGv_i64 tcg_rd, tcg_rn;
3450 tcg_rd = cpu_reg(s, rd);
3451 tcg_rn = cpu_reg(s, rn);
3452
3453 if (sf) {
3454 gen_helper_clz64(tcg_rd, tcg_rn);
3455 } else {
3456 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3457 tcg_gen_trunc_i64_i32(tcg_tmp32, tcg_rn);
3458 gen_helper_clz(tcg_tmp32, tcg_tmp32);
3459 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3460 tcg_temp_free_i32(tcg_tmp32);
3461 }
3462}
3463
e80c5020
CF
3464static void handle_cls(DisasContext *s, unsigned int sf,
3465 unsigned int rn, unsigned int rd)
3466{
3467 TCGv_i64 tcg_rd, tcg_rn;
3468 tcg_rd = cpu_reg(s, rd);
3469 tcg_rn = cpu_reg(s, rn);
3470
3471 if (sf) {
3472 gen_helper_cls64(tcg_rd, tcg_rn);
3473 } else {
3474 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3475 tcg_gen_trunc_i64_i32(tcg_tmp32, tcg_rn);
3476 gen_helper_cls32(tcg_tmp32, tcg_tmp32);
3477 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3478 tcg_temp_free_i32(tcg_tmp32);
3479 }
3480}
3481
82e14b02
AG
3482static void handle_rbit(DisasContext *s, unsigned int sf,
3483 unsigned int rn, unsigned int rd)
3484{
3485 TCGv_i64 tcg_rd, tcg_rn;
3486 tcg_rd = cpu_reg(s, rd);
3487 tcg_rn = cpu_reg(s, rn);
3488
3489 if (sf) {
3490 gen_helper_rbit64(tcg_rd, tcg_rn);
3491 } else {
3492 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3493 tcg_gen_trunc_i64_i32(tcg_tmp32, tcg_rn);
3494 gen_helper_rbit(tcg_tmp32, tcg_tmp32);
3495 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3496 tcg_temp_free_i32(tcg_tmp32);
3497 }
3498}
3499
45323209
CF
3500/* C5.6.149 REV with sf==1, opcode==3 ("REV64") */
3501static void handle_rev64(DisasContext *s, unsigned int sf,
3502 unsigned int rn, unsigned int rd)
3503{
3504 if (!sf) {
3505 unallocated_encoding(s);
3506 return;
3507 }
3508 tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
3509}
3510
3511/* C5.6.149 REV with sf==0, opcode==2
3512 * C5.6.151 REV32 (sf==1, opcode==2)
3513 */
3514static void handle_rev32(DisasContext *s, unsigned int sf,
3515 unsigned int rn, unsigned int rd)
3516{
3517 TCGv_i64 tcg_rd = cpu_reg(s, rd);
3518
3519 if (sf) {
3520 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3521 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3522
3523 /* bswap32_i64 requires zero high word */
3524 tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
3525 tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
3526 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
3527 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
3528 tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
3529
3530 tcg_temp_free_i64(tcg_tmp);
3531 } else {
3532 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
3533 tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
3534 }
3535}
3536
3537/* C5.6.150 REV16 (opcode==1) */
3538static void handle_rev16(DisasContext *s, unsigned int sf,
3539 unsigned int rn, unsigned int rd)
3540{
3541 TCGv_i64 tcg_rd = cpu_reg(s, rd);
3542 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3543 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3544
3545 tcg_gen_andi_i64(tcg_tmp, tcg_rn, 0xffff);
3546 tcg_gen_bswap16_i64(tcg_rd, tcg_tmp);
3547
3548 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 16);
3549 tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
3550 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3551 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 16, 16);
3552
3553 if (sf) {
3554 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
3555 tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
3556 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3557 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 32, 16);
3558
3559 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 48);
3560 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3561 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 48, 16);
3562 }
3563
3564 tcg_temp_free_i64(tcg_tmp);
3565}
3566
680ead21
CF
3567/* C3.5.7 Data-processing (1 source)
3568 * 31 30 29 28 21 20 16 15 10 9 5 4 0
3569 * +----+---+---+-----------------+---------+--------+------+------+
3570 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode | Rn | Rd |
3571 * +----+---+---+-----------------+---------+--------+------+------+
3572 */
ad7ee8a2
CF
3573static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
3574{
680ead21
CF
3575 unsigned int sf, opcode, rn, rd;
3576
3577 if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
3578 unallocated_encoding(s);
3579 return;
3580 }
3581
3582 sf = extract32(insn, 31, 1);
3583 opcode = extract32(insn, 10, 6);
3584 rn = extract32(insn, 5, 5);
3585 rd = extract32(insn, 0, 5);
3586
3587 switch (opcode) {
3588 case 0: /* RBIT */
82e14b02
AG
3589 handle_rbit(s, sf, rn, rd);
3590 break;
680ead21 3591 case 1: /* REV16 */
45323209
CF
3592 handle_rev16(s, sf, rn, rd);
3593 break;
680ead21 3594 case 2: /* REV32 */
45323209
CF
3595 handle_rev32(s, sf, rn, rd);
3596 break;
680ead21 3597 case 3: /* REV64 */
45323209 3598 handle_rev64(s, sf, rn, rd);
680ead21
CF
3599 break;
3600 case 4: /* CLZ */
3601 handle_clz(s, sf, rn, rd);
3602 break;
3603 case 5: /* CLS */
e80c5020 3604 handle_cls(s, sf, rn, rd);
680ead21
CF
3605 break;
3606 }
ad7ee8a2
CF
3607}
3608
8220e911
AG
3609static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
3610 unsigned int rm, unsigned int rn, unsigned int rd)
3611{
3612 TCGv_i64 tcg_n, tcg_m, tcg_rd;
3613 tcg_rd = cpu_reg(s, rd);
3614
3615 if (!sf && is_signed) {
3616 tcg_n = new_tmp_a64(s);
3617 tcg_m = new_tmp_a64(s);
3618 tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
3619 tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
3620 } else {
3621 tcg_n = read_cpu_reg(s, rn, sf);
3622 tcg_m = read_cpu_reg(s, rm, sf);
3623 }
3624
3625 if (is_signed) {
3626 gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
3627 } else {
3628 gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
3629 }
3630
3631 if (!sf) { /* zero extend final result */
3632 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3633 }
3634}
3635
6c1adc91
AG
3636/* C5.6.115 LSLV, C5.6.118 LSRV, C5.6.17 ASRV, C5.6.154 RORV */
3637static void handle_shift_reg(DisasContext *s,
3638 enum a64_shift_type shift_type, unsigned int sf,
3639 unsigned int rm, unsigned int rn, unsigned int rd)
3640{
3641 TCGv_i64 tcg_shift = tcg_temp_new_i64();
3642 TCGv_i64 tcg_rd = cpu_reg(s, rd);
3643 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3644
3645 tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
3646 shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
3647 tcg_temp_free_i64(tcg_shift);
3648}
3649
8220e911
AG
3650/* C3.5.8 Data-processing (2 source)
3651 * 31 30 29 28 21 20 16 15 10 9 5 4 0
3652 * +----+---+---+-----------------+------+--------+------+------+
3653 * | sf | 0 | S | 1 1 0 1 0 1 1 0 | Rm | opcode | Rn | Rd |
3654 * +----+---+---+-----------------+------+--------+------+------+
3655 */
ad7ee8a2
CF
3656static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
3657{
8220e911
AG
3658 unsigned int sf, rm, opcode, rn, rd;
3659 sf = extract32(insn, 31, 1);
3660 rm = extract32(insn, 16, 5);
3661 opcode = extract32(insn, 10, 6);
3662 rn = extract32(insn, 5, 5);
3663 rd = extract32(insn, 0, 5);
3664
3665 if (extract32(insn, 29, 1)) {
3666 unallocated_encoding(s);
3667 return;
3668 }
3669
3670 switch (opcode) {
3671 case 2: /* UDIV */
3672 handle_div(s, false, sf, rm, rn, rd);
3673 break;
3674 case 3: /* SDIV */
3675 handle_div(s, true, sf, rm, rn, rd);
3676 break;
3677 case 8: /* LSLV */
6c1adc91
AG
3678 handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
3679 break;
8220e911 3680 case 9: /* LSRV */
6c1adc91
AG
3681 handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
3682 break;
8220e911 3683 case 10: /* ASRV */
6c1adc91
AG
3684 handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
3685 break;
8220e911 3686 case 11: /* RORV */
6c1adc91
AG
3687 handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
3688 break;
8220e911
AG
3689 case 16:
3690 case 17:
3691 case 18:
3692 case 19:
3693 case 20:
3694 case 21:
3695 case 22:
3696 case 23: /* CRC32 */
3697 unsupported_encoding(s, insn);
3698 break;
3699 default:
3700 unallocated_encoding(s);
3701 break;
3702 }
ad7ee8a2
CF
3703}
3704
3705/* C3.5 Data processing - register */
3706static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
3707{
3708 switch (extract32(insn, 24, 5)) {
3709 case 0x0a: /* Logical (shifted register) */
3710 disas_logic_reg(s, insn);
3711 break;
3712 case 0x0b: /* Add/subtract */
3713 if (insn & (1 << 21)) { /* (extended register) */
3714 disas_add_sub_ext_reg(s, insn);
3715 } else {
3716 disas_add_sub_reg(s, insn);
3717 }
3718 break;
3719 case 0x1b: /* Data-processing (3 source) */
3720 disas_data_proc_3src(s, insn);
3721 break;
3722 case 0x1a:
3723 switch (extract32(insn, 21, 3)) {
3724 case 0x0: /* Add/subtract (with carry) */
3725 disas_adc_sbc(s, insn);
3726 break;
3727 case 0x2: /* Conditional compare */
750813cf 3728 disas_cc(s, insn); /* both imm and reg forms */
ad7ee8a2
CF
3729 break;
3730 case 0x4: /* Conditional select */
3731 disas_cond_select(s, insn);
3732 break;
3733 case 0x6: /* Data-processing */
3734 if (insn & (1 << 30)) { /* (1 source) */
3735 disas_data_proc_1src(s, insn);
3736 } else { /* (2 source) */
3737 disas_data_proc_2src(s, insn);
3738 }
3739 break;
3740 default:
3741 unallocated_encoding(s);
3742 break;
3743 }
3744 break;
3745 default:
3746 unallocated_encoding(s);
3747 break;
3748 }
3749}
3750
da7dafe7
CF
3751static void handle_fp_compare(DisasContext *s, bool is_double,
3752 unsigned int rn, unsigned int rm,
3753 bool cmp_with_zero, bool signal_all_nans)
3754{
3755 TCGv_i64 tcg_flags = tcg_temp_new_i64();
3756 TCGv_ptr fpst = get_fpstatus_ptr();
3757
3758 if (is_double) {
3759 TCGv_i64 tcg_vn, tcg_vm;
3760
3761 tcg_vn = read_fp_dreg(s, rn);
3762 if (cmp_with_zero) {
3763 tcg_vm = tcg_const_i64(0);
3764 } else {
3765 tcg_vm = read_fp_dreg(s, rm);
3766 }
3767 if (signal_all_nans) {
3768 gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
3769 } else {
3770 gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
3771 }
3772 tcg_temp_free_i64(tcg_vn);
3773 tcg_temp_free_i64(tcg_vm);
3774 } else {
3775 TCGv_i32 tcg_vn, tcg_vm;
3776
3777 tcg_vn = read_fp_sreg(s, rn);
3778 if (cmp_with_zero) {
3779 tcg_vm = tcg_const_i32(0);
3780 } else {
3781 tcg_vm = read_fp_sreg(s, rm);
3782 }
3783 if (signal_all_nans) {
3784 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
3785 } else {
3786 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
3787 }
3788 tcg_temp_free_i32(tcg_vn);
3789 tcg_temp_free_i32(tcg_vm);
3790 }
3791
3792 tcg_temp_free_ptr(fpst);
3793
3794 gen_set_nzcv(tcg_flags);
3795
3796 tcg_temp_free_i64(tcg_flags);
3797}
3798
faa0ba46
PM
3799/* C3.6.22 Floating point compare
3800 * 31 30 29 28 24 23 22 21 20 16 15 14 13 10 9 5 4 0
3801 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
3802 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | op | 1 0 0 0 | Rn | op2 |
3803 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
3804 */
3805static void disas_fp_compare(DisasContext *s, uint32_t insn)
3806{
da7dafe7
CF
3807 unsigned int mos, type, rm, op, rn, opc, op2r;
3808
3809 mos = extract32(insn, 29, 3);
3810 type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
3811 rm = extract32(insn, 16, 5);
3812 op = extract32(insn, 14, 2);
3813 rn = extract32(insn, 5, 5);
3814 opc = extract32(insn, 3, 2);
3815 op2r = extract32(insn, 0, 3);
3816
3817 if (mos || op || op2r || type > 1) {
3818 unallocated_encoding(s);
3819 return;
3820 }
3821
3822 handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
faa0ba46
PM
3823}
3824
3825/* C3.6.23 Floating point conditional compare
3826 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0
3827 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
3828 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 0 1 | Rn | op | nzcv |
3829 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
3830 */
3831static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
3832{
513f1d76
CF
3833 unsigned int mos, type, rm, cond, rn, op, nzcv;
3834 TCGv_i64 tcg_flags;
3835 int label_continue = -1;
3836
3837 mos = extract32(insn, 29, 3);
3838 type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
3839 rm = extract32(insn, 16, 5);
3840 cond = extract32(insn, 12, 4);
3841 rn = extract32(insn, 5, 5);
3842 op = extract32(insn, 4, 1);
3843 nzcv = extract32(insn, 0, 4);
3844
3845 if (mos || type > 1) {
3846 unallocated_encoding(s);
3847 return;
3848 }
3849
3850 if (cond < 0x0e) { /* not always */
3851 int label_match = gen_new_label();
3852 label_continue = gen_new_label();
3853 arm_gen_test_cc(cond, label_match);
3854 /* nomatch: */
3855 tcg_flags = tcg_const_i64(nzcv << 28);
3856 gen_set_nzcv(tcg_flags);
3857 tcg_temp_free_i64(tcg_flags);
3858 tcg_gen_br(label_continue);
3859 gen_set_label(label_match);
3860 }
3861
3862 handle_fp_compare(s, type, rn, rm, false, op);
3863
3864 if (cond < 0x0e) {
3865 gen_set_label(label_continue);
3866 }
faa0ba46
PM
3867}
3868
5640ff62
CF
3869/* copy src FP register to dst FP register; type specifies single or double */
3870static void gen_mov_fp2fp(DisasContext *s, int type, int dst, int src)
3871{
3872 if (type) {
3873 TCGv_i64 v = read_fp_dreg(s, src);
3874 write_fp_dreg(s, dst, v);
3875 tcg_temp_free_i64(v);
3876 } else {
3877 TCGv_i32 v = read_fp_sreg(s, src);
3878 write_fp_sreg(s, dst, v);
3879 tcg_temp_free_i32(v);
3880 }
3881}
3882
faa0ba46
PM
3883/* C3.6.24 Floating point conditional select
3884 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
3885 * +---+---+---+-----------+------+---+------+------+-----+------+------+
3886 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 1 1 | Rn | Rd |
3887 * +---+---+---+-----------+------+---+------+------+-----+------+------+
3888 */
3889static void disas_fp_csel(DisasContext *s, uint32_t insn)
3890{
5640ff62
CF
3891 unsigned int mos, type, rm, cond, rn, rd;
3892 int label_continue = -1;
3893
3894 mos = extract32(insn, 29, 3);
3895 type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
3896 rm = extract32(insn, 16, 5);
3897 cond = extract32(insn, 12, 4);
3898 rn = extract32(insn, 5, 5);
3899 rd = extract32(insn, 0, 5);
3900
3901 if (mos || type > 1) {
3902 unallocated_encoding(s);
3903 return;
3904 }
3905
3906 if (cond < 0x0e) { /* not always */
3907 int label_match = gen_new_label();
3908 label_continue = gen_new_label();
3909 arm_gen_test_cc(cond, label_match);
3910 /* nomatch: */
3911 gen_mov_fp2fp(s, type, rd, rm);
3912 tcg_gen_br(label_continue);
3913 gen_set_label(label_match);
3914 }
3915
3916 gen_mov_fp2fp(s, type, rd, rn);
3917
3918 if (cond < 0x0e) { /* continue */
3919 gen_set_label(label_continue);
3920 }
faa0ba46
PM
3921}
3922
d9b0848d
PM
3923/* C3.6.25 Floating-point data-processing (1 source) - single precision */
3924static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
3925{
3926 TCGv_ptr fpst;
3927 TCGv_i32 tcg_op;
3928 TCGv_i32 tcg_res;
3929
3930 fpst = get_fpstatus_ptr();
3931 tcg_op = read_fp_sreg(s, rn);
3932 tcg_res = tcg_temp_new_i32();
3933
3934 switch (opcode) {
3935 case 0x0: /* FMOV */
3936 tcg_gen_mov_i32(tcg_res, tcg_op);
3937 break;
3938 case 0x1: /* FABS */
3939 gen_helper_vfp_abss(tcg_res, tcg_op);
3940 break;
3941 case 0x2: /* FNEG */
3942 gen_helper_vfp_negs(tcg_res, tcg_op);
3943 break;
3944 case 0x3: /* FSQRT */
3945 gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
3946 break;
3947 case 0x8: /* FRINTN */
3948 case 0x9: /* FRINTP */
3949 case 0xa: /* FRINTM */
3950 case 0xb: /* FRINTZ */
3951 case 0xc: /* FRINTA */
3952 {
3953 TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
3954
3955 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
3956 gen_helper_rints(tcg_res, tcg_op, fpst);
3957
3958 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
3959 tcg_temp_free_i32(tcg_rmode);
3960 break;
3961 }
3962 case 0xe: /* FRINTX */
3963 gen_helper_rints_exact(tcg_res, tcg_op, fpst);
3964 break;
3965 case 0xf: /* FRINTI */
3966 gen_helper_rints(tcg_res, tcg_op, fpst);
3967 break;
3968 default:
3969 abort();
3970 }
3971
3972 write_fp_sreg(s, rd, tcg_res);
3973
3974 tcg_temp_free_ptr(fpst);
3975 tcg_temp_free_i32(tcg_op);
3976 tcg_temp_free_i32(tcg_res);
3977}
3978
3979/* C3.6.25 Floating-point data-processing (1 source) - double precision */
3980static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
3981{
3982 TCGv_ptr fpst;
3983 TCGv_i64 tcg_op;
3984 TCGv_i64 tcg_res;
3985
3986 fpst = get_fpstatus_ptr();
3987 tcg_op = read_fp_dreg(s, rn);
3988 tcg_res = tcg_temp_new_i64();
3989
3990 switch (opcode) {
3991 case 0x0: /* FMOV */
3992 tcg_gen_mov_i64(tcg_res, tcg_op);
3993 break;
3994 case 0x1: /* FABS */
3995 gen_helper_vfp_absd(tcg_res, tcg_op);
3996 break;
3997 case 0x2: /* FNEG */
3998 gen_helper_vfp_negd(tcg_res, tcg_op);
3999 break;
4000 case 0x3: /* FSQRT */
4001 gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
4002 break;
4003 case 0x8: /* FRINTN */
4004 case 0x9: /* FRINTP */
4005 case 0xa: /* FRINTM */
4006 case 0xb: /* FRINTZ */
4007 case 0xc: /* FRINTA */
4008 {
4009 TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4010
4011 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4012 gen_helper_rintd(tcg_res, tcg_op, fpst);
4013
4014 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4015 tcg_temp_free_i32(tcg_rmode);
4016 break;
4017 }
4018 case 0xe: /* FRINTX */
4019 gen_helper_rintd_exact(tcg_res, tcg_op, fpst);
4020 break;
4021 case 0xf: /* FRINTI */
4022 gen_helper_rintd(tcg_res, tcg_op, fpst);
4023 break;
4024 default:
4025 abort();
4026 }
4027
4028 write_fp_dreg(s, rd, tcg_res);
4029
4030 tcg_temp_free_ptr(fpst);
4031 tcg_temp_free_i64(tcg_op);
4032 tcg_temp_free_i64(tcg_res);
4033}
4034
8900aad2
PM
4035static void handle_fp_fcvt(DisasContext *s, int opcode,
4036 int rd, int rn, int dtype, int ntype)
4037{
4038 switch (ntype) {
4039 case 0x0:
4040 {
4041 TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4042 if (dtype == 1) {
4043 /* Single to double */
4044 TCGv_i64 tcg_rd = tcg_temp_new_i64();
4045 gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
4046 write_fp_dreg(s, rd, tcg_rd);
4047 tcg_temp_free_i64(tcg_rd);
4048 } else {
4049 /* Single to half */
4050 TCGv_i32 tcg_rd = tcg_temp_new_i32();
4051 gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, cpu_env);
4052 /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4053 write_fp_sreg(s, rd, tcg_rd);
4054 tcg_temp_free_i32(tcg_rd);
4055 }
4056 tcg_temp_free_i32(tcg_rn);
4057 break;
4058 }
4059 case 0x1:
4060 {
4061 TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
4062 TCGv_i32 tcg_rd = tcg_temp_new_i32();
4063 if (dtype == 0) {
4064 /* Double to single */
4065 gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
4066 } else {
4067 /* Double to half */
4068 gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, cpu_env);
4069 /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4070 }
4071 write_fp_sreg(s, rd, tcg_rd);
4072 tcg_temp_free_i32(tcg_rd);
4073 tcg_temp_free_i64(tcg_rn);
4074 break;
4075 }
4076 case 0x3:
4077 {
4078 TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4079 tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
4080 if (dtype == 0) {
4081 /* Half to single */
4082 TCGv_i32 tcg_rd = tcg_temp_new_i32();
4083 gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, cpu_env);
4084 write_fp_sreg(s, rd, tcg_rd);
4085 tcg_temp_free_i32(tcg_rd);
4086 } else {
4087 /* Half to double */
4088 TCGv_i64 tcg_rd = tcg_temp_new_i64();
4089 gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, cpu_env);
4090 write_fp_dreg(s, rd, tcg_rd);
4091 tcg_temp_free_i64(tcg_rd);
4092 }
4093 tcg_temp_free_i32(tcg_rn);
4094 break;
4095 }
4096 default:
4097 abort();
4098 }
4099}
4100
faa0ba46
PM
4101/* C3.6.25 Floating point data-processing (1 source)
4102 * 31 30 29 28 24 23 22 21 20 15 14 10 9 5 4 0
4103 * +---+---+---+-----------+------+---+--------+-----------+------+------+
4104 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 | Rn | Rd |
4105 * +---+---+---+-----------+------+---+--------+-----------+------+------+
4106 */
4107static void disas_fp_1src(DisasContext *s, uint32_t insn)
4108{
d9b0848d
PM
4109 int type = extract32(insn, 22, 2);
4110 int opcode = extract32(insn, 15, 6);
4111 int rn = extract32(insn, 5, 5);
4112 int rd = extract32(insn, 0, 5);
4113
4114 switch (opcode) {
4115 case 0x4: case 0x5: case 0x7:
8900aad2 4116 {
d9b0848d 4117 /* FCVT between half, single and double precision */
8900aad2
PM
4118 int dtype = extract32(opcode, 0, 2);
4119 if (type == 2 || dtype == type) {
4120 unallocated_encoding(s);
4121 return;
4122 }
4123 handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
d9b0848d 4124 break;
8900aad2 4125 }
d9b0848d
PM
4126 case 0x0 ... 0x3:
4127 case 0x8 ... 0xc:
4128 case 0xe ... 0xf:
4129 /* 32-to-32 and 64-to-64 ops */
4130 switch (type) {
4131 case 0:
4132 handle_fp_1src_single(s, opcode, rd, rn);
4133 break;
4134 case 1:
4135 handle_fp_1src_double(s, opcode, rd, rn);
4136 break;
4137 default:
4138 unallocated_encoding(s);
4139 }
4140 break;
4141 default:
4142 unallocated_encoding(s);
4143 break;
4144 }
faa0ba46
PM
4145}
4146
ec73d2e0
AG
4147/* C3.6.26 Floating-point data-processing (2 source) - single precision */
4148static void handle_fp_2src_single(DisasContext *s, int opcode,
4149 int rd, int rn, int rm)
4150{
4151 TCGv_i32 tcg_op1;
4152 TCGv_i32 tcg_op2;
4153 TCGv_i32 tcg_res;
4154 TCGv_ptr fpst;
4155
4156 tcg_res = tcg_temp_new_i32();
4157 fpst = get_fpstatus_ptr();
4158 tcg_op1 = read_fp_sreg(s, rn);
4159 tcg_op2 = read_fp_sreg(s, rm);
4160
4161 switch (opcode) {
4162 case 0x0: /* FMUL */
4163 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4164 break;
4165 case 0x1: /* FDIV */
4166 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
4167 break;
4168 case 0x2: /* FADD */
4169 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
4170 break;
4171 case 0x3: /* FSUB */
4172 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
4173 break;
4174 case 0x4: /* FMAX */
4175 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
4176 break;
4177 case 0x5: /* FMIN */
4178 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
4179 break;
4180 case 0x6: /* FMAXNM */
4181 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
4182 break;
4183 case 0x7: /* FMINNM */
4184 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
4185 break;
4186 case 0x8: /* FNMUL */
4187 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4188 gen_helper_vfp_negs(tcg_res, tcg_res);
4189 break;
4190 }
4191
4192 write_fp_sreg(s, rd, tcg_res);
4193
4194 tcg_temp_free_ptr(fpst);
4195 tcg_temp_free_i32(tcg_op1);
4196 tcg_temp_free_i32(tcg_op2);
4197 tcg_temp_free_i32(tcg_res);
4198}
4199
4200/* C3.6.26 Floating-point data-processing (2 source) - double precision */
4201static void handle_fp_2src_double(DisasContext *s, int opcode,
4202 int rd, int rn, int rm)
4203{
4204 TCGv_i64 tcg_op1;
4205 TCGv_i64 tcg_op2;
4206 TCGv_i64 tcg_res;
4207 TCGv_ptr fpst;
4208
4209 tcg_res = tcg_temp_new_i64();
4210 fpst = get_fpstatus_ptr();
4211 tcg_op1 = read_fp_dreg(s, rn);
4212 tcg_op2 = read_fp_dreg(s, rm);
4213
4214 switch (opcode) {
4215 case 0x0: /* FMUL */
4216 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4217 break;
4218 case 0x1: /* FDIV */
4219 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
4220 break;
4221 case 0x2: /* FADD */
4222 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
4223 break;
4224 case 0x3: /* FSUB */
4225 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
4226 break;
4227 case 0x4: /* FMAX */
4228 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
4229 break;
4230 case 0x5: /* FMIN */
4231 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
4232 break;
4233 case 0x6: /* FMAXNM */
4234 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4235 break;
4236 case 0x7: /* FMINNM */
4237 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4238 break;
4239 case 0x8: /* FNMUL */
4240 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4241 gen_helper_vfp_negd(tcg_res, tcg_res);
4242 break;
4243 }
4244
4245 write_fp_dreg(s, rd, tcg_res);
4246
4247 tcg_temp_free_ptr(fpst);
4248 tcg_temp_free_i64(tcg_op1);
4249 tcg_temp_free_i64(tcg_op2);
4250 tcg_temp_free_i64(tcg_res);
4251}
4252
faa0ba46
PM
4253/* C3.6.26 Floating point data-processing (2 source)
4254 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
4255 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4256 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | opcode | 1 0 | Rn | Rd |
4257 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4258 */
4259static void disas_fp_2src(DisasContext *s, uint32_t insn)
4260{
ec73d2e0
AG
4261 int type = extract32(insn, 22, 2);
4262 int rd = extract32(insn, 0, 5);
4263 int rn = extract32(insn, 5, 5);
4264 int rm = extract32(insn, 16, 5);
4265 int opcode = extract32(insn, 12, 4);
4266
4267 if (opcode > 8) {
4268 unallocated_encoding(s);
4269 return;
4270 }
4271
4272 switch (type) {
4273 case 0:
4274 handle_fp_2src_single(s, opcode, rd, rn, rm);
4275 break;
4276 case 1:
4277 handle_fp_2src_double(s, opcode, rd, rn, rm);
4278 break;
4279 default:
4280 unallocated_encoding(s);
4281 }
faa0ba46
PM
4282}
4283
6a30667f
AG
4284/* C3.6.27 Floating-point data-processing (3 source) - single precision */
4285static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
4286 int rd, int rn, int rm, int ra)
4287{
4288 TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
4289 TCGv_i32 tcg_res = tcg_temp_new_i32();
4290 TCGv_ptr fpst = get_fpstatus_ptr();
4291
4292 tcg_op1 = read_fp_sreg(s, rn);
4293 tcg_op2 = read_fp_sreg(s, rm);
4294 tcg_op3 = read_fp_sreg(s, ra);
4295
4296 /* These are fused multiply-add, and must be done as one
4297 * floating point operation with no rounding between the
4298 * multiplication and addition steps.
4299 * NB that doing the negations here as separate steps is
4300 * correct : an input NaN should come out with its sign bit
4301 * flipped if it is a negated-input.
4302 */
4303 if (o1 == true) {
4304 gen_helper_vfp_negs(tcg_op3, tcg_op3);
4305 }
4306
4307 if (o0 != o1) {
4308 gen_helper_vfp_negs(tcg_op1, tcg_op1);
4309 }
4310
4311 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4312
4313 write_fp_sreg(s, rd, tcg_res);
4314
4315 tcg_temp_free_ptr(fpst);
4316 tcg_temp_free_i32(tcg_op1);
4317 tcg_temp_free_i32(tcg_op2);
4318 tcg_temp_free_i32(tcg_op3);
4319 tcg_temp_free_i32(tcg_res);
4320}
4321
4322/* C3.6.27 Floating-point data-processing (3 source) - double precision */
4323static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
4324 int rd, int rn, int rm, int ra)
4325{
4326 TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
4327 TCGv_i64 tcg_res = tcg_temp_new_i64();
4328 TCGv_ptr fpst = get_fpstatus_ptr();
4329
4330 tcg_op1 = read_fp_dreg(s, rn);
4331 tcg_op2 = read_fp_dreg(s, rm);
4332 tcg_op3 = read_fp_dreg(s, ra);
4333
4334 /* These are fused multiply-add, and must be done as one
4335 * floating point operation with no rounding between the
4336 * multiplication and addition steps.
4337 * NB that doing the negations here as separate steps is
4338 * correct : an input NaN should come out with its sign bit
4339 * flipped if it is a negated-input.
4340 */
4341 if (o1 == true) {
4342 gen_helper_vfp_negd(tcg_op3, tcg_op3);
4343 }
4344
4345 if (o0 != o1) {
4346 gen_helper_vfp_negd(tcg_op1, tcg_op1);
4347 }
4348
4349 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4350
4351 write_fp_dreg(s, rd, tcg_res);
4352
4353 tcg_temp_free_ptr(fpst);
4354 tcg_temp_free_i64(tcg_op1);
4355 tcg_temp_free_i64(tcg_op2);
4356 tcg_temp_free_i64(tcg_op3);
4357 tcg_temp_free_i64(tcg_res);
4358}
4359
faa0ba46
PM
4360/* C3.6.27 Floating point data-processing (3 source)
4361 * 31 30 29 28 24 23 22 21 20 16 15 14 10 9 5 4 0
4362 * +---+---+---+-----------+------+----+------+----+------+------+------+
4363 * | M | 0 | S | 1 1 1 1 1 | type | o1 | Rm | o0 | Ra | Rn | Rd |
4364 * +---+---+---+-----------+------+----+------+----+------+------+------+
4365 */
4366static void disas_fp_3src(DisasContext *s, uint32_t insn)
4367{
6a30667f
AG
4368 int type = extract32(insn, 22, 2);
4369 int rd = extract32(insn, 0, 5);
4370 int rn = extract32(insn, 5, 5);
4371 int ra = extract32(insn, 10, 5);
4372 int rm = extract32(insn, 16, 5);
4373 bool o0 = extract32(insn, 15, 1);
4374 bool o1 = extract32(insn, 21, 1);
4375
4376 switch (type) {
4377 case 0:
4378 handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
4379 break;
4380 case 1:
4381 handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
4382 break;
4383 default:
4384 unallocated_encoding(s);
4385 }
faa0ba46
PM
4386}
4387
4388/* C3.6.28 Floating point immediate
4389 * 31 30 29 28 24 23 22 21 20 13 12 10 9 5 4 0
4390 * +---+---+---+-----------+------+---+------------+-------+------+------+
4391 * | M | 0 | S | 1 1 1 1 0 | type | 1 | imm8 | 1 0 0 | imm5 | Rd |
4392 * +---+---+---+-----------+------+---+------------+-------+------+------+
4393 */
4394static void disas_fp_imm(DisasContext *s, uint32_t insn)
4395{
6163f868
AG
4396 int rd = extract32(insn, 0, 5);
4397 int imm8 = extract32(insn, 13, 8);
4398 int is_double = extract32(insn, 22, 2);
4399 uint64_t imm;
4400 TCGv_i64 tcg_res;
4401
4402 if (is_double > 1) {
4403 unallocated_encoding(s);
4404 return;
4405 }
4406
4407 /* The imm8 encodes the sign bit, enough bits to represent
4408 * an exponent in the range 01....1xx to 10....0xx,
4409 * and the most significant 4 bits of the mantissa; see
4410 * VFPExpandImm() in the v8 ARM ARM.
4411 */
4412 if (is_double) {
4413 imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4414 (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
4415 extract32(imm8, 0, 6);
4416 imm <<= 48;
4417 } else {
4418 imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4419 (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
4420 (extract32(imm8, 0, 6) << 3);
4421 imm <<= 16;
4422 }
4423
4424 tcg_res = tcg_const_i64(imm);
4425 write_fp_dreg(s, rd, tcg_res);
4426 tcg_temp_free_i64(tcg_res);
faa0ba46
PM
4427}
4428
52a1f6a3
AG
4429/* Handle floating point <=> fixed point conversions. Note that we can
4430 * also deal with fp <=> integer conversions as a special case (scale == 64)
4431 * OPTME: consider handling that special case specially or at least skipping
4432 * the call to scalbn in the helpers for zero shifts.
4433 */
4434static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
4435 bool itof, int rmode, int scale, int sf, int type)
4436{
4437 bool is_signed = !(opcode & 1);
4438 bool is_double = type;
4439 TCGv_ptr tcg_fpstatus;
4440 TCGv_i32 tcg_shift;
4441
4442 tcg_fpstatus = get_fpstatus_ptr();
4443
4444 tcg_shift = tcg_const_i32(64 - scale);
4445
4446 if (itof) {
4447 TCGv_i64 tcg_int = cpu_reg(s, rn);
4448 if (!sf) {
4449 TCGv_i64 tcg_extend = new_tmp_a64(s);
4450
4451 if (is_signed) {
4452 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
4453 } else {
4454 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
4455 }
4456
4457 tcg_int = tcg_extend;
4458 }
4459
4460 if (is_double) {
4461 TCGv_i64 tcg_double = tcg_temp_new_i64();
4462 if (is_signed) {
4463 gen_helper_vfp_sqtod(tcg_double, tcg_int,
4464 tcg_shift, tcg_fpstatus);
4465 } else {
4466 gen_helper_vfp_uqtod(tcg_double, tcg_int,
4467 tcg_shift, tcg_fpstatus);
4468 }
4469 write_fp_dreg(s, rd, tcg_double);
4470 tcg_temp_free_i64(tcg_double);
4471 } else {
4472 TCGv_i32 tcg_single = tcg_temp_new_i32();
4473 if (is_signed) {
4474 gen_helper_vfp_sqtos(tcg_single, tcg_int,
4475 tcg_shift, tcg_fpstatus);
4476 } else {
4477 gen_helper_vfp_uqtos(tcg_single, tcg_int,
4478 tcg_shift, tcg_fpstatus);
4479 }
4480 write_fp_sreg(s, rd, tcg_single);
4481 tcg_temp_free_i32(tcg_single);
4482 }
4483 } else {
4484 TCGv_i64 tcg_int = cpu_reg(s, rd);
4485 TCGv_i32 tcg_rmode;
4486
4487 if (extract32(opcode, 2, 1)) {
4488 /* There are too many rounding modes to all fit into rmode,
4489 * so FCVTA[US] is a special case.
4490 */
4491 rmode = FPROUNDING_TIEAWAY;
4492 }
4493
4494 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
4495
4496 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4497
4498 if (is_double) {
4499 TCGv_i64 tcg_double = read_fp_dreg(s, rn);
4500 if (is_signed) {
4501 if (!sf) {
4502 gen_helper_vfp_tosld(tcg_int, tcg_double,
4503 tcg_shift, tcg_fpstatus);
4504 } else {
4505 gen_helper_vfp_tosqd(tcg_int, tcg_double,
4506 tcg_shift, tcg_fpstatus);
4507 }
4508 } else {
4509 if (!sf) {
4510 gen_helper_vfp_tould(tcg_int, tcg_double,
4511 tcg_shift, tcg_fpstatus);
4512 } else {
4513 gen_helper_vfp_touqd(tcg_int, tcg_double,
4514 tcg_shift, tcg_fpstatus);
4515 }
4516 }
4517 tcg_temp_free_i64(tcg_double);
4518 } else {
4519 TCGv_i32 tcg_single = read_fp_sreg(s, rn);
4520 if (sf) {
4521 if (is_signed) {
4522 gen_helper_vfp_tosqs(tcg_int, tcg_single,
4523 tcg_shift, tcg_fpstatus);
4524 } else {
4525 gen_helper_vfp_touqs(tcg_int, tcg_single,
4526 tcg_shift, tcg_fpstatus);
4527 }
4528 } else {
4529 TCGv_i32 tcg_dest = tcg_temp_new_i32();
4530 if (is_signed) {
4531 gen_helper_vfp_tosls(tcg_dest, tcg_single,
4532 tcg_shift, tcg_fpstatus);
4533 } else {
4534 gen_helper_vfp_touls(tcg_dest, tcg_single,
4535 tcg_shift, tcg_fpstatus);
4536 }
4537 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
4538 tcg_temp_free_i32(tcg_dest);
4539 }
4540 tcg_temp_free_i32(tcg_single);
4541 }
4542
4543 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4544 tcg_temp_free_i32(tcg_rmode);
4545
4546 if (!sf) {
4547 tcg_gen_ext32u_i64(tcg_int, tcg_int);
4548 }
4549 }
4550
4551 tcg_temp_free_ptr(tcg_fpstatus);
4552 tcg_temp_free_i32(tcg_shift);
4553}
4554
faa0ba46
PM
4555/* C3.6.29 Floating point <-> fixed point conversions
4556 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0
4557 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
4558 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale | Rn | Rd |
4559 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
4560 */
4561static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
4562{
52a1f6a3
AG
4563 int rd = extract32(insn, 0, 5);
4564 int rn = extract32(insn, 5, 5);
4565 int scale = extract32(insn, 10, 6);
4566 int opcode = extract32(insn, 16, 3);
4567 int rmode = extract32(insn, 19, 2);
4568 int type = extract32(insn, 22, 2);
4569 bool sbit = extract32(insn, 29, 1);
4570 bool sf = extract32(insn, 31, 1);
4571 bool itof;
4572
4573 if (sbit || (type > 1)
4574 || (!sf && scale < 32)) {
4575 unallocated_encoding(s);
4576 return;
4577 }
4578
4579 switch ((rmode << 3) | opcode) {
4580 case 0x2: /* SCVTF */
4581 case 0x3: /* UCVTF */
4582 itof = true;
4583 break;
4584 case 0x18: /* FCVTZS */
4585 case 0x19: /* FCVTZU */
4586 itof = false;
4587 break;
4588 default:
4589 unallocated_encoding(s);
4590 return;
4591 }
4592
4593 handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
faa0ba46
PM
4594}
4595
ce5458e8
PM
4596static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
4597{
4598 /* FMOV: gpr to or from float, double, or top half of quad fp reg,
4599 * without conversion.
4600 */
4601
4602 if (itof) {
ce5458e8
PM
4603 TCGv_i64 tcg_rn = cpu_reg(s, rn);
4604
4605 switch (type) {
4606 case 0:
4607 {
4608 /* 32 bit */
4609 TCGv_i64 tmp = tcg_temp_new_i64();
4610 tcg_gen_ext32u_i64(tmp, tcg_rn);
e2f90565 4611 tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(rd, MO_64));
ce5458e8 4612 tcg_gen_movi_i64(tmp, 0);
e2f90565 4613 tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(rd));
ce5458e8
PM
4614 tcg_temp_free_i64(tmp);
4615 break;
4616 }
4617 case 1:
4618 {
4619 /* 64 bit */
4620 TCGv_i64 tmp = tcg_const_i64(0);
e2f90565
PM
4621 tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(rd, MO_64));
4622 tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(rd));
ce5458e8
PM
4623 tcg_temp_free_i64(tmp);
4624 break;
4625 }
4626 case 2:
4627 /* 64 bit to top half. */
e2f90565 4628 tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(rd));
ce5458e8
PM
4629 break;
4630 }
4631 } else {
ce5458e8
PM
4632 TCGv_i64 tcg_rd = cpu_reg(s, rd);
4633
4634 switch (type) {
4635 case 0:
4636 /* 32 bit */
e2f90565 4637 tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(rn, MO_32));
ce5458e8 4638 break;
ce5458e8
PM
4639 case 1:
4640 /* 64 bit */
e2f90565
PM
4641 tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(rn, MO_64));
4642 break;
4643 case 2:
4644 /* 64 bits from top half */
4645 tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(rn));
ce5458e8
PM
4646 break;
4647 }
4648 }
4649}
4650
faa0ba46
PM
4651/* C3.6.30 Floating point <-> integer conversions
4652 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0
4653 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
c436d406 4654 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
faa0ba46
PM
4655 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
4656 */
4657static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
4658{
ce5458e8
PM
4659 int rd = extract32(insn, 0, 5);
4660 int rn = extract32(insn, 5, 5);
4661 int opcode = extract32(insn, 16, 3);
4662 int rmode = extract32(insn, 19, 2);
4663 int type = extract32(insn, 22, 2);
4664 bool sbit = extract32(insn, 29, 1);
4665 bool sf = extract32(insn, 31, 1);
4666
c436d406
WN
4667 if (sbit) {
4668 unallocated_encoding(s);
4669 return;
4670 }
4671
4672 if (opcode > 5) {
ce5458e8
PM
4673 /* FMOV */
4674 bool itof = opcode & 1;
4675
c436d406
WN
4676 if (rmode >= 2) {
4677 unallocated_encoding(s);
4678 return;
4679 }
4680
ce5458e8
PM
4681 switch (sf << 3 | type << 1 | rmode) {
4682 case 0x0: /* 32 bit */
4683 case 0xa: /* 64 bit */
4684 case 0xd: /* 64 bit to top half of quad */
4685 break;
4686 default:
4687 /* all other sf/type/rmode combinations are invalid */
4688 unallocated_encoding(s);
4689 break;
4690 }
4691
4692 handle_fmov(s, rd, rn, type, itof);
4693 } else {
4694 /* actual FP conversions */
c436d406
WN
4695 bool itof = extract32(opcode, 1, 1);
4696
4697 if (type > 1 || (rmode != 0 && opcode > 1)) {
4698 unallocated_encoding(s);
4699 return;
4700 }
4701
4702 handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
ce5458e8 4703 }
faa0ba46
PM
4704}
4705
4706/* FP-specific subcases of table C3-6 (SIMD and FP data processing)
4707 * 31 30 29 28 25 24 0
4708 * +---+---+---+---------+-----------------------------+
4709 * | | 0 | | 1 1 1 1 | |
4710 * +---+---+---+---------+-----------------------------+
4711 */
4712static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
4713{
4714 if (extract32(insn, 24, 1)) {
4715 /* Floating point data-processing (3 source) */
4716 disas_fp_3src(s, insn);
4717 } else if (extract32(insn, 21, 1) == 0) {
4718 /* Floating point to fixed point conversions */
4719 disas_fp_fixed_conv(s, insn);
4720 } else {
4721 switch (extract32(insn, 10, 2)) {
4722 case 1:
4723 /* Floating point conditional compare */
4724 disas_fp_ccomp(s, insn);
4725 break;
4726 case 2:
4727 /* Floating point data-processing (2 source) */
4728 disas_fp_2src(s, insn);
4729 break;
4730 case 3:
4731 /* Floating point conditional select */
4732 disas_fp_csel(s, insn);
4733 break;
4734 case 0:
4735 switch (ctz32(extract32(insn, 12, 4))) {
4736 case 0: /* [15:12] == xxx1 */
4737 /* Floating point immediate */
4738 disas_fp_imm(s, insn);
4739 break;
4740 case 1: /* [15:12] == xx10 */
4741 /* Floating point compare */
4742 disas_fp_compare(s, insn);
4743 break;
4744 case 2: /* [15:12] == x100 */
4745 /* Floating point data-processing (1 source) */
4746 disas_fp_1src(s, insn);
4747 break;
4748 case 3: /* [15:12] == 1000 */
4749 unallocated_encoding(s);
4750 break;
4751 default: /* [15:12] == 0000 */
4752 /* Floating point <-> integer conversions */
4753 disas_fp_int_conv(s, insn);
4754 break;
4755 }
4756 break;
4757 }
4758 }
4759}
4760
5c73747f
PM
4761static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
4762 int pos)
4763{
4764 /* Extract 64 bits from the middle of two concatenated 64 bit
4765 * vector register slices left:right. The extracted bits start
4766 * at 'pos' bits into the right (least significant) side.
4767 * We return the result in tcg_right, and guarantee not to
4768 * trash tcg_left.
4769 */
4770 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4771 assert(pos > 0 && pos < 64);
4772
4773 tcg_gen_shri_i64(tcg_right, tcg_right, pos);
4774 tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
4775 tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
4776
4777 tcg_temp_free_i64(tcg_tmp);
4778}
4779
384b26fb
AB
4780/* C3.6.1 EXT
4781 * 31 30 29 24 23 22 21 20 16 15 14 11 10 9 5 4 0
4782 * +---+---+-------------+-----+---+------+---+------+---+------+------+
4783 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 | Rm | 0 | imm4 | 0 | Rn | Rd |
4784 * +---+---+-------------+-----+---+------+---+------+---+------+------+
4785 */
4786static void disas_simd_ext(DisasContext *s, uint32_t insn)
4787{
5c73747f
PM
4788 int is_q = extract32(insn, 30, 1);
4789 int op2 = extract32(insn, 22, 2);
4790 int imm4 = extract32(insn, 11, 4);
4791 int rm = extract32(insn, 16, 5);
4792 int rn = extract32(insn, 5, 5);
4793 int rd = extract32(insn, 0, 5);
4794 int pos = imm4 << 3;
4795 TCGv_i64 tcg_resl, tcg_resh;
4796
4797 if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
4798 unallocated_encoding(s);
4799 return;
4800 }
4801
4802 tcg_resh = tcg_temp_new_i64();
4803 tcg_resl = tcg_temp_new_i64();
4804
4805 /* Vd gets bits starting at pos bits into Vm:Vn. This is
4806 * either extracting 128 bits from a 128:128 concatenation, or
4807 * extracting 64 bits from a 64:64 concatenation.
4808 */
4809 if (!is_q) {
4810 read_vec_element(s, tcg_resl, rn, 0, MO_64);
4811 if (pos != 0) {
4812 read_vec_element(s, tcg_resh, rm, 0, MO_64);
4813 do_ext64(s, tcg_resh, tcg_resl, pos);
4814 }
4815 tcg_gen_movi_i64(tcg_resh, 0);
4816 } else {
4817 TCGv_i64 tcg_hh;
4818 typedef struct {
4819 int reg;
4820 int elt;
4821 } EltPosns;
4822 EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
4823 EltPosns *elt = eltposns;
4824
4825 if (pos >= 64) {
4826 elt++;
4827 pos -= 64;
4828 }
4829
4830 read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
4831 elt++;
4832 read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
4833 elt++;
4834 if (pos != 0) {
4835 do_ext64(s, tcg_resh, tcg_resl, pos);
4836 tcg_hh = tcg_temp_new_i64();
4837 read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
4838 do_ext64(s, tcg_hh, tcg_resh, pos);
4839 tcg_temp_free_i64(tcg_hh);
4840 }
4841 }
4842
4843 write_vec_element(s, tcg_resl, rd, 0, MO_64);
4844 tcg_temp_free_i64(tcg_resl);
4845 write_vec_element(s, tcg_resh, rd, 1, MO_64);
4846 tcg_temp_free_i64(tcg_resh);
384b26fb
AB
4847}
4848
4849/* C3.6.2 TBL/TBX
4850 * 31 30 29 24 23 22 21 20 16 15 14 13 12 11 10 9 5 4 0
4851 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
4852 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 | Rm | 0 | len | op | 0 0 | Rn | Rd |
4853 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
4854 */
4855static void disas_simd_tb(DisasContext *s, uint32_t insn)
4856{
7c51048f
MM
4857 int op2 = extract32(insn, 22, 2);
4858 int is_q = extract32(insn, 30, 1);
4859 int rm = extract32(insn, 16, 5);
4860 int rn = extract32(insn, 5, 5);
4861 int rd = extract32(insn, 0, 5);
4862 int is_tblx = extract32(insn, 12, 1);
4863 int len = extract32(insn, 13, 2);
4864 TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
4865 TCGv_i32 tcg_regno, tcg_numregs;
4866
4867 if (op2 != 0) {
4868 unallocated_encoding(s);
4869 return;
4870 }
4871
4872 /* This does a table lookup: for every byte element in the input
4873 * we index into a table formed from up to four vector registers,
4874 * and then the output is the result of the lookups. Our helper
4875 * function does the lookup operation for a single 64 bit part of
4876 * the input.
4877 */
4878 tcg_resl = tcg_temp_new_i64();
4879 tcg_resh = tcg_temp_new_i64();
4880
4881 if (is_tblx) {
4882 read_vec_element(s, tcg_resl, rd, 0, MO_64);
4883 } else {
4884 tcg_gen_movi_i64(tcg_resl, 0);
4885 }
4886 if (is_tblx && is_q) {
4887 read_vec_element(s, tcg_resh, rd, 1, MO_64);
4888 } else {
4889 tcg_gen_movi_i64(tcg_resh, 0);
4890 }
4891
4892 tcg_idx = tcg_temp_new_i64();
4893 tcg_regno = tcg_const_i32(rn);
4894 tcg_numregs = tcg_const_i32(len + 1);
4895 read_vec_element(s, tcg_idx, rm, 0, MO_64);
4896 gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
4897 tcg_regno, tcg_numregs);
4898 if (is_q) {
4899 read_vec_element(s, tcg_idx, rm, 1, MO_64);
4900 gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
4901 tcg_regno, tcg_numregs);
4902 }
4903 tcg_temp_free_i64(tcg_idx);
4904 tcg_temp_free_i32(tcg_regno);
4905 tcg_temp_free_i32(tcg_numregs);
4906
4907 write_vec_element(s, tcg_resl, rd, 0, MO_64);
4908 tcg_temp_free_i64(tcg_resl);
4909 write_vec_element(s, tcg_resh, rd, 1, MO_64);
4910 tcg_temp_free_i64(tcg_resh);
384b26fb
AB
4911}
4912
4913/* C3.6.3 ZIP/UZP/TRN
4914 * 31 30 29 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0
4915 * +---+---+-------------+------+---+------+---+------------------+------+
4916 * | 0 | Q | 0 0 1 1 1 0 | size | 0 | Rm | 0 | opc | 1 0 | Rn | Rd |
4917 * +---+---+-------------+------+---+------+---+------------------+------+
4918 */
4919static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
4920{
5fa5469c
MM
4921 int rd = extract32(insn, 0, 5);
4922 int rn = extract32(insn, 5, 5);
4923 int rm = extract32(insn, 16, 5);
4924 int size = extract32(insn, 22, 2);
4925 /* opc field bits [1:0] indicate ZIP/UZP/TRN;
4926 * bit 2 indicates 1 vs 2 variant of the insn.
4927 */
4928 int opcode = extract32(insn, 12, 2);
4929 bool part = extract32(insn, 14, 1);
4930 bool is_q = extract32(insn, 30, 1);
4931 int esize = 8 << size;
4932 int i, ofs;
4933 int datasize = is_q ? 128 : 64;
4934 int elements = datasize / esize;
4935 TCGv_i64 tcg_res, tcg_resl, tcg_resh;
4936
4937 if (opcode == 0 || (size == 3 && !is_q)) {
4938 unallocated_encoding(s);
4939 return;
4940 }
4941
4942 tcg_resl = tcg_const_i64(0);
4943 tcg_resh = tcg_const_i64(0);
4944 tcg_res = tcg_temp_new_i64();
4945
4946 for (i = 0; i < elements; i++) {
4947 switch (opcode) {
4948 case 1: /* UZP1/2 */
4949 {
4950 int midpoint = elements / 2;
4951 if (i < midpoint) {
4952 read_vec_element(s, tcg_res, rn, 2 * i + part, size);
4953 } else {
4954 read_vec_element(s, tcg_res, rm,
4955 2 * (i - midpoint) + part, size);
4956 }
4957 break;
4958 }
4959 case 2: /* TRN1/2 */
4960 if (i & 1) {
4961 read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
4962 } else {
4963 read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
4964 }
4965 break;
4966 case 3: /* ZIP1/2 */
4967 {
4968 int base = part * elements / 2;
4969 if (i & 1) {
4970 read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
4971 } else {
4972 read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
4973 }
4974 break;
4975 }
4976 default:
4977 g_assert_not_reached();
4978 }
4979
4980 ofs = i * esize;
4981 if (ofs < 64) {
4982 tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
4983 tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
4984 } else {
4985 tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
4986 tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
4987 }
4988 }
4989
4990 tcg_temp_free_i64(tcg_res);
4991
4992 write_vec_element(s, tcg_resl, rd, 0, MO_64);
4993 tcg_temp_free_i64(tcg_resl);
4994 write_vec_element(s, tcg_resh, rd, 1, MO_64);
4995 tcg_temp_free_i64(tcg_resh);
384b26fb
AB
4996}
4997
4a0ff1ce
MM
4998static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2,
4999 int opc, bool is_min, TCGv_ptr fpst)
5000{
5001 /* Helper function for disas_simd_across_lanes: do a single precision
5002 * min/max operation on the specified two inputs,
5003 * and return the result in tcg_elt1.
5004 */
5005 if (opc == 0xc) {
5006 if (is_min) {
5007 gen_helper_vfp_minnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5008 } else {
5009 gen_helper_vfp_maxnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5010 }
5011 } else {
5012 assert(opc == 0xf);
5013 if (is_min) {
5014 gen_helper_vfp_mins(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5015 } else {
5016 gen_helper_vfp_maxs(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5017 }
5018 }
5019}
5020
384b26fb
AB
5021/* C3.6.4 AdvSIMD across lanes
5022 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
5023 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5024 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd |
5025 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5026 */
5027static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
5028{
4a0ff1ce
MM
5029 int rd = extract32(insn, 0, 5);
5030 int rn = extract32(insn, 5, 5);
5031 int size = extract32(insn, 22, 2);
5032 int opcode = extract32(insn, 12, 5);
5033 bool is_q = extract32(insn, 30, 1);
5034 bool is_u = extract32(insn, 29, 1);
5035 bool is_fp = false;
5036 bool is_min = false;
5037 int esize;
5038 int elements;
5039 int i;
5040 TCGv_i64 tcg_res, tcg_elt;
5041
5042 switch (opcode) {
5043 case 0x1b: /* ADDV */
5044 if (is_u) {
5045 unallocated_encoding(s);
5046 return;
5047 }
5048 /* fall through */
5049 case 0x3: /* SADDLV, UADDLV */
5050 case 0xa: /* SMAXV, UMAXV */
5051 case 0x1a: /* SMINV, UMINV */
5052 if (size == 3 || (size == 2 && !is_q)) {
5053 unallocated_encoding(s);
5054 return;
5055 }
5056 break;
5057 case 0xc: /* FMAXNMV, FMINNMV */
5058 case 0xf: /* FMAXV, FMINV */
5059 if (!is_u || !is_q || extract32(size, 0, 1)) {
5060 unallocated_encoding(s);
5061 return;
5062 }
5063 /* Bit 1 of size field encodes min vs max, and actual size is always
5064 * 32 bits: adjust the size variable so following code can rely on it
5065 */
5066 is_min = extract32(size, 1, 1);
5067 is_fp = true;
5068 size = 2;
5069 break;
5070 default:
5071 unallocated_encoding(s);
5072 return;
5073 }
5074
5075 esize = 8 << size;
5076 elements = (is_q ? 128 : 64) / esize;
5077
5078 tcg_res = tcg_temp_new_i64();
5079 tcg_elt = tcg_temp_new_i64();
5080
5081 /* These instructions operate across all lanes of a vector
5082 * to produce a single result. We can guarantee that a 64
5083 * bit intermediate is sufficient:
5084 * + for [US]ADDLV the maximum element size is 32 bits, and
5085 * the result type is 64 bits
5086 * + for FMAX*V, FMIN*V, ADDV the intermediate type is the
5087 * same as the element size, which is 32 bits at most
5088 * For the integer operations we can choose to work at 64
5089 * or 32 bits and truncate at the end; for simplicity
5090 * we use 64 bits always. The floating point
5091 * ops do require 32 bit intermediates, though.
5092 */
5093 if (!is_fp) {
5094 read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
5095
5096 for (i = 1; i < elements; i++) {
5097 read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
5098
5099 switch (opcode) {
5100 case 0x03: /* SADDLV / UADDLV */
5101 case 0x1b: /* ADDV */
5102 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
5103 break;
5104 case 0x0a: /* SMAXV / UMAXV */
5105 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
5106 tcg_res,
5107 tcg_res, tcg_elt, tcg_res, tcg_elt);
5108 break;
5109 case 0x1a: /* SMINV / UMINV */
5110 tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
5111 tcg_res,
5112 tcg_res, tcg_elt, tcg_res, tcg_elt);
5113 break;
5114 break;
5115 default:
5116 g_assert_not_reached();
5117 }
5118
5119 }
5120 } else {
5121 /* Floating point ops which work on 32 bit (single) intermediates.
5122 * Note that correct NaN propagation requires that we do these
5123 * operations in exactly the order specified by the pseudocode.
5124 */
5125 TCGv_i32 tcg_elt1 = tcg_temp_new_i32();
5126 TCGv_i32 tcg_elt2 = tcg_temp_new_i32();
5127 TCGv_i32 tcg_elt3 = tcg_temp_new_i32();
5128 TCGv_ptr fpst = get_fpstatus_ptr();
5129
5130 assert(esize == 32);
5131 assert(elements == 4);
5132
5133 read_vec_element(s, tcg_elt, rn, 0, MO_32);
5134 tcg_gen_trunc_i64_i32(tcg_elt1, tcg_elt);
5135 read_vec_element(s, tcg_elt, rn, 1, MO_32);
5136 tcg_gen_trunc_i64_i32(tcg_elt2, tcg_elt);
5137
5138 do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5139
5140 read_vec_element(s, tcg_elt, rn, 2, MO_32);
5141 tcg_gen_trunc_i64_i32(tcg_elt2, tcg_elt);
5142 read_vec_element(s, tcg_elt, rn, 3, MO_32);
5143 tcg_gen_trunc_i64_i32(tcg_elt3, tcg_elt);
5144
5145 do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst);
5146
5147 do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5148
5149 tcg_gen_extu_i32_i64(tcg_res, tcg_elt1);
5150 tcg_temp_free_i32(tcg_elt1);
5151 tcg_temp_free_i32(tcg_elt2);
5152 tcg_temp_free_i32(tcg_elt3);
5153 tcg_temp_free_ptr(fpst);
5154 }
5155
5156 tcg_temp_free_i64(tcg_elt);
5157
5158 /* Now truncate the result to the width required for the final output */
5159 if (opcode == 0x03) {
5160 /* SADDLV, UADDLV: result is 2*esize */
5161 size++;
5162 }
5163
5164 switch (size) {
5165 case 0:
5166 tcg_gen_ext8u_i64(tcg_res, tcg_res);
5167 break;
5168 case 1:
5169 tcg_gen_ext16u_i64(tcg_res, tcg_res);
5170 break;
5171 case 2:
5172 tcg_gen_ext32u_i64(tcg_res, tcg_res);
5173 break;
5174 case 3:
5175 break;
5176 default:
5177 g_assert_not_reached();
5178 }
5179
5180 write_fp_dreg(s, rd, tcg_res);
5181 tcg_temp_free_i64(tcg_res);
384b26fb
AB
5182}
5183
67bb9389
AB
5184/* C6.3.31 DUP (Element, Vector)
5185 *
5186 * 31 30 29 21 20 16 15 10 9 5 4 0
5187 * +---+---+-------------------+--------+-------------+------+------+
5188 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd |
5189 * +---+---+-------------------+--------+-------------+------+------+
5190 *
5191 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5192 */
5193static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
5194 int imm5)
5195{
5196 int size = ctz32(imm5);
5197 int esize = 8 << size;
5198 int elements = (is_q ? 128 : 64) / esize;
5199 int index, i;
5200 TCGv_i64 tmp;
5201
5202 if (size > 3 || (size == 3 && !is_q)) {
5203 unallocated_encoding(s);
5204 return;
5205 }
5206
5207 index = imm5 >> (size + 1);
5208
5209 tmp = tcg_temp_new_i64();
5210 read_vec_element(s, tmp, rn, index, size);
5211
5212 for (i = 0; i < elements; i++) {
5213 write_vec_element(s, tmp, rd, i, size);
5214 }
5215
5216 if (!is_q) {
5217 clear_vec_high(s, rd);
5218 }
5219
5220 tcg_temp_free_i64(tmp);
5221}
5222
360a6f2d
PM
5223/* C6.3.31 DUP (element, scalar)
5224 * 31 21 20 16 15 10 9 5 4 0
5225 * +-----------------------+--------+-------------+------+------+
5226 * | 0 1 0 1 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd |
5227 * +-----------------------+--------+-------------+------+------+
5228 */
5229static void handle_simd_dupes(DisasContext *s, int rd, int rn,
5230 int imm5)
5231{
5232 int size = ctz32(imm5);
5233 int index;
5234 TCGv_i64 tmp;
5235
5236 if (size > 3) {
5237 unallocated_encoding(s);
5238 return;
5239 }
5240
5241 index = imm5 >> (size + 1);
5242
5243 /* This instruction just extracts the specified element and
5244 * zero-extends it into the bottom of the destination register.
5245 */
5246 tmp = tcg_temp_new_i64();
5247 read_vec_element(s, tmp, rn, index, size);
5248 write_fp_dreg(s, rd, tmp);
5249 tcg_temp_free_i64(tmp);
5250}
5251
67bb9389
AB
5252/* C6.3.32 DUP (General)
5253 *
5254 * 31 30 29 21 20 16 15 10 9 5 4 0
5255 * +---+---+-------------------+--------+-------------+------+------+
5256 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 1 1 | Rn | Rd |
5257 * +---+---+-------------------+--------+-------------+------+------+
5258 *
5259 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5260 */
5261static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
5262 int imm5)
5263{
5264 int size = ctz32(imm5);
5265 int esize = 8 << size;
5266 int elements = (is_q ? 128 : 64)/esize;
5267 int i = 0;
5268
5269 if (size > 3 || ((size == 3) && !is_q)) {
5270 unallocated_encoding(s);
5271 return;
5272 }
5273 for (i = 0; i < elements; i++) {
5274 write_vec_element(s, cpu_reg(s, rn), rd, i, size);
5275 }
5276 if (!is_q) {
5277 clear_vec_high(s, rd);
5278 }
5279}
5280
5281/* C6.3.150 INS (Element)
5282 *
5283 * 31 21 20 16 15 14 11 10 9 5 4 0
5284 * +-----------------------+--------+------------+---+------+------+
5285 * | 0 1 1 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
5286 * +-----------------------+--------+------------+---+------+------+
5287 *
5288 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5289 * index: encoded in imm5<4:size+1>
5290 */
5291static void handle_simd_inse(DisasContext *s, int rd, int rn,
5292 int imm4, int imm5)
5293{
5294 int size = ctz32(imm5);
5295 int src_index, dst_index;
5296 TCGv_i64 tmp;
5297
5298 if (size > 3) {
5299 unallocated_encoding(s);
5300 return;
5301 }
5302 dst_index = extract32(imm5, 1+size, 5);
5303 src_index = extract32(imm4, size, 4);
5304
5305 tmp = tcg_temp_new_i64();
5306
5307 read_vec_element(s, tmp, rn, src_index, size);
5308 write_vec_element(s, tmp, rd, dst_index, size);
5309
5310 tcg_temp_free_i64(tmp);
5311}
5312
5313
5314/* C6.3.151 INS (General)
5315 *
5316 * 31 21 20 16 15 10 9 5 4 0
5317 * +-----------------------+--------+-------------+------+------+
5318 * | 0 1 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 1 1 1 | Rn | Rd |
5319 * +-----------------------+--------+-------------+------+------+
5320 *
5321 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5322 * index: encoded in imm5<4:size+1>
5323 */
5324static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
5325{
5326 int size = ctz32(imm5);
5327 int idx;
5328
5329 if (size > 3) {
5330 unallocated_encoding(s);
5331 return;
5332 }
5333
5334 idx = extract32(imm5, 1 + size, 4 - size);
5335 write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
5336}
5337
5338/*
5339 * C6.3.321 UMOV (General)
5340 * C6.3.237 SMOV (General)
5341 *
5342 * 31 30 29 21 20 16 15 12 10 9 5 4 0
5343 * +---+---+-------------------+--------+-------------+------+------+
5344 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 1 U 1 1 | Rn | Rd |
5345 * +---+---+-------------------+--------+-------------+------+------+
5346 *
5347 * U: unsigned when set
5348 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5349 */
5350static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
5351 int rn, int rd, int imm5)
5352{
5353 int size = ctz32(imm5);
5354 int element;
5355 TCGv_i64 tcg_rd;
5356
5357 /* Check for UnallocatedEncodings */
5358 if (is_signed) {
5359 if (size > 2 || (size == 2 && !is_q)) {
5360 unallocated_encoding(s);
5361 return;
5362 }
5363 } else {
5364 if (size > 3
5365 || (size < 3 && is_q)
5366 || (size == 3 && !is_q)) {
5367 unallocated_encoding(s);
5368 return;
5369 }
5370 }
5371 element = extract32(imm5, 1+size, 4);
5372
5373 tcg_rd = cpu_reg(s, rd);
5374 read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
5375 if (is_signed && !is_q) {
5376 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5377 }
5378}
5379
384b26fb
AB
5380/* C3.6.5 AdvSIMD copy
5381 * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0
5382 * +---+---+----+-----------------+------+---+------+---+------+------+
5383 * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
5384 * +---+---+----+-----------------+------+---+------+---+------+------+
5385 */
5386static void disas_simd_copy(DisasContext *s, uint32_t insn)
5387{
67bb9389
AB
5388 int rd = extract32(insn, 0, 5);
5389 int rn = extract32(insn, 5, 5);
5390 int imm4 = extract32(insn, 11, 4);
5391 int op = extract32(insn, 29, 1);
5392 int is_q = extract32(insn, 30, 1);
5393 int imm5 = extract32(insn, 16, 5);
5394
5395 if (op) {
5396 if (is_q) {
5397 /* INS (element) */
5398 handle_simd_inse(s, rd, rn, imm4, imm5);
5399 } else {
5400 unallocated_encoding(s);
5401 }
5402 } else {
5403 switch (imm4) {
5404 case 0:
5405 /* DUP (element - vector) */
5406 handle_simd_dupe(s, is_q, rd, rn, imm5);
5407 break;
5408 case 1:
5409 /* DUP (general) */
5410 handle_simd_dupg(s, is_q, rd, rn, imm5);
5411 break;
5412 case 3:
5413 if (is_q) {
5414 /* INS (general) */
5415 handle_simd_insg(s, rd, rn, imm5);
5416 } else {
5417 unallocated_encoding(s);
5418 }
5419 break;
5420 case 5:
5421 case 7:
5422 /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
5423 handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
5424 break;
5425 default:
5426 unallocated_encoding(s);
5427 break;
5428 }
5429 }
384b26fb
AB
5430}
5431
5432/* C3.6.6 AdvSIMD modified immediate
5433 * 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0
5434 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
5435 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh | Rd |
5436 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
f3f8c4f4
AB
5437 *
5438 * There are a number of operations that can be carried out here:
5439 * MOVI - move (shifted) imm into register
5440 * MVNI - move inverted (shifted) imm into register
5441 * ORR - bitwise OR of (shifted) imm with register
5442 * BIC - bitwise clear of (shifted) imm with register
384b26fb
AB
5443 */
5444static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
5445{
f3f8c4f4
AB
5446 int rd = extract32(insn, 0, 5);
5447 int cmode = extract32(insn, 12, 4);
5448 int cmode_3_1 = extract32(cmode, 1, 3);
5449 int cmode_0 = extract32(cmode, 0, 1);
5450 int o2 = extract32(insn, 11, 1);
5451 uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
5452 bool is_neg = extract32(insn, 29, 1);
5453 bool is_q = extract32(insn, 30, 1);
5454 uint64_t imm = 0;
5455 TCGv_i64 tcg_rd, tcg_imm;
5456 int i;
5457
5458 if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
5459 unallocated_encoding(s);
5460 return;
5461 }
5462
5463 /* See AdvSIMDExpandImm() in ARM ARM */
5464 switch (cmode_3_1) {
5465 case 0: /* Replicate(Zeros(24):imm8, 2) */
5466 case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
5467 case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
5468 case 3: /* Replicate(imm8:Zeros(24), 2) */
5469 {
5470 int shift = cmode_3_1 * 8;
5471 imm = bitfield_replicate(abcdefgh << shift, 32);
5472 break;
5473 }
5474 case 4: /* Replicate(Zeros(8):imm8, 4) */
5475 case 5: /* Replicate(imm8:Zeros(8), 4) */
5476 {
5477 int shift = (cmode_3_1 & 0x1) * 8;
5478 imm = bitfield_replicate(abcdefgh << shift, 16);
5479 break;
5480 }
5481 case 6:
5482 if (cmode_0) {
5483 /* Replicate(Zeros(8):imm8:Ones(16), 2) */
5484 imm = (abcdefgh << 16) | 0xffff;
5485 } else {
5486 /* Replicate(Zeros(16):imm8:Ones(8), 2) */
5487 imm = (abcdefgh << 8) | 0xff;
5488 }
5489 imm = bitfield_replicate(imm, 32);
5490 break;
5491 case 7:
5492 if (!cmode_0 && !is_neg) {
5493 imm = bitfield_replicate(abcdefgh, 8);
5494 } else if (!cmode_0 && is_neg) {
5495 int i;
5496 imm = 0;
5497 for (i = 0; i < 8; i++) {
5498 if ((abcdefgh) & (1 << i)) {
5499 imm |= 0xffULL << (i * 8);
5500 }
5501 }
5502 } else if (cmode_0) {
5503 if (is_neg) {
5504 imm = (abcdefgh & 0x3f) << 48;
5505 if (abcdefgh & 0x80) {
5506 imm |= 0x8000000000000000ULL;
5507 }
5508 if (abcdefgh & 0x40) {
5509 imm |= 0x3fc0000000000000ULL;
5510 } else {
5511 imm |= 0x4000000000000000ULL;
5512 }
5513 } else {
5514 imm = (abcdefgh & 0x3f) << 19;
5515 if (abcdefgh & 0x80) {
5516 imm |= 0x80000000;
5517 }
5518 if (abcdefgh & 0x40) {
5519 imm |= 0x3e000000;
5520 } else {
5521 imm |= 0x40000000;
5522 }
5523 imm |= (imm << 32);
5524 }
5525 }
5526 break;
5527 }
5528
5529 if (cmode_3_1 != 7 && is_neg) {
5530 imm = ~imm;
5531 }
5532
5533 tcg_imm = tcg_const_i64(imm);
5534 tcg_rd = new_tmp_a64(s);
5535
5536 for (i = 0; i < 2; i++) {
5537 int foffs = i ? fp_reg_hi_offset(rd) : fp_reg_offset(rd, MO_64);
5538
5539 if (i == 1 && !is_q) {
5540 /* non-quad ops clear high half of vector */
5541 tcg_gen_movi_i64(tcg_rd, 0);
5542 } else if ((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9) {
5543 tcg_gen_ld_i64(tcg_rd, cpu_env, foffs);
5544 if (is_neg) {
5545 /* AND (BIC) */
5546 tcg_gen_and_i64(tcg_rd, tcg_rd, tcg_imm);
5547 } else {
5548 /* ORR */
5549 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_imm);
5550 }
5551 } else {
5552 /* MOVI */
5553 tcg_gen_mov_i64(tcg_rd, tcg_imm);
5554 }
5555 tcg_gen_st_i64(tcg_rd, cpu_env, foffs);
5556 }
5557
5558 tcg_temp_free_i64(tcg_imm);
384b26fb
AB
5559}
5560
5561/* C3.6.7 AdvSIMD scalar copy
5562 * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0
5563 * +-----+----+-----------------+------+---+------+---+------+------+
5564 * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
5565 * +-----+----+-----------------+------+---+------+---+------+------+
5566 */
5567static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
5568{
360a6f2d
PM
5569 int rd = extract32(insn, 0, 5);
5570 int rn = extract32(insn, 5, 5);
5571 int imm4 = extract32(insn, 11, 4);
5572 int imm5 = extract32(insn, 16, 5);
5573 int op = extract32(insn, 29, 1);
5574
5575 if (op != 0 || imm4 != 0) {
5576 unallocated_encoding(s);
5577 return;
5578 }
5579
5580 /* DUP (element, scalar) */
5581 handle_simd_dupes(s, rd, rn, imm5);
384b26fb
AB
5582}
5583
5584/* C3.6.8 AdvSIMD scalar pairwise
5585 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
5586 * +-----+---+-----------+------+-----------+--------+-----+------+------+
5587 * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd |
5588 * +-----+---+-----------+------+-----------+--------+-----+------+------+
5589 */
5590static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
5591{
3720a7ea
PM
5592 int u = extract32(insn, 29, 1);
5593 int size = extract32(insn, 22, 2);
5594 int opcode = extract32(insn, 12, 5);
5595 int rn = extract32(insn, 5, 5);
5596 int rd = extract32(insn, 0, 5);
5597 TCGv_ptr fpst;
5598
5599 /* For some ops (the FP ones), size[1] is part of the encoding.
5600 * For ADDP strictly it is not but size[1] is always 1 for valid
5601 * encodings.
5602 */
5603 opcode |= (extract32(size, 1, 1) << 5);
5604
5605 switch (opcode) {
5606 case 0x3b: /* ADDP */
5607 if (u || size != 3) {
5608 unallocated_encoding(s);
5609 return;
5610 }
5611 TCGV_UNUSED_PTR(fpst);
5612 break;
5613 case 0xc: /* FMAXNMP */
5614 case 0xd: /* FADDP */
5615 case 0xf: /* FMAXP */
5616 case 0x2c: /* FMINNMP */
5617 case 0x2f: /* FMINP */
5618 /* FP op, size[0] is 32 or 64 bit */
5619 if (!u) {
5620 unallocated_encoding(s);
5621 return;
5622 }
5623 size = extract32(size, 0, 1) ? 3 : 2;
5624 fpst = get_fpstatus_ptr();
5625 break;
5626 default:
5627 unallocated_encoding(s);
5628 return;
5629 }
5630
5631 if (size == 3) {
5632 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
5633 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
5634 TCGv_i64 tcg_res = tcg_temp_new_i64();
5635
5636 read_vec_element(s, tcg_op1, rn, 0, MO_64);
5637 read_vec_element(s, tcg_op2, rn, 1, MO_64);
5638
5639 switch (opcode) {
5640 case 0x3b: /* ADDP */
5641 tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
5642 break;
5643 case 0xc: /* FMAXNMP */
5644 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
5645 break;
5646 case 0xd: /* FADDP */
5647 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
5648 break;
5649 case 0xf: /* FMAXP */
5650 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
5651 break;
5652 case 0x2c: /* FMINNMP */
5653 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
5654 break;
5655 case 0x2f: /* FMINP */
5656 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
5657 break;
5658 default:
5659 g_assert_not_reached();
5660 }
5661
5662 write_fp_dreg(s, rd, tcg_res);
5663
5664 tcg_temp_free_i64(tcg_op1);
5665 tcg_temp_free_i64(tcg_op2);
5666 tcg_temp_free_i64(tcg_res);
5667 } else {
5668 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
5669 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
5670 TCGv_i32 tcg_res = tcg_temp_new_i32();
5671
5672 read_vec_element_i32(s, tcg_op1, rn, 0, MO_32);
5673 read_vec_element_i32(s, tcg_op2, rn, 1, MO_32);
5674
5675 switch (opcode) {
5676 case 0xc: /* FMAXNMP */
5677 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
5678 break;
5679 case 0xd: /* FADDP */
5680 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
5681 break;
5682 case 0xf: /* FMAXP */
5683 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
5684 break;
5685 case 0x2c: /* FMINNMP */
5686 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
5687 break;
5688 case 0x2f: /* FMINP */
5689 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
5690 break;
5691 default:
5692 g_assert_not_reached();
5693 }
5694
5695 write_fp_sreg(s, rd, tcg_res);
5696
5697 tcg_temp_free_i32(tcg_op1);
5698 tcg_temp_free_i32(tcg_op2);
5699 tcg_temp_free_i32(tcg_res);
5700 }
5701
5702 if (!TCGV_IS_UNUSED_PTR(fpst)) {
5703 tcg_temp_free_ptr(fpst);
5704 }
384b26fb
AB
5705}
5706
4d1cef84
AB
5707/*
5708 * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
5709 *
5710 * This code handles the common shifting and is used by both
5711 * the vector and scalar code.
5712 */
5713static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
5714 TCGv_i64 tcg_rnd, bool accumulate,
5715 bool is_u, int size, int shift)
5716{
5717 bool extended_result = false;
5718 bool round = !TCGV_IS_UNUSED_I64(tcg_rnd);
5719 int ext_lshift = 0;
5720 TCGv_i64 tcg_src_hi;
5721
5722 if (round && size == 3) {
5723 extended_result = true;
5724 ext_lshift = 64 - shift;
5725 tcg_src_hi = tcg_temp_new_i64();
5726 } else if (shift == 64) {
5727 if (!accumulate && is_u) {
5728 /* result is zero */
5729 tcg_gen_movi_i64(tcg_res, 0);
5730 return;
5731 }
5732 }
5733
5734 /* Deal with the rounding step */
5735 if (round) {
5736 if (extended_result) {
5737 TCGv_i64 tcg_zero = tcg_const_i64(0);
5738 if (!is_u) {
5739 /* take care of sign extending tcg_res */
5740 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
5741 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
5742 tcg_src, tcg_src_hi,
5743 tcg_rnd, tcg_zero);
5744 } else {
5745 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
5746 tcg_src, tcg_zero,
5747 tcg_rnd, tcg_zero);
5748 }
5749 tcg_temp_free_i64(tcg_zero);
5750 } else {
5751 tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
5752 }
5753 }
5754
5755 /* Now do the shift right */
5756 if (round && extended_result) {
5757 /* extended case, >64 bit precision required */
5758 if (ext_lshift == 0) {
5759 /* special case, only high bits matter */
5760 tcg_gen_mov_i64(tcg_src, tcg_src_hi);
5761 } else {
5762 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
5763 tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
5764 tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
5765 }
5766 } else {
5767 if (is_u) {
5768 if (shift == 64) {
5769 /* essentially shifting in 64 zeros */
5770 tcg_gen_movi_i64(tcg_src, 0);
5771 } else {
5772 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
5773 }
5774 } else {
5775 if (shift == 64) {
5776 /* effectively extending the sign-bit */
5777 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
5778 } else {
5779 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
5780 }
5781 }
5782 }
5783
5784 if (accumulate) {
5785 tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
5786 } else {
5787 tcg_gen_mov_i64(tcg_res, tcg_src);
5788 }
5789
5790 if (extended_result) {
5791 tcg_temp_free_i64(tcg_src_hi);
5792 }
5793}
5794
5795/* Common SHL/SLI - Shift left with an optional insert */
5796static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
5797 bool insert, int shift)
5798{
5799 if (insert) { /* SLI */
5800 tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift);
5801 } else { /* SHL */
5802 tcg_gen_shli_i64(tcg_res, tcg_src, shift);
5803 }
5804}
5805
5806/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
5807static void handle_scalar_simd_shri(DisasContext *s,
5808 bool is_u, int immh, int immb,
5809 int opcode, int rn, int rd)
5810{
5811 const int size = 3;
5812 int immhb = immh << 3 | immb;
5813 int shift = 2 * (8 << size) - immhb;
5814 bool accumulate = false;
5815 bool round = false;
5816 TCGv_i64 tcg_rn;
5817 TCGv_i64 tcg_rd;
5818 TCGv_i64 tcg_round;
5819
5820 if (!extract32(immh, 3, 1)) {
5821 unallocated_encoding(s);
5822 return;
5823 }
5824
5825 switch (opcode) {
5826 case 0x02: /* SSRA / USRA (accumulate) */
5827 accumulate = true;
5828 break;
5829 case 0x04: /* SRSHR / URSHR (rounding) */
5830 round = true;
5831 break;
5832 case 0x06: /* SRSRA / URSRA (accum + rounding) */
5833 accumulate = round = true;
5834 break;
5835 }
5836
5837 if (round) {
5838 uint64_t round_const = 1ULL << (shift - 1);
5839 tcg_round = tcg_const_i64(round_const);
5840 } else {
5841 TCGV_UNUSED_I64(tcg_round);
5842 }
5843
5844 tcg_rn = read_fp_dreg(s, rn);
5845 tcg_rd = accumulate ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
5846
5847 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
5848 accumulate, is_u, size, shift);
5849
5850 write_fp_dreg(s, rd, tcg_rd);
5851
5852 tcg_temp_free_i64(tcg_rn);
5853 tcg_temp_free_i64(tcg_rd);
5854 if (round) {
5855 tcg_temp_free_i64(tcg_round);
5856 }
5857}
5858
5859/* SHL/SLI - Scalar shift left */
5860static void handle_scalar_simd_shli(DisasContext *s, bool insert,
5861 int immh, int immb, int opcode,
5862 int rn, int rd)
5863{
5864 int size = 32 - clz32(immh) - 1;
5865 int immhb = immh << 3 | immb;
5866 int shift = immhb - (8 << size);
5867 TCGv_i64 tcg_rn = new_tmp_a64(s);
5868 TCGv_i64 tcg_rd = new_tmp_a64(s);
5869
5870 if (!extract32(immh, 3, 1)) {
5871 unallocated_encoding(s);
5872 return;
5873 }
5874
5875 tcg_rn = read_fp_dreg(s, rn);
5876 tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
5877
5878 handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
5879
5880 write_fp_dreg(s, rd, tcg_rd);
5881
5882 tcg_temp_free_i64(tcg_rn);
5883 tcg_temp_free_i64(tcg_rd);
5884}
5885
384b26fb
AB
5886/* C3.6.9 AdvSIMD scalar shift by immediate
5887 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0
5888 * +-----+---+-------------+------+------+--------+---+------+------+
5889 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd |
5890 * +-----+---+-------------+------+------+--------+---+------+------+
4d1cef84
AB
5891 *
5892 * This is the scalar version so it works on fixed size registers
384b26fb
AB
5893 */
5894static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
5895{
4d1cef84
AB
5896 int rd = extract32(insn, 0, 5);
5897 int rn = extract32(insn, 5, 5);
5898 int opcode = extract32(insn, 11, 5);
5899 int immb = extract32(insn, 16, 3);
5900 int immh = extract32(insn, 19, 4);
5901 bool is_u = extract32(insn, 29, 1);
5902
5903 switch (opcode) {
5904 case 0x00: /* SSHR / USHR */
5905 case 0x02: /* SSRA / USRA */
5906 case 0x04: /* SRSHR / URSHR */
5907 case 0x06: /* SRSRA / URSRA */
5908 handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
5909 break;
5910 case 0x0a: /* SHL / SLI */
5911 handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
5912 break;
5913 default:
5914 unsupported_encoding(s, insn);
5915 break;
5916 }
384b26fb
AB
5917}
5918
5919/* C3.6.10 AdvSIMD scalar three different
5920 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
5921 * +-----+---+-----------+------+---+------+--------+-----+------+------+
5922 * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd |
5923 * +-----+---+-----------+------+---+------+--------+-----+------+------+
5924 */
5925static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
5926{
b033cd3d
PM
5927 bool is_u = extract32(insn, 29, 1);
5928 int size = extract32(insn, 22, 2);
5929 int opcode = extract32(insn, 12, 4);
5930 int rm = extract32(insn, 16, 5);
5931 int rn = extract32(insn, 5, 5);
5932 int rd = extract32(insn, 0, 5);
5933
5934 if (is_u) {
5935 unallocated_encoding(s);
5936 return;
5937 }
5938
5939 switch (opcode) {
5940 case 0x9: /* SQDMLAL, SQDMLAL2 */
5941 case 0xb: /* SQDMLSL, SQDMLSL2 */
5942 case 0xd: /* SQDMULL, SQDMULL2 */
5943 if (size == 0 || size == 3) {
5944 unallocated_encoding(s);
5945 return;
5946 }
5947 break;
5948 default:
5949 unallocated_encoding(s);
5950 return;
5951 }
5952
5953 if (size == 2) {
5954 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
5955 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
5956 TCGv_i64 tcg_res = tcg_temp_new_i64();
5957
5958 read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
5959 read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
5960
5961 tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
5962 gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
5963
5964 switch (opcode) {
5965 case 0xd: /* SQDMULL, SQDMULL2 */
5966 break;
5967 case 0xb: /* SQDMLSL, SQDMLSL2 */
5968 tcg_gen_neg_i64(tcg_res, tcg_res);
5969 /* fall through */
5970 case 0x9: /* SQDMLAL, SQDMLAL2 */
5971 read_vec_element(s, tcg_op1, rd, 0, MO_64);
5972 gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
5973 tcg_res, tcg_op1);
5974 break;
5975 default:
5976 g_assert_not_reached();
5977 }
5978
5979 write_fp_dreg(s, rd, tcg_res);
5980
5981 tcg_temp_free_i64(tcg_op1);
5982 tcg_temp_free_i64(tcg_op2);
5983 tcg_temp_free_i64(tcg_res);
5984 } else {
5985 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
5986 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
5987 TCGv_i64 tcg_res = tcg_temp_new_i64();
5988
5989 read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
5990 read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
5991
5992 gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
5993 gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
5994
5995 switch (opcode) {
5996 case 0xd: /* SQDMULL, SQDMULL2 */
5997 break;
5998 case 0xb: /* SQDMLSL, SQDMLSL2 */
5999 gen_helper_neon_negl_u32(tcg_res, tcg_res);
6000 /* fall through */
6001 case 0x9: /* SQDMLAL, SQDMLAL2 */
6002 {
6003 TCGv_i64 tcg_op3 = tcg_temp_new_i64();
6004 read_vec_element(s, tcg_op3, rd, 0, MO_32);
6005 gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
6006 tcg_res, tcg_op3);
6007 tcg_temp_free_i64(tcg_op3);
6008 break;
6009 }
6010 default:
6011 g_assert_not_reached();
6012 }
6013
6014 tcg_gen_ext32u_i64(tcg_res, tcg_res);
6015 write_fp_dreg(s, rd, tcg_res);
6016
6017 tcg_temp_free_i32(tcg_op1);
6018 tcg_temp_free_i32(tcg_op2);
6019 tcg_temp_free_i64(tcg_res);
6020 }
384b26fb
AB
6021}
6022
b305dba6
PM
6023static void handle_3same_64(DisasContext *s, int opcode, bool u,
6024 TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
6025{
6026 /* Handle 64x64->64 opcodes which are shared between the scalar
6027 * and vector 3-same groups. We cover every opcode where size == 3
6028 * is valid in either the three-reg-same (integer, not pairwise)
6029 * or scalar-three-reg-same groups. (Some opcodes are not yet
6030 * implemented.)
6031 */
6032 TCGCond cond;
6033
6034 switch (opcode) {
6d9571f7
PM
6035 case 0x1: /* SQADD */
6036 if (u) {
6037 gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6038 } else {
6039 gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6040 }
6041 break;
6042 case 0x5: /* SQSUB */
6043 if (u) {
6044 gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6045 } else {
6046 gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6047 }
6048 break;
b305dba6
PM
6049 case 0x6: /* CMGT, CMHI */
6050 /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
6051 * We implement this using setcond (test) and then negating.
6052 */
6053 cond = u ? TCG_COND_GTU : TCG_COND_GT;
6054 do_cmop:
6055 tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
6056 tcg_gen_neg_i64(tcg_rd, tcg_rd);
6057 break;
6058 case 0x7: /* CMGE, CMHS */
6059 cond = u ? TCG_COND_GEU : TCG_COND_GE;
6060 goto do_cmop;
6061 case 0x11: /* CMTST, CMEQ */
6062 if (u) {
6063 cond = TCG_COND_EQ;
6064 goto do_cmop;
6065 }
6066 /* CMTST : test is "if (X & Y != 0)". */
6067 tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
6068 tcg_gen_setcondi_i64(TCG_COND_NE, tcg_rd, tcg_rd, 0);
6069 tcg_gen_neg_i64(tcg_rd, tcg_rd);
6070 break;
6d9571f7 6071 case 0x8: /* SSHL, USHL */
b305dba6 6072 if (u) {
6d9571f7 6073 gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
b305dba6 6074 } else {
6d9571f7 6075 gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
b305dba6
PM
6076 }
6077 break;
b305dba6 6078 case 0x9: /* SQSHL, UQSHL */
6d9571f7
PM
6079 if (u) {
6080 gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6081 } else {
6082 gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6083 }
6084 break;
b305dba6 6085 case 0xa: /* SRSHL, URSHL */
6d9571f7
PM
6086 if (u) {
6087 gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
6088 } else {
6089 gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
6090 }
6091 break;
b305dba6 6092 case 0xb: /* SQRSHL, UQRSHL */
6d9571f7
PM
6093 if (u) {
6094 gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6095 } else {
6096 gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6097 }
6098 break;
6099 case 0x10: /* ADD, SUB */
6100 if (u) {
6101 tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
6102 } else {
6103 tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
6104 }
6105 break;
b305dba6
PM
6106 default:
6107 g_assert_not_reached();
6108 }
6109}
6110
845ea09a
PM
6111/* Handle the 3-same-operands float operations; shared by the scalar
6112 * and vector encodings. The caller must filter out any encodings
6113 * not allocated for the encoding it is dealing with.
6114 */
6115static void handle_3same_float(DisasContext *s, int size, int elements,
6116 int fpopcode, int rd, int rn, int rm)
6117{
6118 int pass;
6119 TCGv_ptr fpst = get_fpstatus_ptr();
6120
6121 for (pass = 0; pass < elements; pass++) {
6122 if (size) {
6123 /* Double */
6124 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6125 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6126 TCGv_i64 tcg_res = tcg_temp_new_i64();
6127
6128 read_vec_element(s, tcg_op1, rn, pass, MO_64);
6129 read_vec_element(s, tcg_op2, rm, pass, MO_64);
6130
6131 switch (fpopcode) {
057d5f62
PM
6132 case 0x39: /* FMLS */
6133 /* As usual for ARM, separate negation for fused multiply-add */
6134 gen_helper_vfp_negd(tcg_op1, tcg_op1);
6135 /* fall through */
6136 case 0x19: /* FMLA */
6137 read_vec_element(s, tcg_res, rd, pass, MO_64);
6138 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
6139 tcg_res, fpst);
6140 break;
845ea09a
PM
6141 case 0x18: /* FMAXNM */
6142 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6143 break;
6144 case 0x1a: /* FADD */
6145 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6146 break;
057d5f62
PM
6147 case 0x1b: /* FMULX */
6148 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
6149 break;
8908f4d1
AB
6150 case 0x1c: /* FCMEQ */
6151 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
6152 break;
845ea09a
PM
6153 case 0x1e: /* FMAX */
6154 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6155 break;
057d5f62
PM
6156 case 0x1f: /* FRECPS */
6157 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
6158 break;
845ea09a
PM
6159 case 0x38: /* FMINNM */
6160 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6161 break;
6162 case 0x3a: /* FSUB */
6163 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
6164 break;
6165 case 0x3e: /* FMIN */
6166 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6167 break;
057d5f62
PM
6168 case 0x3f: /* FRSQRTS */
6169 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
6170 break;
845ea09a
PM
6171 case 0x5b: /* FMUL */
6172 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
6173 break;
8908f4d1
AB
6174 case 0x5c: /* FCMGE */
6175 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
6176 break;
057d5f62
PM
6177 case 0x5d: /* FACGE */
6178 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
6179 break;
845ea09a
PM
6180 case 0x5f: /* FDIV */
6181 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
6182 break;
6183 case 0x7a: /* FABD */
6184 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
6185 gen_helper_vfp_absd(tcg_res, tcg_res);
6186 break;
8908f4d1
AB
6187 case 0x7c: /* FCMGT */
6188 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
6189 break;
057d5f62
PM
6190 case 0x7d: /* FACGT */
6191 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
6192 break;
845ea09a
PM
6193 default:
6194 g_assert_not_reached();
6195 }
6196
6197 write_vec_element(s, tcg_res, rd, pass, MO_64);
6198
6199 tcg_temp_free_i64(tcg_res);
6200 tcg_temp_free_i64(tcg_op1);
6201 tcg_temp_free_i64(tcg_op2);
6202 } else {
6203 /* Single */
6204 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
6205 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
6206 TCGv_i32 tcg_res = tcg_temp_new_i32();
6207
6208 read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
6209 read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
6210
6211 switch (fpopcode) {
057d5f62
PM
6212 case 0x39: /* FMLS */
6213 /* As usual for ARM, separate negation for fused multiply-add */
6214 gen_helper_vfp_negs(tcg_op1, tcg_op1);
6215 /* fall through */
6216 case 0x19: /* FMLA */
6217 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
6218 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
6219 tcg_res, fpst);
6220 break;
845ea09a
PM
6221 case 0x1a: /* FADD */
6222 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6223 break;
057d5f62
PM
6224 case 0x1b: /* FMULX */
6225 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
6226 break;
8908f4d1
AB
6227 case 0x1c: /* FCMEQ */
6228 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
6229 break;
845ea09a
PM
6230 case 0x1e: /* FMAX */
6231 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6232 break;
057d5f62
PM
6233 case 0x1f: /* FRECPS */
6234 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
6235 break;
845ea09a
PM
6236 case 0x18: /* FMAXNM */
6237 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6238 break;
6239 case 0x38: /* FMINNM */
6240 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6241 break;
6242 case 0x3a: /* FSUB */
6243 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
6244 break;
6245 case 0x3e: /* FMIN */
6246 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6247 break;
057d5f62
PM
6248 case 0x3f: /* FRSQRTS */
6249 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
6250 break;
845ea09a
PM
6251 case 0x5b: /* FMUL */
6252 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
6253 break;
8908f4d1
AB
6254 case 0x5c: /* FCMGE */
6255 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
6256 break;
057d5f62
PM
6257 case 0x5d: /* FACGE */
6258 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
6259 break;
845ea09a
PM
6260 case 0x5f: /* FDIV */
6261 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
6262 break;
6263 case 0x7a: /* FABD */
6264 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
6265 gen_helper_vfp_abss(tcg_res, tcg_res);
6266 break;
8908f4d1
AB
6267 case 0x7c: /* FCMGT */
6268 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
6269 break;
057d5f62
PM
6270 case 0x7d: /* FACGT */
6271 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
6272 break;
845ea09a
PM
6273 default:
6274 g_assert_not_reached();
6275 }
6276
6277 if (elements == 1) {
6278 /* scalar single so clear high part */
6279 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
6280
6281 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
6282 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
6283 tcg_temp_free_i64(tcg_tmp);
6284 } else {
6285 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
6286 }
6287
6288 tcg_temp_free_i32(tcg_res);
6289 tcg_temp_free_i32(tcg_op1);
6290 tcg_temp_free_i32(tcg_op2);
6291 }
6292 }
6293
6294 tcg_temp_free_ptr(fpst);
6295
6296 if ((elements << size) < 4) {
6297 /* scalar, or non-quad vector op */
6298 clear_vec_high(s, rd);
6299 }
6300}
6301
384b26fb
AB
6302/* C3.6.11 AdvSIMD scalar three same
6303 * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0
6304 * +-----+---+-----------+------+---+------+--------+---+------+------+
6305 * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd |
6306 * +-----+---+-----------+------+---+------+--------+---+------+------+
6307 */
6308static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
6309{
b305dba6
PM
6310 int rd = extract32(insn, 0, 5);
6311 int rn = extract32(insn, 5, 5);
6312 int opcode = extract32(insn, 11, 5);
6313 int rm = extract32(insn, 16, 5);
6314 int size = extract32(insn, 22, 2);
6315 bool u = extract32(insn, 29, 1);
b305dba6
PM
6316 TCGv_i64 tcg_rd;
6317
6318 if (opcode >= 0x18) {
6319 /* Floating point: U, size[1] and opcode indicate operation */
6320 int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
6321 switch (fpopcode) {
6322 case 0x1b: /* FMULX */
b305dba6
PM
6323 case 0x1f: /* FRECPS */
6324 case 0x3f: /* FRSQRTS */
b305dba6 6325 case 0x5d: /* FACGE */
b305dba6 6326 case 0x7d: /* FACGT */
8908f4d1
AB
6327 case 0x1c: /* FCMEQ */
6328 case 0x5c: /* FCMGE */
6329 case 0x7c: /* FCMGT */
845ea09a
PM
6330 case 0x7a: /* FABD */
6331 break;
b305dba6
PM
6332 default:
6333 unallocated_encoding(s);
6334 return;
6335 }
845ea09a
PM
6336
6337 handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
6338 return;
b305dba6
PM
6339 }
6340
6341 switch (opcode) {
6342 case 0x1: /* SQADD, UQADD */
6343 case 0x5: /* SQSUB, UQSUB */
c0b2b5fa
PM
6344 case 0x9: /* SQSHL, UQSHL */
6345 case 0xb: /* SQRSHL, UQRSHL */
6346 break;
6d9571f7
PM
6347 case 0x8: /* SSHL, USHL */
6348 case 0xa: /* SRSHL, URSHL */
b305dba6
PM
6349 case 0x6: /* CMGT, CMHI */
6350 case 0x7: /* CMGE, CMHS */
6351 case 0x11: /* CMTST, CMEQ */
6352 case 0x10: /* ADD, SUB (vector) */
6353 if (size != 3) {
6354 unallocated_encoding(s);
6355 return;
6356 }
6357 break;
b305dba6
PM
6358 case 0x16: /* SQDMULH, SQRDMULH (vector) */
6359 if (size != 1 && size != 2) {
6360 unallocated_encoding(s);
6361 return;
6362 }
c0b2b5fa 6363 break;
b305dba6
PM
6364 default:
6365 unallocated_encoding(s);
6366 return;
6367 }
6368
b305dba6
PM
6369 tcg_rd = tcg_temp_new_i64();
6370
c0b2b5fa
PM
6371 if (size == 3) {
6372 TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
6373 TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
6374
6375 handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
6376 tcg_temp_free_i64(tcg_rn);
6377 tcg_temp_free_i64(tcg_rm);
6378 } else {
6379 /* Do a single operation on the lowest element in the vector.
6380 * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
6381 * no side effects for all these operations.
6382 * OPTME: special-purpose helpers would avoid doing some
6383 * unnecessary work in the helper for the 8 and 16 bit cases.
6384 */
6385 NeonGenTwoOpEnvFn *genenvfn;
6386 TCGv_i32 tcg_rn = tcg_temp_new_i32();
6387 TCGv_i32 tcg_rm = tcg_temp_new_i32();
6388 TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
6389
6390 read_vec_element_i32(s, tcg_rn, rn, 0, size);
6391 read_vec_element_i32(s, tcg_rm, rm, 0, size);
6392
6393 switch (opcode) {
6394 case 0x1: /* SQADD, UQADD */
6395 {
6396 static NeonGenTwoOpEnvFn * const fns[3][2] = {
6397 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
6398 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
6399 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
6400 };
6401 genenvfn = fns[size][u];
6402 break;
6403 }
6404 case 0x5: /* SQSUB, UQSUB */
6405 {
6406 static NeonGenTwoOpEnvFn * const fns[3][2] = {
6407 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
6408 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
6409 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
6410 };
6411 genenvfn = fns[size][u];
6412 break;
6413 }
6414 case 0x9: /* SQSHL, UQSHL */
6415 {
6416 static NeonGenTwoOpEnvFn * const fns[3][2] = {
6417 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
6418 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
6419 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
6420 };
6421 genenvfn = fns[size][u];
6422 break;
6423 }
6424 case 0xb: /* SQRSHL, UQRSHL */
6425 {
6426 static NeonGenTwoOpEnvFn * const fns[3][2] = {
6427 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
6428 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
6429 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
6430 };
6431 genenvfn = fns[size][u];
6432 break;
6433 }
6434 case 0x16: /* SQDMULH, SQRDMULH */
6435 {
6436 static NeonGenTwoOpEnvFn * const fns[2][2] = {
6437 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
6438 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
6439 };
6440 assert(size == 1 || size == 2);
6441 genenvfn = fns[size - 1][u];
6442 break;
6443 }
6444 default:
6445 g_assert_not_reached();
6446 }
6447
6448 genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
6449 tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
6450 tcg_temp_free_i32(tcg_rd32);
6451 tcg_temp_free_i32(tcg_rn);
6452 tcg_temp_free_i32(tcg_rm);
6453 }
b305dba6
PM
6454
6455 write_fp_dreg(s, rd, tcg_rd);
6456
b305dba6 6457 tcg_temp_free_i64(tcg_rd);
384b26fb
AB
6458}
6459
effa8e06
PM
6460static void handle_2misc_64(DisasContext *s, int opcode, bool u,
6461 TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
6462{
6463 /* Handle 64->64 opcodes which are shared between the scalar and
6464 * vector 2-reg-misc groups. We cover every integer opcode where size == 3
f93d0138 6465 * is valid in either group and also the double-precision fp ops.
effa8e06
PM
6466 */
6467 TCGCond cond;
6468
6469 switch (opcode) {
86cbc418
PM
6470 case 0x5: /* NOT */
6471 /* This opcode is shared with CNT and RBIT but we have earlier
6472 * enforced that size == 3 if and only if this is the NOT insn.
6473 */
6474 tcg_gen_not_i64(tcg_rd, tcg_rn);
6475 break;
effa8e06
PM
6476 case 0xa: /* CMLT */
6477 /* 64 bit integer comparison against zero, result is
6478 * test ? (2^64 - 1) : 0. We implement via setcond(!test) and
6479 * subtracting 1.
6480 */
6481 cond = TCG_COND_LT;
6482 do_cmop:
6483 tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
6484 tcg_gen_neg_i64(tcg_rd, tcg_rd);
6485 break;
6486 case 0x8: /* CMGT, CMGE */
6487 cond = u ? TCG_COND_GE : TCG_COND_GT;
6488 goto do_cmop;
6489 case 0x9: /* CMEQ, CMLE */
6490 cond = u ? TCG_COND_LE : TCG_COND_EQ;
6491 goto do_cmop;
6492 case 0xb: /* ABS, NEG */
6493 if (u) {
6494 tcg_gen_neg_i64(tcg_rd, tcg_rn);
6495 } else {
6496 TCGv_i64 tcg_zero = tcg_const_i64(0);
6497 tcg_gen_neg_i64(tcg_rd, tcg_rn);
6498 tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
6499 tcg_rn, tcg_rd);
6500 tcg_temp_free_i64(tcg_zero);
6501 }
6502 break;
f93d0138
PM
6503 case 0x2f: /* FABS */
6504 gen_helper_vfp_absd(tcg_rd, tcg_rn);
6505 break;
6506 case 0x6f: /* FNEG */
6507 gen_helper_vfp_negd(tcg_rd, tcg_rn);
6508 break;
effa8e06
PM
6509 default:
6510 g_assert_not_reached();
6511 }
6512}
6513
8908f4d1
AB
6514static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
6515 bool is_scalar, bool is_u, bool is_q,
6516 int size, int rn, int rd)
6517{
6518 bool is_double = (size == 3);
6519 TCGv_ptr fpst = get_fpstatus_ptr();
6520
6521 if (is_double) {
6522 TCGv_i64 tcg_op = tcg_temp_new_i64();
6523 TCGv_i64 tcg_zero = tcg_const_i64(0);
6524 TCGv_i64 tcg_res = tcg_temp_new_i64();
6525 NeonGenTwoDoubleOPFn *genfn;
6526 bool swap = false;
6527 int pass;
6528
6529 switch (opcode) {
6530 case 0x2e: /* FCMLT (zero) */
6531 swap = true;
6532 /* fallthrough */
6533 case 0x2c: /* FCMGT (zero) */
6534 genfn = gen_helper_neon_cgt_f64;
6535 break;
6536 case 0x2d: /* FCMEQ (zero) */
6537 genfn = gen_helper_neon_ceq_f64;
6538 break;
6539 case 0x6d: /* FCMLE (zero) */
6540 swap = true;
6541 /* fall through */
6542 case 0x6c: /* FCMGE (zero) */
6543 genfn = gen_helper_neon_cge_f64;
6544 break;
6545 default:
6546 g_assert_not_reached();
6547 }
6548
6549 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
6550 read_vec_element(s, tcg_op, rn, pass, MO_64);
6551 if (swap) {
6552 genfn(tcg_res, tcg_zero, tcg_op, fpst);
6553 } else {
6554 genfn(tcg_res, tcg_op, tcg_zero, fpst);
6555 }
6556 write_vec_element(s, tcg_res, rd, pass, MO_64);
6557 }
6558 if (is_scalar) {
6559 clear_vec_high(s, rd);
6560 }
6561
6562 tcg_temp_free_i64(tcg_res);
6563 tcg_temp_free_i64(tcg_zero);
6564 tcg_temp_free_i64(tcg_op);
6565 } else {
6566 TCGv_i32 tcg_op = tcg_temp_new_i32();
6567 TCGv_i32 tcg_zero = tcg_const_i32(0);
6568 TCGv_i32 tcg_res = tcg_temp_new_i32();
6569 NeonGenTwoSingleOPFn *genfn;
6570 bool swap = false;
6571 int pass, maxpasses;
6572
6573 switch (opcode) {
6574 case 0x2e: /* FCMLT (zero) */
6575 swap = true;
6576 /* fall through */
6577 case 0x2c: /* FCMGT (zero) */
6578 genfn = gen_helper_neon_cgt_f32;
6579 break;
6580 case 0x2d: /* FCMEQ (zero) */
6581 genfn = gen_helper_neon_ceq_f32;
6582 break;
6583 case 0x6d: /* FCMLE (zero) */
6584 swap = true;
6585 /* fall through */
6586 case 0x6c: /* FCMGE (zero) */
6587 genfn = gen_helper_neon_cge_f32;
6588 break;
6589 default:
6590 g_assert_not_reached();
6591 }
6592
6593 if (is_scalar) {
6594 maxpasses = 1;
6595 } else {
6596 maxpasses = is_q ? 4 : 2;
6597 }
6598
6599 for (pass = 0; pass < maxpasses; pass++) {
6600 read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
6601 if (swap) {
6602 genfn(tcg_res, tcg_zero, tcg_op, fpst);
6603 } else {
6604 genfn(tcg_res, tcg_op, tcg_zero, fpst);
6605 }
6606 if (is_scalar) {
6607 write_fp_sreg(s, rd, tcg_res);
6608 } else {
6609 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
6610 }
6611 }
6612 tcg_temp_free_i32(tcg_res);
6613 tcg_temp_free_i32(tcg_zero);
6614 tcg_temp_free_i32(tcg_op);
6615 if (!is_q && !is_scalar) {
6616 clear_vec_high(s, rd);
6617 }
6618 }
6619
6620 tcg_temp_free_ptr(fpst);
6621}
6622
384b26fb
AB
6623/* C3.6.12 AdvSIMD scalar two reg misc
6624 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
6625 * +-----+---+-----------+------+-----------+--------+-----+------+------+
6626 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd |
6627 * +-----+---+-----------+------+-----------+--------+-----+------+------+
6628 */
6629static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
6630{
effa8e06
PM
6631 int rd = extract32(insn, 0, 5);
6632 int rn = extract32(insn, 5, 5);
6633 int opcode = extract32(insn, 12, 5);
6634 int size = extract32(insn, 22, 2);
6635 bool u = extract32(insn, 29, 1);
6636
6637 switch (opcode) {
6638 case 0xa: /* CMLT */
6639 if (u) {
6640 unallocated_encoding(s);
6641 return;
6642 }
6643 /* fall through */
6644 case 0x8: /* CMGT, CMGE */
6645 case 0x9: /* CMEQ, CMLE */
6646 case 0xb: /* ABS, NEG */
6647 if (size != 3) {
6648 unallocated_encoding(s);
6649 return;
6650 }
6651 break;
8908f4d1
AB
6652 case 0xc ... 0xf:
6653 case 0x16 ... 0x1d:
6654 case 0x1f:
6655 /* Floating point: U, size[1] and opcode indicate operation;
6656 * size[0] indicates single or double precision.
6657 */
6658 opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
6659 size = extract32(size, 0, 1) ? 3 : 2;
6660 switch (opcode) {
6661 case 0x2c: /* FCMGT (zero) */
6662 case 0x2d: /* FCMEQ (zero) */
6663 case 0x2e: /* FCMLT (zero) */
6664 case 0x6c: /* FCMGE (zero) */
6665 case 0x6d: /* FCMLE (zero) */
6666 handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
6667 return;
6668 case 0x1a: /* FCVTNS */
6669 case 0x1b: /* FCVTMS */
6670 case 0x1c: /* FCVTAS */
6671 case 0x1d: /* SCVTF */
6672 case 0x3a: /* FCVTPS */
6673 case 0x3b: /* FCVTZS */
6674 case 0x3d: /* FRECPE */
6675 case 0x3f: /* FRECPX */
6676 case 0x56: /* FCVTXN, FCVTXN2 */
6677 case 0x5a: /* FCVTNU */
6678 case 0x5b: /* FCVTMU */
6679 case 0x5c: /* FCVTAU */
6680 case 0x5d: /* UCVTF */
6681 case 0x7a: /* FCVTPU */
6682 case 0x7b: /* FCVTZU */
6683 case 0x7d: /* FRSQRTE */
6684 unsupported_encoding(s, insn);
6685 return;
6686 default:
6687 unallocated_encoding(s);
6688 return;
6689 }
6690 break;
effa8e06
PM
6691 default:
6692 /* Other categories of encoding in this class:
effa8e06
PM
6693 * + SUQADD/USQADD/SQABS/SQNEG : size 8, 16, 32 or 64
6694 * + SQXTN/SQXTN2/SQXTUN/SQXTUN2/UQXTN/UQXTN2:
6695 * narrowing saturate ops: size 64/32/16 -> 32/16/8
6696 */
6697 unsupported_encoding(s, insn);
6698 return;
6699 }
6700
6701 if (size == 3) {
6702 TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
6703 TCGv_i64 tcg_rd = tcg_temp_new_i64();
6704
6705 handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn);
6706 write_fp_dreg(s, rd, tcg_rd);
6707 tcg_temp_free_i64(tcg_rd);
6708 tcg_temp_free_i64(tcg_rn);
6709 } else {
6710 /* the 'size might not be 64' ops aren't implemented yet */
6711 g_assert_not_reached();
6712 }
384b26fb
AB
6713}
6714
4d1cef84
AB
6715/* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
6716static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
6717 int immh, int immb, int opcode, int rn, int rd)
6718{
6719 int size = 32 - clz32(immh) - 1;
6720 int immhb = immh << 3 | immb;
6721 int shift = 2 * (8 << size) - immhb;
6722 bool accumulate = false;
6723 bool round = false;
6724 int dsize = is_q ? 128 : 64;
6725 int esize = 8 << size;
6726 int elements = dsize/esize;
6727 TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
6728 TCGv_i64 tcg_rn = new_tmp_a64(s);
6729 TCGv_i64 tcg_rd = new_tmp_a64(s);
6730 TCGv_i64 tcg_round;
6731 int i;
6732
6733 if (extract32(immh, 3, 1) && !is_q) {
6734 unallocated_encoding(s);
6735 return;
6736 }
6737
6738 if (size > 3 && !is_q) {
6739 unallocated_encoding(s);
6740 return;
6741 }
6742
6743 switch (opcode) {
6744 case 0x02: /* SSRA / USRA (accumulate) */
6745 accumulate = true;
6746 break;
6747 case 0x04: /* SRSHR / URSHR (rounding) */
6748 round = true;
6749 break;
6750 case 0x06: /* SRSRA / URSRA (accum + rounding) */
6751 accumulate = round = true;
6752 break;
6753 }
6754
6755 if (round) {
6756 uint64_t round_const = 1ULL << (shift - 1);
6757 tcg_round = tcg_const_i64(round_const);
6758 } else {
6759 TCGV_UNUSED_I64(tcg_round);
6760 }
6761
6762 for (i = 0; i < elements; i++) {
6763 read_vec_element(s, tcg_rn, rn, i, memop);
6764 if (accumulate) {
6765 read_vec_element(s, tcg_rd, rd, i, memop);
6766 }
6767
6768 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6769 accumulate, is_u, size, shift);
6770
6771 write_vec_element(s, tcg_rd, rd, i, size);
6772 }
6773
6774 if (!is_q) {
6775 clear_vec_high(s, rd);
6776 }
6777
6778 if (round) {
6779 tcg_temp_free_i64(tcg_round);
6780 }
6781}
6782
6783/* SHL/SLI - Vector shift left */
6784static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
6785 int immh, int immb, int opcode, int rn, int rd)
6786{
6787 int size = 32 - clz32(immh) - 1;
6788 int immhb = immh << 3 | immb;
6789 int shift = immhb - (8 << size);
6790 int dsize = is_q ? 128 : 64;
6791 int esize = 8 << size;
6792 int elements = dsize/esize;
6793 TCGv_i64 tcg_rn = new_tmp_a64(s);
6794 TCGv_i64 tcg_rd = new_tmp_a64(s);
6795 int i;
6796
6797 if (extract32(immh, 3, 1) && !is_q) {
6798 unallocated_encoding(s);
6799 return;
6800 }
6801
6802 if (size > 3 && !is_q) {
6803 unallocated_encoding(s);
6804 return;
6805 }
6806
6807 for (i = 0; i < elements; i++) {
6808 read_vec_element(s, tcg_rn, rn, i, size);
6809 if (insert) {
6810 read_vec_element(s, tcg_rd, rd, i, size);
6811 }
6812
6813 handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
6814
6815 write_vec_element(s, tcg_rd, rd, i, size);
6816 }
6817
6818 if (!is_q) {
6819 clear_vec_high(s, rd);
6820 }
6821}
6822
6823/* USHLL/SHLL - Vector shift left with widening */
6824static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
6825 int immh, int immb, int opcode, int rn, int rd)
6826{
6827 int size = 32 - clz32(immh) - 1;
6828 int immhb = immh << 3 | immb;
6829 int shift = immhb - (8 << size);
6830 int dsize = 64;
6831 int esize = 8 << size;
6832 int elements = dsize/esize;
6833 TCGv_i64 tcg_rn = new_tmp_a64(s);
6834 TCGv_i64 tcg_rd = new_tmp_a64(s);
6835 int i;
6836
6837 if (size >= 3) {
6838 unallocated_encoding(s);
6839 return;
6840 }
6841
6842 /* For the LL variants the store is larger than the load,
6843 * so if rd == rn we would overwrite parts of our input.
6844 * So load everything right now and use shifts in the main loop.
6845 */
6846 read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
6847
6848 for (i = 0; i < elements; i++) {
6849 tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
6850 ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
6851 tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
6852 write_vec_element(s, tcg_rd, rd, i, size + 1);
6853 }
6854}
6855
6856
384b26fb
AB
6857/* C3.6.14 AdvSIMD shift by immediate
6858 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0
6859 * +---+---+---+-------------+------+------+--------+---+------+------+
6860 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd |
6861 * +---+---+---+-------------+------+------+--------+---+------+------+
6862 */
6863static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
6864{
4d1cef84
AB
6865 int rd = extract32(insn, 0, 5);
6866 int rn = extract32(insn, 5, 5);
6867 int opcode = extract32(insn, 11, 5);
6868 int immb = extract32(insn, 16, 3);
6869 int immh = extract32(insn, 19, 4);
6870 bool is_u = extract32(insn, 29, 1);
6871 bool is_q = extract32(insn, 30, 1);
6872
6873 switch (opcode) {
6874 case 0x00: /* SSHR / USHR */
6875 case 0x02: /* SSRA / USRA (accumulate) */
6876 case 0x04: /* SRSHR / URSHR (rounding) */
6877 case 0x06: /* SRSRA / URSRA (accum + rounding) */
6878 handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
6879 break;
6880 case 0x0a: /* SHL / SLI */
6881 handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
6882 break;
6883 case 0x14: /* SSHLL / USHLL */
6884 handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
6885 break;
6886 default:
6887 /* We don't currently implement any of the Narrow or saturating shifts;
6888 * nor do we implement the fixed-point conversions in this
6889 * encoding group (SCVTF, FCVTZS, UCVTF, FCVTZU).
6890 */
6891 unsupported_encoding(s, insn);
6892 return;
6893 }
384b26fb
AB
6894}
6895
70d7f984
PM
6896/* Generate code to do a "long" addition or subtraction, ie one done in
6897 * TCGv_i64 on vector lanes twice the width specified by size.
6898 */
6899static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
6900 TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
6901{
6902 static NeonGenTwo64OpFn * const fns[3][2] = {
6903 { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
6904 { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
6905 { tcg_gen_add_i64, tcg_gen_sub_i64 },
6906 };
6907 NeonGenTwo64OpFn *genfn;
6908 assert(size < 3);
6909
6910 genfn = fns[size][is_sub];
6911 genfn(tcg_res, tcg_op1, tcg_op2);
6912}
6913
a08582f4
PM
6914static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
6915 int opcode, int rd, int rn, int rm)
6916{
6917 /* 3-reg-different widening insns: 64 x 64 -> 128 */
6918 TCGv_i64 tcg_res[2];
6919 int pass, accop;
6920
6921 tcg_res[0] = tcg_temp_new_i64();
6922 tcg_res[1] = tcg_temp_new_i64();
6923
6924 /* Does this op do an adding accumulate, a subtracting accumulate,
6925 * or no accumulate at all?
6926 */
6927 switch (opcode) {
6928 case 5:
6929 case 8:
6930 case 9:
6931 accop = 1;
6932 break;
6933 case 10:
6934 case 11:
6935 accop = -1;
6936 break;
6937 default:
6938 accop = 0;
6939 break;
6940 }
6941
6942 if (accop != 0) {
6943 read_vec_element(s, tcg_res[0], rd, 0, MO_64);
6944 read_vec_element(s, tcg_res[1], rd, 1, MO_64);
6945 }
6946
6947 /* size == 2 means two 32x32->64 operations; this is worth special
6948 * casing because we can generally handle it inline.
6949 */
6950 if (size == 2) {
6951 for (pass = 0; pass < 2; pass++) {
6952 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6953 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6954 TCGv_i64 tcg_passres;
6955 TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
6956
6957 int elt = pass + is_q * 2;
6958
6959 read_vec_element(s, tcg_op1, rn, elt, memop);
6960 read_vec_element(s, tcg_op2, rm, elt, memop);
6961
6962 if (accop == 0) {
6963 tcg_passres = tcg_res[pass];
6964 } else {
6965 tcg_passres = tcg_temp_new_i64();
6966 }
6967
6968 switch (opcode) {
70d7f984
PM
6969 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
6970 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
6971 break;
6972 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
6973 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
6974 break;
0ae39320
PM
6975 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
6976 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
6977 {
6978 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
6979 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
6980
6981 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
6982 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
6983 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
6984 tcg_passres,
6985 tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
6986 tcg_temp_free_i64(tcg_tmp1);
6987 tcg_temp_free_i64(tcg_tmp2);
6988 break;
6989 }
a08582f4
PM
6990 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
6991 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
6992 case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
6993 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
6994 break;
70d7f984
PM
6995 case 9: /* SQDMLAL, SQDMLAL2 */
6996 case 11: /* SQDMLSL, SQDMLSL2 */
6997 case 13: /* SQDMULL, SQDMULL2 */
6998 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
6999 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
7000 tcg_passres, tcg_passres);
7001 break;
a08582f4
PM
7002 default:
7003 g_assert_not_reached();
7004 }
7005
70d7f984
PM
7006 if (opcode == 9 || opcode == 11) {
7007 /* saturating accumulate ops */
7008 if (accop < 0) {
7009 tcg_gen_neg_i64(tcg_passres, tcg_passres);
7010 }
7011 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
7012 tcg_res[pass], tcg_passres);
7013 } else if (accop > 0) {
a08582f4 7014 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
a08582f4
PM
7015 } else if (accop < 0) {
7016 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
70d7f984
PM
7017 }
7018
7019 if (accop != 0) {
a08582f4
PM
7020 tcg_temp_free_i64(tcg_passres);
7021 }
7022
7023 tcg_temp_free_i64(tcg_op1);
7024 tcg_temp_free_i64(tcg_op2);
7025 }
7026 } else {
7027 /* size 0 or 1, generally helper functions */
7028 for (pass = 0; pass < 2; pass++) {
7029 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7030 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7031 TCGv_i64 tcg_passres;
7032 int elt = pass + is_q * 2;
7033
7034 read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
7035 read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
7036
7037 if (accop == 0) {
7038 tcg_passres = tcg_res[pass];
7039 } else {
7040 tcg_passres = tcg_temp_new_i64();
7041 }
7042
7043 switch (opcode) {
70d7f984
PM
7044 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
7045 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
7046 {
7047 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
7048 static NeonGenWidenFn * const widenfns[2][2] = {
7049 { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
7050 { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
7051 };
7052 NeonGenWidenFn *widenfn = widenfns[size][is_u];
7053
7054 widenfn(tcg_op2_64, tcg_op2);
7055 widenfn(tcg_passres, tcg_op1);
7056 gen_neon_addl(size, (opcode == 2), tcg_passres,
7057 tcg_passres, tcg_op2_64);
7058 tcg_temp_free_i64(tcg_op2_64);
7059 break;
7060 }
0ae39320
PM
7061 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
7062 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
7063 if (size == 0) {
7064 if (is_u) {
7065 gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
7066 } else {
7067 gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
7068 }
7069 } else {
7070 if (is_u) {
7071 gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
7072 } else {
7073 gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
7074 }
7075 }
7076 break;
a08582f4
PM
7077 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
7078 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
7079 case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
7080 if (size == 0) {
7081 if (is_u) {
7082 gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
7083 } else {
7084 gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
7085 }
7086 } else {
7087 if (is_u) {
7088 gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
7089 } else {
7090 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
7091 }
7092 }
7093 break;
70d7f984
PM
7094 case 9: /* SQDMLAL, SQDMLAL2 */
7095 case 11: /* SQDMLSL, SQDMLSL2 */
7096 case 13: /* SQDMULL, SQDMULL2 */
7097 assert(size == 1);
7098 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
7099 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
7100 tcg_passres, tcg_passres);
7101 break;
a08582f4
PM
7102 default:
7103 g_assert_not_reached();
7104 }
7105 tcg_temp_free_i32(tcg_op1);
7106 tcg_temp_free_i32(tcg_op2);
7107
70d7f984
PM
7108 if (accop != 0) {
7109 if (opcode == 9 || opcode == 11) {
7110 /* saturating accumulate ops */
7111 if (accop < 0) {
7112 gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
7113 }
7114 gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
7115 tcg_res[pass],
7116 tcg_passres);
a08582f4 7117 } else {
70d7f984
PM
7118 gen_neon_addl(size, (accop < 0), tcg_res[pass],
7119 tcg_res[pass], tcg_passres);
a08582f4
PM
7120 }
7121 tcg_temp_free_i64(tcg_passres);
7122 }
7123 }
7124 }
7125
7126 write_vec_element(s, tcg_res[0], rd, 0, MO_64);
7127 write_vec_element(s, tcg_res[1], rd, 1, MO_64);
7128 tcg_temp_free_i64(tcg_res[0]);
7129 tcg_temp_free_i64(tcg_res[1]);
7130}
7131
dfc15c7c
PM
7132static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
7133 int opcode, int rd, int rn, int rm)
7134{
7135 TCGv_i64 tcg_res[2];
7136 int part = is_q ? 2 : 0;
7137 int pass;
7138
7139 for (pass = 0; pass < 2; pass++) {
7140 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7141 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7142 TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
7143 static NeonGenWidenFn * const widenfns[3][2] = {
7144 { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
7145 { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
7146 { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
7147 };
7148 NeonGenWidenFn *widenfn = widenfns[size][is_u];
7149
7150 read_vec_element(s, tcg_op1, rn, pass, MO_64);
7151 read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
7152 widenfn(tcg_op2_wide, tcg_op2);
7153 tcg_temp_free_i32(tcg_op2);
7154 tcg_res[pass] = tcg_temp_new_i64();
7155 gen_neon_addl(size, (opcode == 3),
7156 tcg_res[pass], tcg_op1, tcg_op2_wide);
7157 tcg_temp_free_i64(tcg_op1);
7158 tcg_temp_free_i64(tcg_op2_wide);
7159 }
7160
7161 for (pass = 0; pass < 2; pass++) {
7162 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
7163 tcg_temp_free_i64(tcg_res[pass]);
7164 }
7165}
7166
e4b998d4
PM
7167static void do_narrow_high_u32(TCGv_i32 res, TCGv_i64 in)
7168{
7169 tcg_gen_shri_i64(in, in, 32);
7170 tcg_gen_trunc_i64_i32(res, in);
7171}
7172
7173static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
7174{
7175 tcg_gen_addi_i64(in, in, 1U << 31);
7176 do_narrow_high_u32(res, in);
7177}
7178
7179static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
7180 int opcode, int rd, int rn, int rm)
7181{
7182 TCGv_i32 tcg_res[2];
7183 int part = is_q ? 2 : 0;
7184 int pass;
7185
7186 for (pass = 0; pass < 2; pass++) {
7187 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7188 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7189 TCGv_i64 tcg_wideres = tcg_temp_new_i64();
7190 static NeonGenNarrowFn * const narrowfns[3][2] = {
7191 { gen_helper_neon_narrow_high_u8,
7192 gen_helper_neon_narrow_round_high_u8 },
7193 { gen_helper_neon_narrow_high_u16,
7194 gen_helper_neon_narrow_round_high_u16 },
7195 { do_narrow_high_u32, do_narrow_round_high_u32 },
7196 };
7197 NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
7198
7199 read_vec_element(s, tcg_op1, rn, pass, MO_64);
7200 read_vec_element(s, tcg_op2, rm, pass, MO_64);
7201
7202 gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
7203
7204 tcg_temp_free_i64(tcg_op1);
7205 tcg_temp_free_i64(tcg_op2);
7206
7207 tcg_res[pass] = tcg_temp_new_i32();
7208 gennarrow(tcg_res[pass], tcg_wideres);
7209 tcg_temp_free_i64(tcg_wideres);
7210 }
7211
7212 for (pass = 0; pass < 2; pass++) {
7213 write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
7214 tcg_temp_free_i32(tcg_res[pass]);
7215 }
7216 if (!is_q) {
7217 clear_vec_high(s, rd);
7218 }
7219}
7220
384b26fb
AB
7221/* C3.6.15 AdvSIMD three different
7222 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
7223 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
7224 * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd |
7225 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
7226 */
7227static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
7228{
a08582f4
PM
7229 /* Instructions in this group fall into three basic classes
7230 * (in each case with the operation working on each element in
7231 * the input vectors):
7232 * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
7233 * 128 bit input)
7234 * (2) wide 64 x 128 -> 128
7235 * (3) narrowing 128 x 128 -> 64
7236 * Here we do initial decode, catch unallocated cases and
7237 * dispatch to separate functions for each class.
7238 */
7239 int is_q = extract32(insn, 30, 1);
7240 int is_u = extract32(insn, 29, 1);
7241 int size = extract32(insn, 22, 2);
7242 int opcode = extract32(insn, 12, 4);
7243 int rm = extract32(insn, 16, 5);
7244 int rn = extract32(insn, 5, 5);
7245 int rd = extract32(insn, 0, 5);
7246
7247 switch (opcode) {
7248 case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
7249 case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
7250 /* 64 x 128 -> 128 */
dfc15c7c
PM
7251 if (size == 3) {
7252 unallocated_encoding(s);
7253 return;
7254 }
7255 handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
a08582f4
PM
7256 break;
7257 case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
7258 case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
7259 /* 128 x 128 -> 64 */
e4b998d4
PM
7260 if (size == 3) {
7261 unallocated_encoding(s);
7262 return;
7263 }
7264 handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
a08582f4 7265 break;
70d7f984
PM
7266 case 14: /* PMULL, PMULL2 */
7267 if (is_u || size == 1 || size == 2) {
7268 unallocated_encoding(s);
7269 return;
7270 }
7271 unsupported_encoding(s, insn);
7272 break;
13caf1fd
PM
7273 case 9: /* SQDMLAL, SQDMLAL2 */
7274 case 11: /* SQDMLSL, SQDMLSL2 */
7275 case 13: /* SQDMULL, SQDMULL2 */
70d7f984 7276 if (is_u || size == 0) {
a08582f4
PM
7277 unallocated_encoding(s);
7278 return;
7279 }
7280 /* fall through */
13caf1fd
PM
7281 case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
7282 case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
13caf1fd
PM
7283 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
7284 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
7285 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
7286 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
7287 case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
a08582f4
PM
7288 /* 64 x 64 -> 128 */
7289 if (size == 3) {
7290 unallocated_encoding(s);
7291 return;
7292 }
7293 handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
7294 break;
7295 default:
7296 /* opcode 15 not allocated */
7297 unallocated_encoding(s);
7298 break;
7299 }
384b26fb
AB
7300}
7301
e1cea114
PM
7302/* Logic op (opcode == 3) subgroup of C3.6.16. */
7303static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
7304{
956d272e
PM
7305 int rd = extract32(insn, 0, 5);
7306 int rn = extract32(insn, 5, 5);
7307 int rm = extract32(insn, 16, 5);
7308 int size = extract32(insn, 22, 2);
7309 bool is_u = extract32(insn, 29, 1);
7310 bool is_q = extract32(insn, 30, 1);
7311 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7312 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7313 TCGv_i64 tcg_res[2];
7314 int pass;
7315
7316 tcg_res[0] = tcg_temp_new_i64();
7317 tcg_res[1] = tcg_temp_new_i64();
7318
7319 for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
7320 read_vec_element(s, tcg_op1, rn, pass, MO_64);
7321 read_vec_element(s, tcg_op2, rm, pass, MO_64);
7322
7323 if (!is_u) {
7324 switch (size) {
7325 case 0: /* AND */
7326 tcg_gen_and_i64(tcg_res[pass], tcg_op1, tcg_op2);
7327 break;
7328 case 1: /* BIC */
7329 tcg_gen_andc_i64(tcg_res[pass], tcg_op1, tcg_op2);
7330 break;
7331 case 2: /* ORR */
7332 tcg_gen_or_i64(tcg_res[pass], tcg_op1, tcg_op2);
7333 break;
7334 case 3: /* ORN */
7335 tcg_gen_orc_i64(tcg_res[pass], tcg_op1, tcg_op2);
7336 break;
7337 }
7338 } else {
7339 if (size != 0) {
7340 /* B* ops need res loaded to operate on */
7341 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
7342 }
7343
7344 switch (size) {
7345 case 0: /* EOR */
7346 tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
7347 break;
7348 case 1: /* BSL bitwise select */
7349 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_op2);
7350 tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_res[pass]);
7351 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op1);
7352 break;
7353 case 2: /* BIT, bitwise insert if true */
7354 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
7355 tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_op2);
7356 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
7357 break;
7358 case 3: /* BIF, bitwise insert if false */
7359 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
7360 tcg_gen_andc_i64(tcg_op1, tcg_op1, tcg_op2);
7361 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
7362 break;
7363 }
7364 }
7365 }
7366
7367 write_vec_element(s, tcg_res[0], rd, 0, MO_64);
7368 if (!is_q) {
7369 tcg_gen_movi_i64(tcg_res[1], 0);
7370 }
7371 write_vec_element(s, tcg_res[1], rd, 1, MO_64);
7372
7373 tcg_temp_free_i64(tcg_op1);
7374 tcg_temp_free_i64(tcg_op2);
7375 tcg_temp_free_i64(tcg_res[0]);
7376 tcg_temp_free_i64(tcg_res[1]);
e1cea114
PM
7377}
7378
8b12a0cf
PM
7379/* Helper functions for 32 bit comparisons */
7380static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
7381{
7382 tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
7383}
7384
7385static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
7386{
7387 tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
7388}
7389
7390static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
7391{
7392 tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
7393}
7394
7395static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
7396{
7397 tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
7398}
7399
bc242f9b
AB
7400/* Pairwise op subgroup of C3.6.16.
7401 *
7402 * This is called directly or via the handle_3same_float for float pairwise
7403 * operations where the opcode and size are calculated differently.
7404 */
7405static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
7406 int size, int rn, int rm, int rd)
e1cea114 7407{
bc242f9b 7408 TCGv_ptr fpst;
0173a005
PM
7409 int pass;
7410
bc242f9b
AB
7411 /* Floating point operations need fpst */
7412 if (opcode >= 0x58) {
7413 fpst = get_fpstatus_ptr();
7414 } else {
7415 TCGV_UNUSED_PTR(fpst);
0173a005
PM
7416 }
7417
7418 /* These operations work on the concatenated rm:rn, with each pair of
7419 * adjacent elements being operated on to produce an element in the result.
7420 */
7421 if (size == 3) {
7422 TCGv_i64 tcg_res[2];
7423
7424 for (pass = 0; pass < 2; pass++) {
7425 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7426 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7427 int passreg = (pass == 0) ? rn : rm;
7428
7429 read_vec_element(s, tcg_op1, passreg, 0, MO_64);
7430 read_vec_element(s, tcg_op2, passreg, 1, MO_64);
7431 tcg_res[pass] = tcg_temp_new_i64();
7432
bc242f9b
AB
7433 switch (opcode) {
7434 case 0x17: /* ADDP */
7435 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
7436 break;
7437 case 0x58: /* FMAXNMP */
7438 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
7439 break;
7440 case 0x5a: /* FADDP */
7441 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
7442 break;
7443 case 0x5e: /* FMAXP */
7444 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
7445 break;
7446 case 0x78: /* FMINNMP */
7447 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
7448 break;
7449 case 0x7e: /* FMINP */
7450 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
7451 break;
7452 default:
7453 g_assert_not_reached();
7454 }
0173a005
PM
7455
7456 tcg_temp_free_i64(tcg_op1);
7457 tcg_temp_free_i64(tcg_op2);
7458 }
7459
7460 for (pass = 0; pass < 2; pass++) {
7461 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
7462 tcg_temp_free_i64(tcg_res[pass]);
7463 }
7464 } else {
7465 int maxpass = is_q ? 4 : 2;
7466 TCGv_i32 tcg_res[4];
7467
7468 for (pass = 0; pass < maxpass; pass++) {
7469 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7470 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
bc242f9b 7471 NeonGenTwoOpFn *genfn = NULL;
0173a005
PM
7472 int passreg = pass < (maxpass / 2) ? rn : rm;
7473 int passelt = (is_q && (pass & 1)) ? 2 : 0;
7474
7475 read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
7476 read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
7477 tcg_res[pass] = tcg_temp_new_i32();
7478
7479 switch (opcode) {
7480 case 0x17: /* ADDP */
7481 {
7482 static NeonGenTwoOpFn * const fns[3] = {
7483 gen_helper_neon_padd_u8,
7484 gen_helper_neon_padd_u16,
7485 tcg_gen_add_i32,
7486 };
7487 genfn = fns[size];
7488 break;
7489 }
7490 case 0x14: /* SMAXP, UMAXP */
7491 {
7492 static NeonGenTwoOpFn * const fns[3][2] = {
7493 { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
7494 { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
7495 { gen_max_s32, gen_max_u32 },
7496 };
7497 genfn = fns[size][u];
7498 break;
7499 }
7500 case 0x15: /* SMINP, UMINP */
7501 {
7502 static NeonGenTwoOpFn * const fns[3][2] = {
7503 { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
7504 { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
7505 { gen_min_s32, gen_min_u32 },
7506 };
7507 genfn = fns[size][u];
7508 break;
7509 }
bc242f9b
AB
7510 /* The FP operations are all on single floats (32 bit) */
7511 case 0x58: /* FMAXNMP */
7512 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
7513 break;
7514 case 0x5a: /* FADDP */
7515 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
7516 break;
7517 case 0x5e: /* FMAXP */
7518 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
7519 break;
7520 case 0x78: /* FMINNMP */
7521 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
7522 break;
7523 case 0x7e: /* FMINP */
7524 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
7525 break;
0173a005
PM
7526 default:
7527 g_assert_not_reached();
7528 }
7529
bc242f9b
AB
7530 /* FP ops called directly, otherwise call now */
7531 if (genfn) {
7532 genfn(tcg_res[pass], tcg_op1, tcg_op2);
7533 }
0173a005
PM
7534
7535 tcg_temp_free_i32(tcg_op1);
7536 tcg_temp_free_i32(tcg_op2);
7537 }
7538
7539 for (pass = 0; pass < maxpass; pass++) {
7540 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
7541 tcg_temp_free_i32(tcg_res[pass]);
7542 }
7543 if (!is_q) {
7544 clear_vec_high(s, rd);
7545 }
7546 }
bc242f9b
AB
7547
7548 if (!TCGV_IS_UNUSED_PTR(fpst)) {
7549 tcg_temp_free_ptr(fpst);
7550 }
e1cea114
PM
7551}
7552
7553/* Floating point op subgroup of C3.6.16. */
7554static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
7555{
845ea09a
PM
7556 /* For floating point ops, the U, size[1] and opcode bits
7557 * together indicate the operation. size[0] indicates single
7558 * or double.
7559 */
7560 int fpopcode = extract32(insn, 11, 5)
7561 | (extract32(insn, 23, 1) << 5)
7562 | (extract32(insn, 29, 1) << 6);
7563 int is_q = extract32(insn, 30, 1);
7564 int size = extract32(insn, 22, 1);
7565 int rm = extract32(insn, 16, 5);
7566 int rn = extract32(insn, 5, 5);
7567 int rd = extract32(insn, 0, 5);
7568
7569 int datasize = is_q ? 128 : 64;
7570 int esize = 32 << size;
7571 int elements = datasize / esize;
7572
7573 if (size == 1 && !is_q) {
7574 unallocated_encoding(s);
7575 return;
7576 }
7577
7578 switch (fpopcode) {
7579 case 0x58: /* FMAXNMP */
7580 case 0x5a: /* FADDP */
7581 case 0x5e: /* FMAXP */
7582 case 0x78: /* FMINNMP */
7583 case 0x7e: /* FMINP */
bc242f9b
AB
7584 if (size && !is_q) {
7585 unallocated_encoding(s);
7586 return;
7587 }
7588 handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
7589 rn, rm, rd);
845ea09a
PM
7590 return;
7591 case 0x1b: /* FMULX */
845ea09a
PM
7592 case 0x1f: /* FRECPS */
7593 case 0x3f: /* FRSQRTS */
845ea09a 7594 case 0x5d: /* FACGE */
845ea09a
PM
7595 case 0x7d: /* FACGT */
7596 case 0x19: /* FMLA */
7597 case 0x39: /* FMLS */
845ea09a
PM
7598 case 0x18: /* FMAXNM */
7599 case 0x1a: /* FADD */
8908f4d1 7600 case 0x1c: /* FCMEQ */
845ea09a
PM
7601 case 0x1e: /* FMAX */
7602 case 0x38: /* FMINNM */
7603 case 0x3a: /* FSUB */
7604 case 0x3e: /* FMIN */
7605 case 0x5b: /* FMUL */
8908f4d1 7606 case 0x5c: /* FCMGE */
845ea09a
PM
7607 case 0x5f: /* FDIV */
7608 case 0x7a: /* FABD */
8908f4d1 7609 case 0x7c: /* FCMGT */
845ea09a
PM
7610 handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
7611 return;
7612 default:
7613 unallocated_encoding(s);
7614 return;
7615 }
e1cea114
PM
7616}
7617
7618/* Integer op subgroup of C3.6.16. */
7619static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
7620{
1f8a73af
PM
7621 int is_q = extract32(insn, 30, 1);
7622 int u = extract32(insn, 29, 1);
7623 int size = extract32(insn, 22, 2);
7624 int opcode = extract32(insn, 11, 5);
7625 int rm = extract32(insn, 16, 5);
7626 int rn = extract32(insn, 5, 5);
7627 int rd = extract32(insn, 0, 5);
7628 int pass;
7629
7630 switch (opcode) {
7631 case 0x13: /* MUL, PMUL */
7632 if (u && size != 0) {
7633 unallocated_encoding(s);
7634 return;
7635 }
7636 /* fall through */
7637 case 0x0: /* SHADD, UHADD */
7638 case 0x2: /* SRHADD, URHADD */
7639 case 0x4: /* SHSUB, UHSUB */
7640 case 0xc: /* SMAX, UMAX */
7641 case 0xd: /* SMIN, UMIN */
7642 case 0xe: /* SABD, UABD */
7643 case 0xf: /* SABA, UABA */
7644 case 0x12: /* MLA, MLS */
7645 if (size == 3) {
7646 unallocated_encoding(s);
7647 return;
7648 }
8b12a0cf 7649 break;
1f8a73af
PM
7650 case 0x16: /* SQDMULH, SQRDMULH */
7651 if (size == 0 || size == 3) {
7652 unallocated_encoding(s);
7653 return;
7654 }
8b12a0cf 7655 break;
1f8a73af
PM
7656 default:
7657 if (size == 3 && !is_q) {
7658 unallocated_encoding(s);
7659 return;
7660 }
7661 break;
7662 }
7663
7664 if (size == 3) {
7665 for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
7666 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7667 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7668 TCGv_i64 tcg_res = tcg_temp_new_i64();
7669
7670 read_vec_element(s, tcg_op1, rn, pass, MO_64);
7671 read_vec_element(s, tcg_op2, rm, pass, MO_64);
7672
7673 handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
7674
7675 write_vec_element(s, tcg_res, rd, pass, MO_64);
7676
7677 tcg_temp_free_i64(tcg_res);
7678 tcg_temp_free_i64(tcg_op1);
7679 tcg_temp_free_i64(tcg_op2);
7680 }
7681 } else {
7682 for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
7683 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7684 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7685 TCGv_i32 tcg_res = tcg_temp_new_i32();
6d9571f7
PM
7686 NeonGenTwoOpFn *genfn = NULL;
7687 NeonGenTwoOpEnvFn *genenvfn = NULL;
1f8a73af
PM
7688
7689 read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
7690 read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
7691
7692 switch (opcode) {
8b12a0cf
PM
7693 case 0x0: /* SHADD, UHADD */
7694 {
7695 static NeonGenTwoOpFn * const fns[3][2] = {
7696 { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
7697 { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
7698 { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
7699 };
7700 genfn = fns[size][u];
7701 break;
7702 }
6d9571f7
PM
7703 case 0x1: /* SQADD, UQADD */
7704 {
7705 static NeonGenTwoOpEnvFn * const fns[3][2] = {
7706 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
7707 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
7708 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
7709 };
7710 genenvfn = fns[size][u];
7711 break;
7712 }
8b12a0cf
PM
7713 case 0x2: /* SRHADD, URHADD */
7714 {
7715 static NeonGenTwoOpFn * const fns[3][2] = {
7716 { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
7717 { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
7718 { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
7719 };
7720 genfn = fns[size][u];
7721 break;
7722 }
7723 case 0x4: /* SHSUB, UHSUB */
7724 {
7725 static NeonGenTwoOpFn * const fns[3][2] = {
7726 { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
7727 { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
7728 { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
7729 };
7730 genfn = fns[size][u];
7731 break;
7732 }
6d9571f7
PM
7733 case 0x5: /* SQSUB, UQSUB */
7734 {
7735 static NeonGenTwoOpEnvFn * const fns[3][2] = {
7736 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
7737 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
7738 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
7739 };
7740 genenvfn = fns[size][u];
7741 break;
7742 }
1f8a73af
PM
7743 case 0x6: /* CMGT, CMHI */
7744 {
7745 static NeonGenTwoOpFn * const fns[3][2] = {
7746 { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_u8 },
7747 { gen_helper_neon_cgt_s16, gen_helper_neon_cgt_u16 },
7748 { gen_helper_neon_cgt_s32, gen_helper_neon_cgt_u32 },
7749 };
7750 genfn = fns[size][u];
7751 break;
7752 }
7753 case 0x7: /* CMGE, CMHS */
7754 {
7755 static NeonGenTwoOpFn * const fns[3][2] = {
7756 { gen_helper_neon_cge_s8, gen_helper_neon_cge_u8 },
7757 { gen_helper_neon_cge_s16, gen_helper_neon_cge_u16 },
7758 { gen_helper_neon_cge_s32, gen_helper_neon_cge_u32 },
7759 };
7760 genfn = fns[size][u];
7761 break;
7762 }
6d9571f7
PM
7763 case 0x8: /* SSHL, USHL */
7764 {
7765 static NeonGenTwoOpFn * const fns[3][2] = {
7766 { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
7767 { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
7768 { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
7769 };
7770 genfn = fns[size][u];
7771 break;
7772 }
7773 case 0x9: /* SQSHL, UQSHL */
7774 {
7775 static NeonGenTwoOpEnvFn * const fns[3][2] = {
7776 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
7777 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
7778 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
7779 };
7780 genenvfn = fns[size][u];
7781 break;
7782 }
7783 case 0xa: /* SRSHL, URSHL */
7784 {
7785 static NeonGenTwoOpFn * const fns[3][2] = {
7786 { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
7787 { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
7788 { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
7789 };
7790 genfn = fns[size][u];
7791 break;
7792 }
7793 case 0xb: /* SQRSHL, UQRSHL */
7794 {
7795 static NeonGenTwoOpEnvFn * const fns[3][2] = {
7796 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
7797 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
7798 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
7799 };
7800 genenvfn = fns[size][u];
7801 break;
7802 }
8b12a0cf
PM
7803 case 0xc: /* SMAX, UMAX */
7804 {
7805 static NeonGenTwoOpFn * const fns[3][2] = {
7806 { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
7807 { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
7808 { gen_max_s32, gen_max_u32 },
7809 };
7810 genfn = fns[size][u];
7811 break;
7812 }
7813
7814 case 0xd: /* SMIN, UMIN */
7815 {
7816 static NeonGenTwoOpFn * const fns[3][2] = {
7817 { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
7818 { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
7819 { gen_min_s32, gen_min_u32 },
7820 };
7821 genfn = fns[size][u];
7822 break;
7823 }
7824 case 0xe: /* SABD, UABD */
7825 case 0xf: /* SABA, UABA */
7826 {
7827 static NeonGenTwoOpFn * const fns[3][2] = {
7828 { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
7829 { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
7830 { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
7831 };
7832 genfn = fns[size][u];
7833 break;
7834 }
1f8a73af
PM
7835 case 0x10: /* ADD, SUB */
7836 {
7837 static NeonGenTwoOpFn * const fns[3][2] = {
7838 { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
7839 { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
7840 { tcg_gen_add_i32, tcg_gen_sub_i32 },
7841 };
7842 genfn = fns[size][u];
7843 break;
7844 }
7845 case 0x11: /* CMTST, CMEQ */
7846 {
7847 static NeonGenTwoOpFn * const fns[3][2] = {
7848 { gen_helper_neon_tst_u8, gen_helper_neon_ceq_u8 },
7849 { gen_helper_neon_tst_u16, gen_helper_neon_ceq_u16 },
7850 { gen_helper_neon_tst_u32, gen_helper_neon_ceq_u32 },
7851 };
7852 genfn = fns[size][u];
7853 break;
7854 }
8b12a0cf
PM
7855 case 0x13: /* MUL, PMUL */
7856 if (u) {
7857 /* PMUL */
7858 assert(size == 0);
7859 genfn = gen_helper_neon_mul_p8;
7860 break;
7861 }
7862 /* fall through : MUL */
7863 case 0x12: /* MLA, MLS */
7864 {
7865 static NeonGenTwoOpFn * const fns[3] = {
7866 gen_helper_neon_mul_u8,
7867 gen_helper_neon_mul_u16,
7868 tcg_gen_mul_i32,
7869 };
7870 genfn = fns[size];
7871 break;
7872 }
7873 case 0x16: /* SQDMULH, SQRDMULH */
7874 {
7875 static NeonGenTwoOpEnvFn * const fns[2][2] = {
7876 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
7877 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
7878 };
7879 assert(size == 1 || size == 2);
7880 genenvfn = fns[size - 1][u];
7881 break;
7882 }
1f8a73af
PM
7883 default:
7884 g_assert_not_reached();
7885 }
7886
6d9571f7
PM
7887 if (genenvfn) {
7888 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
7889 } else {
7890 genfn(tcg_res, tcg_op1, tcg_op2);
7891 }
1f8a73af 7892
8b12a0cf
PM
7893 if (opcode == 0xf || opcode == 0x12) {
7894 /* SABA, UABA, MLA, MLS: accumulating ops */
7895 static NeonGenTwoOpFn * const fns[3][2] = {
7896 { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
7897 { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
7898 { tcg_gen_add_i32, tcg_gen_sub_i32 },
7899 };
7900 bool is_sub = (opcode == 0x12 && u); /* MLS */
7901
7902 genfn = fns[size][is_sub];
7903 read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
7904 genfn(tcg_res, tcg_res, tcg_op1);
7905 }
7906
1f8a73af
PM
7907 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7908
7909 tcg_temp_free_i32(tcg_res);
7910 tcg_temp_free_i32(tcg_op1);
7911 tcg_temp_free_i32(tcg_op2);
7912 }
7913 }
7914
7915 if (!is_q) {
7916 clear_vec_high(s, rd);
7917 }
e1cea114
PM
7918}
7919
384b26fb
AB
7920/* C3.6.16 AdvSIMD three same
7921 * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0
7922 * +---+---+---+-----------+------+---+------+--------+---+------+------+
7923 * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd |
7924 * +---+---+---+-----------+------+---+------+--------+---+------+------+
7925 */
7926static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
7927{
e1cea114
PM
7928 int opcode = extract32(insn, 11, 5);
7929
7930 switch (opcode) {
7931 case 0x3: /* logic ops */
7932 disas_simd_3same_logic(s, insn);
7933 break;
7934 case 0x17: /* ADDP */
7935 case 0x14: /* SMAXP, UMAXP */
7936 case 0x15: /* SMINP, UMINP */
bc242f9b 7937 {
e1cea114 7938 /* Pairwise operations */
bc242f9b
AB
7939 int is_q = extract32(insn, 30, 1);
7940 int u = extract32(insn, 29, 1);
7941 int size = extract32(insn, 22, 2);
7942 int rm = extract32(insn, 16, 5);
7943 int rn = extract32(insn, 5, 5);
7944 int rd = extract32(insn, 0, 5);
7945 if (opcode == 0x17) {
7946 if (u || (size == 3 && !is_q)) {
7947 unallocated_encoding(s);
7948 return;
7949 }
7950 } else {
7951 if (size == 3) {
7952 unallocated_encoding(s);
7953 return;
7954 }
7955 }
7956 handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
e1cea114 7957 break;
bc242f9b 7958 }
e1cea114
PM
7959 case 0x18 ... 0x31:
7960 /* floating point ops, sz[1] and U are part of opcode */
7961 disas_simd_3same_float(s, insn);
7962 break;
7963 default:
7964 disas_simd_3same_int(s, insn);
7965 break;
7966 }
384b26fb
AB
7967}
7968
d980fd59
PM
7969static void handle_2misc_narrow(DisasContext *s, int opcode, bool u, bool is_q,
7970 int size, int rn, int rd)
7971{
7972 /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
7973 * in the source becomes a size element in the destination).
7974 */
7975 int pass;
7976 TCGv_i32 tcg_res[2];
7977 int destelt = is_q ? 2 : 0;
7978
7979 for (pass = 0; pass < 2; pass++) {
7980 TCGv_i64 tcg_op = tcg_temp_new_i64();
7981 NeonGenNarrowFn *genfn = NULL;
7982 NeonGenNarrowEnvFn *genenvfn = NULL;
7983
7984 read_vec_element(s, tcg_op, rn, pass, MO_64);
7985 tcg_res[pass] = tcg_temp_new_i32();
7986
7987 switch (opcode) {
7988 case 0x12: /* XTN, SQXTUN */
7989 {
7990 static NeonGenNarrowFn * const xtnfns[3] = {
7991 gen_helper_neon_narrow_u8,
7992 gen_helper_neon_narrow_u16,
7993 tcg_gen_trunc_i64_i32,
7994 };
7995 static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
7996 gen_helper_neon_unarrow_sat8,
7997 gen_helper_neon_unarrow_sat16,
7998 gen_helper_neon_unarrow_sat32,
7999 };
8000 if (u) {
8001 genenvfn = sqxtunfns[size];
8002 } else {
8003 genfn = xtnfns[size];
8004 }
8005 break;
8006 }
8007 case 0x14: /* SQXTN, UQXTN */
8008 {
8009 static NeonGenNarrowEnvFn * const fns[3][2] = {
8010 { gen_helper_neon_narrow_sat_s8,
8011 gen_helper_neon_narrow_sat_u8 },
8012 { gen_helper_neon_narrow_sat_s16,
8013 gen_helper_neon_narrow_sat_u16 },
8014 { gen_helper_neon_narrow_sat_s32,
8015 gen_helper_neon_narrow_sat_u32 },
8016 };
8017 genenvfn = fns[size][u];
8018 break;
8019 }
8020 default:
8021 g_assert_not_reached();
8022 }
8023
8024 if (genfn) {
8025 genfn(tcg_res[pass], tcg_op);
8026 } else {
8027 genenvfn(tcg_res[pass], cpu_env, tcg_op);
8028 }
8029
8030 tcg_temp_free_i64(tcg_op);
8031 }
8032
8033 for (pass = 0; pass < 2; pass++) {
8034 write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
8035 tcg_temp_free_i32(tcg_res[pass]);
8036 }
8037 if (!is_q) {
8038 clear_vec_high(s, rd);
8039 }
8040}
8041
39d82118
AB
8042static void handle_rev(DisasContext *s, int opcode, bool u,
8043 bool is_q, int size, int rn, int rd)
8044{
8045 int op = (opcode << 1) | u;
8046 int opsz = op + size;
8047 int grp_size = 3 - opsz;
8048 int dsize = is_q ? 128 : 64;
8049 int i;
8050
8051 if (opsz >= 3) {
8052 unallocated_encoding(s);
8053 return;
8054 }
8055
8056 if (size == 0) {
8057 /* Special case bytes, use bswap op on each group of elements */
8058 int groups = dsize / (8 << grp_size);
8059
8060 for (i = 0; i < groups; i++) {
8061 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8062
8063 read_vec_element(s, tcg_tmp, rn, i, grp_size);
8064 switch (grp_size) {
8065 case MO_16:
8066 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
8067 break;
8068 case MO_32:
8069 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
8070 break;
8071 case MO_64:
8072 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
8073 break;
8074 default:
8075 g_assert_not_reached();
8076 }
8077 write_vec_element(s, tcg_tmp, rd, i, grp_size);
8078 tcg_temp_free_i64(tcg_tmp);
8079 }
8080 if (!is_q) {
8081 clear_vec_high(s, rd);
8082 }
8083 } else {
8084 int revmask = (1 << grp_size) - 1;
8085 int esize = 8 << size;
8086 int elements = dsize / esize;
8087 TCGv_i64 tcg_rn = tcg_temp_new_i64();
8088 TCGv_i64 tcg_rd = tcg_const_i64(0);
8089 TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
8090
8091 for (i = 0; i < elements; i++) {
8092 int e_rev = (i & 0xf) ^ revmask;
8093 int off = e_rev * esize;
8094 read_vec_element(s, tcg_rn, rn, i, size);
8095 if (off >= 64) {
8096 tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
8097 tcg_rn, off - 64, esize);
8098 } else {
8099 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
8100 }
8101 }
8102 write_vec_element(s, tcg_rd, rd, 0, MO_64);
8103 write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
8104
8105 tcg_temp_free_i64(tcg_rd_hi);
8106 tcg_temp_free_i64(tcg_rd);
8107 tcg_temp_free_i64(tcg_rn);
8108 }
8109}
8110
384b26fb
AB
8111/* C3.6.17 AdvSIMD two reg misc
8112 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
8113 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
8114 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd |
8115 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
8116 */
8117static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
8118{
45aecc6d
PM
8119 int size = extract32(insn, 22, 2);
8120 int opcode = extract32(insn, 12, 5);
8121 bool u = extract32(insn, 29, 1);
8122 bool is_q = extract32(insn, 30, 1);
94b6c911
PM
8123 int rn = extract32(insn, 5, 5);
8124 int rd = extract32(insn, 0, 5);
45aecc6d
PM
8125
8126 switch (opcode) {
8127 case 0x0: /* REV64, REV32 */
8128 case 0x1: /* REV16 */
39d82118 8129 handle_rev(s, opcode, u, is_q, size, rn, rd);
45aecc6d 8130 return;
86cbc418
PM
8131 case 0x5: /* CNT, NOT, RBIT */
8132 if (u && size == 0) {
8133 /* NOT: adjust size so we can use the 64-bits-at-a-time loop. */
8134 size = 3;
8135 break;
8136 } else if (u && size == 1) {
8137 /* RBIT */
8138 break;
8139 } else if (!u && size == 0) {
8140 /* CNT */
8141 break;
45aecc6d 8142 }
86cbc418 8143 unallocated_encoding(s);
45aecc6d 8144 return;
d980fd59
PM
8145 case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
8146 case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
8147 if (size == 3) {
8148 unallocated_encoding(s);
8149 return;
8150 }
8151 handle_2misc_narrow(s, opcode, u, is_q, size, rn, rd);
8152 return;
45aecc6d
PM
8153 case 0x2: /* SADDLP, UADDLP */
8154 case 0x4: /* CLS, CLZ */
8155 case 0x6: /* SADALP, UADALP */
45aecc6d
PM
8156 if (size == 3) {
8157 unallocated_encoding(s);
8158 return;
8159 }
8160 unsupported_encoding(s, insn);
8161 return;
8162 case 0x13: /* SHLL, SHLL2 */
8163 if (u == 0 || size == 3) {
8164 unallocated_encoding(s);
8165 return;
8166 }
8167 unsupported_encoding(s, insn);
8168 return;
8169 case 0xa: /* CMLT */
8170 if (u == 1) {
8171 unallocated_encoding(s);
8172 return;
8173 }
8174 /* fall through */
45aecc6d
PM
8175 case 0x8: /* CMGT, CMGE */
8176 case 0x9: /* CMEQ, CMLE */
8177 case 0xb: /* ABS, NEG */
94b6c911
PM
8178 if (size == 3 && !is_q) {
8179 unallocated_encoding(s);
8180 return;
8181 }
8182 break;
8183 case 0x3: /* SUQADD, USQADD */
8184 case 0x7: /* SQABS, SQNEG */
45aecc6d
PM
8185 if (size == 3 && !is_q) {
8186 unallocated_encoding(s);
8187 return;
8188 }
8189 unsupported_encoding(s, insn);
8190 return;
8191 case 0xc ... 0xf:
8192 case 0x16 ... 0x1d:
8193 case 0x1f:
8194 {
8195 /* Floating point: U, size[1] and opcode indicate operation;
8196 * size[0] indicates single or double precision.
8197 */
8198 opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
8199 size = extract32(size, 0, 1) ? 3 : 2;
8200 switch (opcode) {
f93d0138
PM
8201 case 0x2f: /* FABS */
8202 case 0x6f: /* FNEG */
8203 if (size == 3 && !is_q) {
8204 unallocated_encoding(s);
8205 return;
8206 }
8207 break;
8908f4d1
AB
8208 case 0x2c: /* FCMGT (zero) */
8209 case 0x2d: /* FCMEQ (zero) */
8210 case 0x2e: /* FCMLT (zero) */
8211 case 0x6c: /* FCMGE (zero) */
8212 case 0x6d: /* FCMLE (zero) */
8213 if (size == 3 && !is_q) {
8214 unallocated_encoding(s);
8215 return;
8216 }
8217 handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
8218 return;
45aecc6d
PM
8219 case 0x16: /* FCVTN, FCVTN2 */
8220 case 0x17: /* FCVTL, FCVTL2 */
8221 case 0x18: /* FRINTN */
8222 case 0x19: /* FRINTM */
8223 case 0x1a: /* FCVTNS */
8224 case 0x1b: /* FCVTMS */
8225 case 0x1c: /* FCVTAS */
8226 case 0x1d: /* SCVTF */
45aecc6d
PM
8227 case 0x38: /* FRINTP */
8228 case 0x39: /* FRINTZ */
8229 case 0x3a: /* FCVTPS */
8230 case 0x3b: /* FCVTZS */
8231 case 0x3c: /* URECPE */
8232 case 0x3d: /* FRECPE */
8233 case 0x56: /* FCVTXN, FCVTXN2 */
8234 case 0x58: /* FRINTA */
8235 case 0x59: /* FRINTX */
8236 case 0x5a: /* FCVTNU */
8237 case 0x5b: /* FCVTMU */
8238 case 0x5c: /* FCVTAU */
8239 case 0x5d: /* UCVTF */
45aecc6d
PM
8240 case 0x79: /* FRINTI */
8241 case 0x7a: /* FCVTPU */
8242 case 0x7b: /* FCVTZU */
8243 case 0x7c: /* URSQRTE */
8244 case 0x7d: /* FRSQRTE */
8245 case 0x7f: /* FSQRT */
8246 unsupported_encoding(s, insn);
8247 return;
8248 default:
8249 unallocated_encoding(s);
8250 return;
8251 }
8252 break;
8253 }
8254 default:
8255 unallocated_encoding(s);
8256 return;
8257 }
94b6c911
PM
8258
8259 if (size == 3) {
8260 /* All 64-bit element operations can be shared with scalar 2misc */
8261 int pass;
8262
8263 for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
8264 TCGv_i64 tcg_op = tcg_temp_new_i64();
8265 TCGv_i64 tcg_res = tcg_temp_new_i64();
8266
8267 read_vec_element(s, tcg_op, rn, pass, MO_64);
8268
8269 handle_2misc_64(s, opcode, u, tcg_res, tcg_op);
8270
8271 write_vec_element(s, tcg_res, rd, pass, MO_64);
8272
8273 tcg_temp_free_i64(tcg_res);
8274 tcg_temp_free_i64(tcg_op);
8275 }
8276 } else {
8277 int pass;
8278
8279 for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
8280 TCGv_i32 tcg_op = tcg_temp_new_i32();
8281 TCGv_i32 tcg_res = tcg_temp_new_i32();
8282 TCGCond cond;
8283
8284 read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
8285
8286 if (size == 2) {
8287 /* Special cases for 32 bit elements */
8288 switch (opcode) {
8289 case 0xa: /* CMLT */
8290 /* 32 bit integer comparison against zero, result is
8291 * test ? (2^32 - 1) : 0. We implement via setcond(test)
8292 * and inverting.
8293 */
8294 cond = TCG_COND_LT;
8295 do_cmop:
8296 tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
8297 tcg_gen_neg_i32(tcg_res, tcg_res);
8298 break;
8299 case 0x8: /* CMGT, CMGE */
8300 cond = u ? TCG_COND_GE : TCG_COND_GT;
8301 goto do_cmop;
8302 case 0x9: /* CMEQ, CMLE */
8303 cond = u ? TCG_COND_LE : TCG_COND_EQ;
8304 goto do_cmop;
8305 case 0xb: /* ABS, NEG */
8306 if (u) {
8307 tcg_gen_neg_i32(tcg_res, tcg_op);
8308 } else {
8309 TCGv_i32 tcg_zero = tcg_const_i32(0);
8310 tcg_gen_neg_i32(tcg_res, tcg_op);
8311 tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
8312 tcg_zero, tcg_op, tcg_res);
8313 tcg_temp_free_i32(tcg_zero);
8314 }
8315 break;
f93d0138
PM
8316 case 0x2f: /* FABS */
8317 gen_helper_vfp_abss(tcg_res, tcg_op);
8318 break;
8319 case 0x6f: /* FNEG */
8320 gen_helper_vfp_negs(tcg_res, tcg_op);
8321 break;
94b6c911
PM
8322 default:
8323 g_assert_not_reached();
8324 }
8325 } else {
8326 /* Use helpers for 8 and 16 bit elements */
8327 switch (opcode) {
86cbc418
PM
8328 case 0x5: /* CNT, RBIT */
8329 /* For these two insns size is part of the opcode specifier
8330 * (handled earlier); they always operate on byte elements.
8331 */
8332 if (u) {
8333 gen_helper_neon_rbit_u8(tcg_res, tcg_op);
8334 } else {
8335 gen_helper_neon_cnt_u8(tcg_res, tcg_op);
8336 }
8337 break;
94b6c911
PM
8338 case 0x8: /* CMGT, CMGE */
8339 case 0x9: /* CMEQ, CMLE */
8340 case 0xa: /* CMLT */
8341 {
8342 static NeonGenTwoOpFn * const fns[3][2] = {
8343 { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
8344 { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
8345 { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
8346 };
8347 NeonGenTwoOpFn *genfn;
8348 int comp;
8349 bool reverse;
8350 TCGv_i32 tcg_zero = tcg_const_i32(0);
8351
8352 /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
8353 comp = (opcode - 0x8) * 2 + u;
8354 /* ...but LE, LT are implemented as reverse GE, GT */
8355 reverse = (comp > 2);
8356 if (reverse) {
8357 comp = 4 - comp;
8358 }
8359 genfn = fns[comp][size];
8360 if (reverse) {
8361 genfn(tcg_res, tcg_zero, tcg_op);
8362 } else {
8363 genfn(tcg_res, tcg_op, tcg_zero);
8364 }
8365 tcg_temp_free_i32(tcg_zero);
8366 break;
8367 }
8368 case 0xb: /* ABS, NEG */
8369 if (u) {
8370 TCGv_i32 tcg_zero = tcg_const_i32(0);
8371 if (size) {
8372 gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
8373 } else {
8374 gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
8375 }
8376 tcg_temp_free_i32(tcg_zero);
8377 } else {
8378 if (size) {
8379 gen_helper_neon_abs_s16(tcg_res, tcg_op);
8380 } else {
8381 gen_helper_neon_abs_s8(tcg_res, tcg_op);
8382 }
8383 }
8384 break;
8385 default:
8386 g_assert_not_reached();
8387 }
8388 }
8389
8390 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
8391
8392 tcg_temp_free_i32(tcg_res);
8393 tcg_temp_free_i32(tcg_op);
8394 }
8395 }
8396 if (!is_q) {
8397 clear_vec_high(s, rd);
8398 }
384b26fb
AB
8399}
8400
9f82e0ff
PM
8401/* C3.6.13 AdvSIMD scalar x indexed element
8402 * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0
8403 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
8404 * | 0 1 | U | 1 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd |
8405 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
8406 * C3.6.18 AdvSIMD vector x indexed element
384b26fb
AB
8407 * 31 30 29 28 24 23 22 21 20 19 16 15 12 11 10 9 5 4 0
8408 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
8409 * | 0 | Q | U | 0 1 1 1 1 | size | L | M | Rm | opc | H | 0 | Rn | Rd |
8410 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
8411 */
9f82e0ff 8412static void disas_simd_indexed(DisasContext *s, uint32_t insn)
384b26fb 8413{
f5e51e7f
PM
8414 /* This encoding has two kinds of instruction:
8415 * normal, where we perform elt x idxelt => elt for each
8416 * element in the vector
8417 * long, where we perform elt x idxelt and generate a result of
8418 * double the width of the input element
8419 * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
8420 */
9f82e0ff 8421 bool is_scalar = extract32(insn, 28, 1);
f5e51e7f
PM
8422 bool is_q = extract32(insn, 30, 1);
8423 bool u = extract32(insn, 29, 1);
8424 int size = extract32(insn, 22, 2);
8425 int l = extract32(insn, 21, 1);
8426 int m = extract32(insn, 20, 1);
8427 /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
8428 int rm = extract32(insn, 16, 4);
8429 int opcode = extract32(insn, 12, 4);
8430 int h = extract32(insn, 11, 1);
8431 int rn = extract32(insn, 5, 5);
8432 int rd = extract32(insn, 0, 5);
8433 bool is_long = false;
8434 bool is_fp = false;
8435 int index;
8436 TCGv_ptr fpst;
8437
8438 switch (opcode) {
8439 case 0x0: /* MLA */
8440 case 0x4: /* MLS */
9f82e0ff 8441 if (!u || is_scalar) {
f5e51e7f
PM
8442 unallocated_encoding(s);
8443 return;
8444 }
8445 break;
8446 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8447 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8448 case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
9f82e0ff
PM
8449 if (is_scalar) {
8450 unallocated_encoding(s);
8451 return;
8452 }
f5e51e7f
PM
8453 is_long = true;
8454 break;
8455 case 0x3: /* SQDMLAL, SQDMLAL2 */
8456 case 0x7: /* SQDMLSL, SQDMLSL2 */
8457 case 0xb: /* SQDMULL, SQDMULL2 */
8458 is_long = true;
8459 /* fall through */
8460 case 0xc: /* SQDMULH */
8461 case 0xd: /* SQRDMULH */
f5e51e7f
PM
8462 if (u) {
8463 unallocated_encoding(s);
8464 return;
8465 }
8466 break;
9f82e0ff
PM
8467 case 0x8: /* MUL */
8468 if (u || is_scalar) {
8469 unallocated_encoding(s);
8470 return;
8471 }
8472 break;
f5e51e7f
PM
8473 case 0x1: /* FMLA */
8474 case 0x5: /* FMLS */
8475 if (u) {
8476 unallocated_encoding(s);
8477 return;
8478 }
8479 /* fall through */
8480 case 0x9: /* FMUL, FMULX */
8481 if (!extract32(size, 1, 1)) {
8482 unallocated_encoding(s);
8483 return;
8484 }
8485 is_fp = true;
8486 break;
8487 default:
8488 unallocated_encoding(s);
8489 return;
8490 }
8491
8492 if (is_fp) {
8493 /* low bit of size indicates single/double */
8494 size = extract32(size, 0, 1) ? 3 : 2;
8495 if (size == 2) {
8496 index = h << 1 | l;
8497 } else {
8498 if (l || !is_q) {
8499 unallocated_encoding(s);
8500 return;
8501 }
8502 index = h;
8503 }
8504 rm |= (m << 4);
8505 } else {
8506 switch (size) {
8507 case 1:
8508 index = h << 2 | l << 1 | m;
8509 break;
8510 case 2:
8511 index = h << 1 | l;
8512 rm |= (m << 4);
8513 break;
8514 default:
8515 unallocated_encoding(s);
8516 return;
8517 }
8518 }
8519
f5e51e7f
PM
8520 if (is_fp) {
8521 fpst = get_fpstatus_ptr();
8522 } else {
8523 TCGV_UNUSED_PTR(fpst);
8524 }
8525
8526 if (size == 3) {
8527 TCGv_i64 tcg_idx = tcg_temp_new_i64();
8528 int pass;
8529
8530 assert(is_fp && is_q && !is_long);
8531
8532 read_vec_element(s, tcg_idx, rm, index, MO_64);
8533
9f82e0ff 8534 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
f5e51e7f
PM
8535 TCGv_i64 tcg_op = tcg_temp_new_i64();
8536 TCGv_i64 tcg_res = tcg_temp_new_i64();
8537
8538 read_vec_element(s, tcg_op, rn, pass, MO_64);
8539
8540 switch (opcode) {
8541 case 0x5: /* FMLS */
8542 /* As usual for ARM, separate negation for fused multiply-add */
8543 gen_helper_vfp_negd(tcg_op, tcg_op);
8544 /* fall through */
8545 case 0x1: /* FMLA */
8546 read_vec_element(s, tcg_res, rd, pass, MO_64);
8547 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
8548 break;
8549 case 0x9: /* FMUL, FMULX */
8550 if (u) {
8551 gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
8552 } else {
8553 gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
8554 }
8555 break;
8556 default:
8557 g_assert_not_reached();
8558 }
8559
8560 write_vec_element(s, tcg_res, rd, pass, MO_64);
8561 tcg_temp_free_i64(tcg_op);
8562 tcg_temp_free_i64(tcg_res);
8563 }
8564
9f82e0ff
PM
8565 if (is_scalar) {
8566 clear_vec_high(s, rd);
8567 }
8568
f5e51e7f
PM
8569 tcg_temp_free_i64(tcg_idx);
8570 } else if (!is_long) {
9f82e0ff
PM
8571 /* 32 bit floating point, or 16 or 32 bit integer.
8572 * For the 16 bit scalar case we use the usual Neon helpers and
8573 * rely on the fact that 0 op 0 == 0 with no side effects.
8574 */
f5e51e7f 8575 TCGv_i32 tcg_idx = tcg_temp_new_i32();
9f82e0ff
PM
8576 int pass, maxpasses;
8577
8578 if (is_scalar) {
8579 maxpasses = 1;
8580 } else {
8581 maxpasses = is_q ? 4 : 2;
8582 }
f5e51e7f
PM
8583
8584 read_vec_element_i32(s, tcg_idx, rm, index, size);
8585
9f82e0ff 8586 if (size == 1 && !is_scalar) {
f5e51e7f
PM
8587 /* The simplest way to handle the 16x16 indexed ops is to duplicate
8588 * the index into both halves of the 32 bit tcg_idx and then use
8589 * the usual Neon helpers.
8590 */
8591 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
8592 }
8593
9f82e0ff 8594 for (pass = 0; pass < maxpasses; pass++) {
f5e51e7f
PM
8595 TCGv_i32 tcg_op = tcg_temp_new_i32();
8596 TCGv_i32 tcg_res = tcg_temp_new_i32();
8597
9f82e0ff 8598 read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
f5e51e7f
PM
8599
8600 switch (opcode) {
8601 case 0x0: /* MLA */
8602 case 0x4: /* MLS */
8603 case 0x8: /* MUL */
8604 {
8605 static NeonGenTwoOpFn * const fns[2][2] = {
8606 { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
8607 { tcg_gen_add_i32, tcg_gen_sub_i32 },
8608 };
8609 NeonGenTwoOpFn *genfn;
8610 bool is_sub = opcode == 0x4;
8611
8612 if (size == 1) {
8613 gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
8614 } else {
8615 tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
8616 }
8617 if (opcode == 0x8) {
8618 break;
8619 }
8620 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
8621 genfn = fns[size - 1][is_sub];
8622 genfn(tcg_res, tcg_op, tcg_res);
8623 break;
8624 }
8625 case 0x5: /* FMLS */
8626 /* As usual for ARM, separate negation for fused multiply-add */
8627 gen_helper_vfp_negs(tcg_op, tcg_op);
8628 /* fall through */
8629 case 0x1: /* FMLA */
8630 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
8631 gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
8632 break;
8633 case 0x9: /* FMUL, FMULX */
8634 if (u) {
8635 gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
8636 } else {
8637 gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
8638 }
8639 break;
8640 case 0xc: /* SQDMULH */
8641 if (size == 1) {
8642 gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
8643 tcg_op, tcg_idx);
8644 } else {
8645 gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
8646 tcg_op, tcg_idx);
8647 }
8648 break;
8649 case 0xd: /* SQRDMULH */
8650 if (size == 1) {
8651 gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
8652 tcg_op, tcg_idx);
8653 } else {
8654 gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
8655 tcg_op, tcg_idx);
8656 }
8657 break;
8658 default:
8659 g_assert_not_reached();
8660 }
8661
9f82e0ff
PM
8662 if (is_scalar) {
8663 write_fp_sreg(s, rd, tcg_res);
8664 } else {
8665 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
8666 }
8667
f5e51e7f
PM
8668 tcg_temp_free_i32(tcg_op);
8669 tcg_temp_free_i32(tcg_res);
8670 }
8671
8672 tcg_temp_free_i32(tcg_idx);
8673
8674 if (!is_q) {
8675 clear_vec_high(s, rd);
8676 }
8677 } else {
8678 /* long ops: 16x16->32 or 32x32->64 */
c44ad1fd
PM
8679 TCGv_i64 tcg_res[2];
8680 int pass;
8681 bool satop = extract32(opcode, 0, 1);
8682 TCGMemOp memop = MO_32;
8683
8684 if (satop || !u) {
8685 memop |= MO_SIGN;
8686 }
8687
8688 if (size == 2) {
8689 TCGv_i64 tcg_idx = tcg_temp_new_i64();
8690
8691 read_vec_element(s, tcg_idx, rm, index, memop);
8692
9f82e0ff 8693 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
c44ad1fd
PM
8694 TCGv_i64 tcg_op = tcg_temp_new_i64();
8695 TCGv_i64 tcg_passres;
9f82e0ff 8696 int passelt;
c44ad1fd 8697
9f82e0ff
PM
8698 if (is_scalar) {
8699 passelt = 0;
8700 } else {
8701 passelt = pass + (is_q * 2);
8702 }
8703
8704 read_vec_element(s, tcg_op, rn, passelt, memop);
c44ad1fd
PM
8705
8706 tcg_res[pass] = tcg_temp_new_i64();
8707
8708 if (opcode == 0xa || opcode == 0xb) {
8709 /* Non-accumulating ops */
8710 tcg_passres = tcg_res[pass];
8711 } else {
8712 tcg_passres = tcg_temp_new_i64();
8713 }
8714
8715 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
8716 tcg_temp_free_i64(tcg_op);
8717
8718 if (satop) {
8719 /* saturating, doubling */
8720 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
8721 tcg_passres, tcg_passres);
8722 }
8723
8724 if (opcode == 0xa || opcode == 0xb) {
8725 continue;
8726 }
8727
8728 /* Accumulating op: handle accumulate step */
8729 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8730
8731 switch (opcode) {
8732 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8733 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
8734 break;
8735 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8736 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
8737 break;
8738 case 0x7: /* SQDMLSL, SQDMLSL2 */
8739 tcg_gen_neg_i64(tcg_passres, tcg_passres);
8740 /* fall through */
8741 case 0x3: /* SQDMLAL, SQDMLAL2 */
8742 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
8743 tcg_res[pass],
8744 tcg_passres);
8745 break;
8746 default:
8747 g_assert_not_reached();
8748 }
8749 tcg_temp_free_i64(tcg_passres);
8750 }
8751 tcg_temp_free_i64(tcg_idx);
9f82e0ff
PM
8752
8753 if (is_scalar) {
8754 clear_vec_high(s, rd);
8755 }
c44ad1fd
PM
8756 } else {
8757 TCGv_i32 tcg_idx = tcg_temp_new_i32();
8758
8759 assert(size == 1);
8760 read_vec_element_i32(s, tcg_idx, rm, index, size);
8761
9f82e0ff
PM
8762 if (!is_scalar) {
8763 /* The simplest way to handle the 16x16 indexed ops is to
8764 * duplicate the index into both halves of the 32 bit tcg_idx
8765 * and then use the usual Neon helpers.
8766 */
8767 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
8768 }
c44ad1fd 8769
9f82e0ff 8770 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
c44ad1fd
PM
8771 TCGv_i32 tcg_op = tcg_temp_new_i32();
8772 TCGv_i64 tcg_passres;
8773
9f82e0ff
PM
8774 if (is_scalar) {
8775 read_vec_element_i32(s, tcg_op, rn, pass, size);
8776 } else {
8777 read_vec_element_i32(s, tcg_op, rn,
8778 pass + (is_q * 2), MO_32);
8779 }
8780
c44ad1fd
PM
8781 tcg_res[pass] = tcg_temp_new_i64();
8782
8783 if (opcode == 0xa || opcode == 0xb) {
8784 /* Non-accumulating ops */
8785 tcg_passres = tcg_res[pass];
8786 } else {
8787 tcg_passres = tcg_temp_new_i64();
8788 }
8789
8790 if (memop & MO_SIGN) {
8791 gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
8792 } else {
8793 gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
8794 }
8795 if (satop) {
8796 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
8797 tcg_passres, tcg_passres);
8798 }
8799 tcg_temp_free_i32(tcg_op);
8800
8801 if (opcode == 0xa || opcode == 0xb) {
8802 continue;
8803 }
8804
8805 /* Accumulating op: handle accumulate step */
8806 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8807
8808 switch (opcode) {
8809 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8810 gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
8811 tcg_passres);
8812 break;
8813 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8814 gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
8815 tcg_passres);
8816 break;
8817 case 0x7: /* SQDMLSL, SQDMLSL2 */
8818 gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
8819 /* fall through */
8820 case 0x3: /* SQDMLAL, SQDMLAL2 */
8821 gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
8822 tcg_res[pass],
8823 tcg_passres);
8824 break;
8825 default:
8826 g_assert_not_reached();
8827 }
8828 tcg_temp_free_i64(tcg_passres);
8829 }
8830 tcg_temp_free_i32(tcg_idx);
9f82e0ff
PM
8831
8832 if (is_scalar) {
8833 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
8834 }
8835 }
8836
8837 if (is_scalar) {
8838 tcg_res[1] = tcg_const_i64(0);
c44ad1fd
PM
8839 }
8840
8841 for (pass = 0; pass < 2; pass++) {
8842 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8843 tcg_temp_free_i64(tcg_res[pass]);
8844 }
f5e51e7f
PM
8845 }
8846
8847 if (!TCGV_IS_UNUSED_PTR(fpst)) {
8848 tcg_temp_free_ptr(fpst);
8849 }
384b26fb
AB
8850}
8851
8852/* C3.6.19 Crypto AES
8853 * 31 24 23 22 21 17 16 12 11 10 9 5 4 0
8854 * +-----------------+------+-----------+--------+-----+------+------+
8855 * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd |
8856 * +-----------------+------+-----------+--------+-----+------+------+
8857 */
8858static void disas_crypto_aes(DisasContext *s, uint32_t insn)
8859{
8860 unsupported_encoding(s, insn);
8861}
8862
8863/* C3.6.20 Crypto three-reg SHA
8864 * 31 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0
8865 * +-----------------+------+---+------+---+--------+-----+------+------+
8866 * | 0 1 0 1 1 1 1 0 | size | 0 | Rm | 0 | opcode | 0 0 | Rn | Rd |
8867 * +-----------------+------+---+------+---+--------+-----+------+------+
8868 */
8869static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
8870{
8871 unsupported_encoding(s, insn);
8872}
8873
8874/* C3.6.21 Crypto two-reg SHA
8875 * 31 24 23 22 21 17 16 12 11 10 9 5 4 0
8876 * +-----------------+------+-----------+--------+-----+------+------+
8877 * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd |
8878 * +-----------------+------+-----------+--------+-----+------+------+
8879 */
8880static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
8881{
8882 unsupported_encoding(s, insn);
8883}
8884
8885/* C3.6 Data processing - SIMD, inc Crypto
8886 *
8887 * As the decode gets a little complex we are using a table based
8888 * approach for this part of the decode.
8889 */
8890static const AArch64DecodeTable data_proc_simd[] = {
8891 /* pattern , mask , fn */
8892 { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
8893 { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
8894 { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
8895 { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
8896 { 0x0e000400, 0x9fe08400, disas_simd_copy },
9f82e0ff 8897 { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
384b26fb
AB
8898 /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
8899 { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
8900 { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
8901 { 0x0e000000, 0xbf208c00, disas_simd_tb },
8902 { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
8903 { 0x2e000000, 0xbf208400, disas_simd_ext },
8904 { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
8905 { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
8906 { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
8907 { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
8908 { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
9f82e0ff 8909 { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
384b26fb
AB
8910 { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
8911 { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
8912 { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
8913 { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
8914 { 0x00000000, 0x00000000, NULL }
8915};
8916
faa0ba46
PM
8917static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
8918{
8919 /* Note that this is called with all non-FP cases from
8920 * table C3-6 so it must UNDEF for entries not specifically
8921 * allocated to instructions in that table.
8922 */
384b26fb
AB
8923 AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
8924 if (fn) {
8925 fn(s, insn);
8926 } else {
8927 unallocated_encoding(s);
8928 }
faa0ba46
PM
8929}
8930
ad7ee8a2
CF
8931/* C3.6 Data processing - SIMD and floating point */
8932static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
8933{
faa0ba46
PM
8934 if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
8935 disas_data_proc_fp(s, insn);
8936 } else {
8937 /* SIMD, including crypto */
8938 disas_data_proc_simd(s, insn);
8939 }
ad7ee8a2
CF
8940}
8941
8942/* C3.1 A64 instruction index by encoding */
40f860cd 8943static void disas_a64_insn(CPUARMState *env, DisasContext *s)
14ade10f
AG
8944{
8945 uint32_t insn;
8946
8947 insn = arm_ldl_code(env, s->pc, s->bswap_code);
8948 s->insn = insn;
8949 s->pc += 4;
8950
ad7ee8a2
CF
8951 switch (extract32(insn, 25, 4)) {
8952 case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
14ade10f
AG
8953 unallocated_encoding(s);
8954 break;
ad7ee8a2
CF
8955 case 0x8: case 0x9: /* Data processing - immediate */
8956 disas_data_proc_imm(s, insn);
8957 break;
8958 case 0xa: case 0xb: /* Branch, exception generation and system insns */
8959 disas_b_exc_sys(s, insn);
8960 break;
8961 case 0x4:
8962 case 0x6:
8963 case 0xc:
8964 case 0xe: /* Loads and stores */
8965 disas_ldst(s, insn);
8966 break;
8967 case 0x5:
8968 case 0xd: /* Data processing - register */
8969 disas_data_proc_reg(s, insn);
8970 break;
8971 case 0x7:
8972 case 0xf: /* Data processing - SIMD and floating point */
8973 disas_data_proc_simd_fp(s, insn);
8974 break;
8975 default:
8976 assert(FALSE); /* all 15 cases should be handled above */
8977 break;
14ade10f 8978 }
11e169de
AG
8979
8980 /* if we allocated any temporaries, free them here */
8981 free_tmp_a64(s);
40f860cd 8982}
14ade10f 8983
40f860cd
PM
8984void gen_intermediate_code_internal_a64(ARMCPU *cpu,
8985 TranslationBlock *tb,
8986 bool search_pc)
8987{
8988 CPUState *cs = CPU(cpu);
8989 CPUARMState *env = &cpu->env;
8990 DisasContext dc1, *dc = &dc1;
8991 CPUBreakpoint *bp;
8992 uint16_t *gen_opc_end;
8993 int j, lj;
8994 target_ulong pc_start;
8995 target_ulong next_page_start;
8996 int num_insns;
8997 int max_insns;
8998
8999 pc_start = tb->pc;
9000
9001 dc->tb = tb;
9002
9003 gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
9004
9005 dc->is_jmp = DISAS_NEXT;
9006 dc->pc = pc_start;
9007 dc->singlestep_enabled = cs->singlestep_enabled;
9008 dc->condjmp = 0;
9009
9010 dc->aarch64 = 1;
9011 dc->thumb = 0;
9012 dc->bswap_code = 0;
9013 dc->condexec_mask = 0;
9014 dc->condexec_cond = 0;
9015#if !defined(CONFIG_USER_ONLY)
9016 dc->user = 0;
9017#endif
9018 dc->vfp_enabled = 0;
9019 dc->vec_len = 0;
9020 dc->vec_stride = 0;
60322b39
PM
9021 dc->cp_regs = cpu->cp_regs;
9022 dc->current_pl = arm_current_pl(env);
40f860cd 9023
11e169de
AG
9024 init_tmp_a64_array(dc);
9025
40f860cd
PM
9026 next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
9027 lj = -1;
9028 num_insns = 0;
9029 max_insns = tb->cflags & CF_COUNT_MASK;
9030 if (max_insns == 0) {
9031 max_insns = CF_COUNT_MASK;
9032 }
9033
9034 gen_tb_start();
9035
9036 tcg_clear_temp_count();
9037
9038 do {
9039 if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) {
9040 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
9041 if (bp->pc == dc->pc) {
9042 gen_exception_insn(dc, 0, EXCP_DEBUG);
9043 /* Advance PC so that clearing the breakpoint will
9044 invalidate this TB. */
9045 dc->pc += 2;
9046 goto done_generating;
9047 }
9048 }
9049 }
9050
9051 if (search_pc) {
9052 j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
9053 if (lj < j) {
9054 lj++;
9055 while (lj < j) {
9056 tcg_ctx.gen_opc_instr_start[lj++] = 0;
9057 }
9058 }
9059 tcg_ctx.gen_opc_pc[lj] = dc->pc;
9060 tcg_ctx.gen_opc_instr_start[lj] = 1;
9061 tcg_ctx.gen_opc_icount[lj] = num_insns;
9062 }
9063
9064 if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) {
9065 gen_io_start();
9066 }
9067
9068 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
9069 tcg_gen_debug_insn_start(dc->pc);
9070 }
9071
9072 disas_a64_insn(env, dc);
9073
9074 if (tcg_check_temp_count()) {
9075 fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n",
9076 dc->pc);
9077 }
9078
9079 /* Translation stops when a conditional branch is encountered.
9080 * Otherwise the subsequent code could get translated several times.
9081 * Also stop translation when a page boundary is reached. This
9082 * ensures prefetch aborts occur at the right place.
9083 */
9084 num_insns++;
9085 } while (!dc->is_jmp && tcg_ctx.gen_opc_ptr < gen_opc_end &&
9086 !cs->singlestep_enabled &&
9087 !singlestep &&
9088 dc->pc < next_page_start &&
9089 num_insns < max_insns);
9090
9091 if (tb->cflags & CF_LAST_IO) {
9092 gen_io_end();
9093 }
9094
9095 if (unlikely(cs->singlestep_enabled) && dc->is_jmp != DISAS_EXC) {
9096 /* Note that this means single stepping WFI doesn't halt the CPU.
9097 * For conditional branch insns this is harmless unreachable code as
9098 * gen_goto_tb() has already handled emitting the debug exception
9099 * (and thus a tb-jump is not possible when singlestepping).
9100 */
9101 assert(dc->is_jmp != DISAS_TB_JUMP);
9102 if (dc->is_jmp != DISAS_JUMP) {
9103 gen_a64_set_pc_im(dc->pc);
9104 }
9105 gen_exception(EXCP_DEBUG);
9106 } else {
9107 switch (dc->is_jmp) {
9108 case DISAS_NEXT:
9109 gen_goto_tb(dc, 1, dc->pc);
9110 break;
9111 default:
40f860cd 9112 case DISAS_UPDATE:
fea50522
PM
9113 gen_a64_set_pc_im(dc->pc);
9114 /* fall through */
9115 case DISAS_JUMP:
40f860cd
PM
9116 /* indicate that the hash table must be used to find the next TB */
9117 tcg_gen_exit_tb(0);
9118 break;
9119 case DISAS_TB_JUMP:
9120 case DISAS_EXC:
9121 case DISAS_SWI:
9122 break;
9123 case DISAS_WFI:
9124 /* This is a special case because we don't want to just halt the CPU
9125 * if trying to debug across a WFI.
9126 */
9127 gen_helper_wfi(cpu_env);
9128 break;
9129 }
9130 }
9131
9132done_generating:
9133 gen_tb_end(tb, num_insns);
9134 *tcg_ctx.gen_opc_ptr = INDEX_op_end;
9135
9136#ifdef DEBUG_DISAS
9137 if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
9138 qemu_log("----------------\n");
9139 qemu_log("IN: %s\n", lookup_symbol(pc_start));
9140 log_target_disas(env, pc_start, dc->pc - pc_start,
999b53ec 9141 4 | (dc->bswap_code << 1));
40f860cd
PM
9142 qemu_log("\n");
9143 }
9144#endif
9145 if (search_pc) {
9146 j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
9147 lj++;
9148 while (lj <= j) {
9149 tcg_ctx.gen_opc_instr_start[lj++] = 0;
9150 }
9151 } else {
9152 tb->size = dc->pc - pc_start;
9153 tb->icount = num_insns;
14ade10f
AG
9154 }
9155}
This page took 1.180551 seconds and 4 git commands to generate.