]> Git Repo - qemu.git/blame - target-arm/translate-a64.c
target-arm: Stop underdecoding ARM946 PRBS registers
[qemu.git] / target-arm / translate-a64.c
CommitLineData
14ade10f
AG
1/*
2 * AArch64 translation
3 *
4 * Copyright (c) 2013 Alexander Graf <[email protected]>
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19#include <stdarg.h>
20#include <stdlib.h>
21#include <stdio.h>
22#include <string.h>
23#include <inttypes.h>
24
25#include "cpu.h"
26#include "tcg-op.h"
27#include "qemu/log.h"
28#include "translate.h"
29#include "qemu/host-utils.h"
30
40f860cd
PM
31#include "exec/gen-icount.h"
32
14ade10f
AG
33#include "helper.h"
34#define GEN_HELPER 1
35#include "helper.h"
36
/* TCG globals backing the AArch64 CPU state: PC, the 32 X registers
 * (index 31 holds SP; ZR is synthesized at access time), and the NZCV
 * flags split into four 32 bit fields as for the A32 translator.
 */
static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;
static TCGv_i32 cpu_NF, cpu_ZF, cpu_CF, cpu_VF;

/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_addr;
static TCGv_i64 cpu_exclusive_val;
static TCGv_i64 cpu_exclusive_high;
#ifdef CONFIG_USER_ONLY
static TCGv_i64 cpu_exclusive_test;
static TCGv_i32 cpu_exclusive_info;
#endif
49
14ade10f
AG
/* Debug names for the X register TCG globals; slot 31 is registered as
 * "sp" (see cpu_reg()/cpu_reg_sp() for the ZR vs SP distinction).
 */
static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};

/* Shift-type field values as encoded in A64 data-processing insns */
enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};
63
384b26fb
AB
/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

/* One decode-table entry: the handler applies when
 * (insn & mask) == pattern; see lookup_disas_fn().
 */
typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;

/* Function prototype for gen_ functions for calling Neon helpers */
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
1f8a73af 82
14ade10f
AG
/* initialize TCG globals. */
void a64_translate_init(void)
{
    int i;

    /* Expose PC and the 32 X registers (CPUARMState fields) as TCG
     * globals so generated code can read/write them directly.
     */
    cpu_pc = tcg_global_mem_new_i64(TCG_AREG0,
                                    offsetof(CPUARMState, pc),
                                    "pc");
    for (i = 0; i < 32; i++) {
        cpu_X[i] = tcg_global_mem_new_i64(TCG_AREG0,
                                          offsetof(CPUARMState, xregs[i]),
                                          regnames[i]);
    }

    /* NZCV are kept in the same split 32 bit fields A32 uses */
    cpu_NF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, NF), "NF");
    cpu_ZF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, ZF), "ZF");
    cpu_CF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, CF), "CF");
    cpu_VF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, VF), "VF");

    /* State for load/store exclusive emulation */
    cpu_exclusive_addr = tcg_global_mem_new_i64(TCG_AREG0,
        offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
    cpu_exclusive_val = tcg_global_mem_new_i64(TCG_AREG0,
        offsetof(CPUARMState, exclusive_val), "exclusive_val");
    cpu_exclusive_high = tcg_global_mem_new_i64(TCG_AREG0,
        offsetof(CPUARMState, exclusive_high), "exclusive_high");
#ifdef CONFIG_USER_ONLY
    cpu_exclusive_test = tcg_global_mem_new_i64(TCG_AREG0,
        offsetof(CPUARMState, exclusive_test), "exclusive_test");
    cpu_exclusive_info = tcg_global_mem_new_i32(TCG_AREG0,
        offsetof(CPUARMState, exclusive_info), "exclusive_info");
#endif
}
115
/* Dump AArch64 CPU state (PC, SP, X0-X30, PSTATE flags, and when
 * CPU_DUMP_FPU is set the Q registers plus FPCR/FPSR) to stream 'f'.
 */
void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
                            fprintf_function cpu_fprintf, int flags)
{
    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;
    uint32_t psr = pstate_read(env);
    int i;

    cpu_fprintf(f, "PC=%016"PRIx64" SP=%016"PRIx64"\n",
            env->pc, env->xregs[31]);
    /* four X registers per output line */
    for (i = 0; i < 31; i++) {
        cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
        if ((i % 4) == 3) {
            cpu_fprintf(f, "\n");
        } else {
            cpu_fprintf(f, " ");
        }
    }
    cpu_fprintf(f, "PSTATE=%08x (flags %c%c%c%c)\n",
                psr,
                psr & PSTATE_N ? 'N' : '-',
                psr & PSTATE_Z ? 'Z' : '-',
                psr & PSTATE_C ? 'C' : '-',
                psr & PSTATE_V ? 'V' : '-');
    cpu_fprintf(f, "\n");

    if (flags & CPU_DUMP_FPU) {
        int numvfpregs = 32;
        /* Each 128 bit Q register occupies two consecutive 64 bit
         * vfp.regs[] slots (low half first); print two Q regs per line.
         */
        for (i = 0; i < numvfpregs; i += 2) {
            uint64_t vlo = float64_val(env->vfp.regs[i * 2]);
            uint64_t vhi = float64_val(env->vfp.regs[(i * 2) + 1]);
            cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 " ",
                        i, vhi, vlo);
            vlo = float64_val(env->vfp.regs[(i + 1) * 2]);
            vhi = float64_val(env->vfp.regs[((i + 1) * 2) + 1]);
            cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "\n",
                        i + 1, vhi, vlo);
        }
        cpu_fprintf(f, "FPCR: %08x FPSR: %08x\n",
                    vfp_get_fpcr(env), vfp_get_fpsr(env));
    }
}
158
4a08d475
PM
/* Return the MMU index to use for guest loads/stores in the current
 * translation context.
 */
static int get_mem_index(DisasContext *s)
{
#ifdef CONFIG_USER_ONLY
    /* user-mode emulation has a single, unprivileged index */
    return 1;
#else
    return s->user;
#endif
}
167
14ade10f
AG
/* Set the emulated PC to the immediate value 'val'. */
void gen_a64_set_pc_im(uint64_t val)
{
    tcg_gen_movi_i64(cpu_pc, val);
}
172
/* Emit a call to the exception helper to raise exception 'excp'. */
static void gen_exception(int excp)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    tcg_gen_movi_i32(tmp, excp);
    gen_helper_exception(cpu_env, tmp);
    tcg_temp_free_i32(tmp);
}
180
/* Raise exception 'excp' with PC rewound by 'offset' bytes so it points
 * at the faulting instruction, and mark the TB as ended.
 */
static void gen_exception_insn(DisasContext *s, int offset, int excp)
{
    gen_a64_set_pc_im(s->pc - offset);
    gen_exception(excp);
    s->is_jmp = DISAS_EXC;
}
187
188static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
189{
190 /* No direct tb linking with singlestep or deterministic io */
191 if (s->singlestep_enabled || (s->tb->cflags & CF_LAST_IO)) {
192 return false;
193 }
194
195 /* Only link tbs from inside the same guest page */
196 if ((s->tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
197 return false;
198 }
199
200 return true;
201}
202
/* Emit the end-of-TB branch to 'dest': chain TBs directly when allowed
 * by use_goto_tb(), otherwise set the PC and take a full exit (raising
 * EXCP_DEBUG first when single-stepping).
 */
static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
{
    TranslationBlock *tb;

    tb = s->tb;
    if (use_goto_tb(s, n, dest)) {
        tcg_gen_goto_tb(n);
        gen_a64_set_pc_im(dest);
        /* exit value encodes (this TB | chain slot n) for relinking */
        tcg_gen_exit_tb((tcg_target_long)tb + n);
        s->is_jmp = DISAS_TB_JUMP;
    } else {
        gen_a64_set_pc_im(dest);
        if (s->singlestep_enabled) {
            gen_exception(EXCP_DEBUG);
        }
        tcg_gen_exit_tb(0);
        s->is_jmp = DISAS_JUMP;
    }
}
222
/* Raise an UNDEF exception for an unallocated encoding; offset 4 rewinds
 * the PC to the instruction itself.
 */
static void unallocated_encoding(DisasContext *s)
{
    gen_exception_insn(s, 4, EXCP_UDEF);
}
227
ad7ee8a2
CF
/* Log a LOG_UNIMP message for an instruction we know about but do not
 * implement, then treat it as an unallocated encoding (UNDEF). This is
 * a macro so that __FILE__/__LINE__ identify the calling decoder.
 *
 * Fix: the expansion previously ended "} while (0);" - the trailing
 * semicolon defeats the do/while(0) idiom and breaks call sites of the
 * form "if (cond) unsupported_encoding(s, insn); else ...".
 */
#define unsupported_encoding(s, insn)                                    \
    do {                                                                 \
        qemu_log_mask(LOG_UNIMP,                                         \
                      "%s:%d: unsupported instruction encoding 0x%08x "  \
                      "at pc=%016" PRIx64 "\n",                          \
                      __FILE__, __LINE__, insn, s->pc - 4);              \
        unallocated_encoding(s);                                         \
    } while (0)
14ade10f 236
11e169de
AG
/* Reset the per-instruction temporary list; under CONFIG_DEBUG_TCG the
 * slots are also poisoned so stale use is detected.
 */
static void init_tmp_a64_array(DisasContext *s)
{
#ifdef CONFIG_DEBUG_TCG
    int i;
    for (i = 0; i < ARRAY_SIZE(s->tmp_a64); i++) {
        TCGV_UNUSED_I64(s->tmp_a64[i]);
    }
#endif
    s->tmp_a64_count = 0;
}
247
/* Free every temporary handed out by new_tmp_a64() since the last reset. */
static void free_tmp_a64(DisasContext *s)
{
    int i;
    for (i = 0; i < s->tmp_a64_count; i++) {
        tcg_temp_free_i64(s->tmp_a64[i]);
    }
    init_tmp_a64_array(s);
}
256
/* Allocate an i64 temporary that free_tmp_a64() will release. */
static TCGv_i64 new_tmp_a64(DisasContext *s)
{
    assert(s->tmp_a64_count < TMP_A64_MAX);
    return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
}
262
/* As new_tmp_a64(), but the temporary is initialized to zero (used to
 * synthesize reads/writes of ZR).
 */
static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
{
    TCGv_i64 t = new_tmp_a64(s);
    tcg_gen_movi_i64(t, 0);
    return t;
}
269
71b46089
AG
270/*
271 * Register access functions
272 *
273 * These functions are used for directly accessing a register in where
274 * changes to the final register value are likely to be made. If you
275 * need to use a register for temporary calculation (e.g. index type
276 * operations) use the read_* form.
277 *
278 * B1.2.1 Register mappings
279 *
280 * In instruction register encoding 31 can refer to ZR (zero register) or
281 * the SP (stack pointer) depending on context. In QEMU's case we map SP
282 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
283 * This is the point of the _sp forms.
284 */
11e169de
AG
285static TCGv_i64 cpu_reg(DisasContext *s, int reg)
286{
287 if (reg == 31) {
288 return new_tmp_a64_zero(s);
289 } else {
290 return cpu_X[reg];
291 }
292}
293
71b46089
AG
/* register access for when 31 == SP (no ZR remapping: cpu_X[31] is SP) */
static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}
299
60e53388
AG
300/* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
301 * representing the register contents. This TCGv is an auto-freed
302 * temporary so it need not be explicitly freed, and may be modified.
303 */
304static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
305{
306 TCGv_i64 v = new_tmp_a64(s);
307 if (reg != 31) {
308 if (sf) {
309 tcg_gen_mov_i64(v, cpu_X[reg]);
310 } else {
311 tcg_gen_ext32u_i64(v, cpu_X[reg]);
312 }
313 } else {
314 tcg_gen_movi_i64(v, 0);
315 }
316 return v;
317}
318
4a08d475
PM
319static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
320{
321 TCGv_i64 v = new_tmp_a64(s);
322 if (sf) {
323 tcg_gen_mov_i64(v, cpu_X[reg]);
324 } else {
325 tcg_gen_ext32u_i64(v, cpu_X[reg]);
326 }
327 return v;
328}
329
72430bf5
AB
/* Return the offset into CPUARMState of an element of specified
 * size, 'element' places in from the least significant end of
 * the FP/vector register Qn.
 */
static inline int vec_reg_offset(int regno, int element, TCGMemOp size)
{
    int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
#ifdef HOST_WORDS_BIGENDIAN
    /* This is complicated slightly because vfp.regs[2n] is
     * still the low half and vfp.regs[2n+1] the high half
     * of the 128 bit vector, even on big endian systems.
     * Calculate the offset assuming a fully bigendian 128 bits,
     * then XOR to account for the order of the two 64 bit halves.
     */
    offs += (16 - ((element + 1) * (1 << size)));
    offs ^= 8;
#else
    /* little endian: element 0 is at the lowest address */
    offs += element * (1 << size);
#endif
    return offs;
}
351
e2f90565
PM
/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 */
static inline int fp_reg_offset(int regno, TCGMemOp size)
{
    int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
#ifdef HOST_WORDS_BIGENDIAN
    /* the narrow slice sits at the high-address end of the 64 bit slot */
    offs += (8 - (1 << size));
#endif
    return offs;
}
365
/* Offset of the high half of the 128 bit vector Qn */
static inline int fp_reg_hi_offset(int regno)
{
    return offsetof(CPUARMState, vfp.regs[regno * 2 + 1]);
}
371
ec73d2e0
AG
/* Convenience accessors for reading and writing single and double
 * FP registers. Writing clears the upper parts of the associated
 * 128 bit vector register, as required by the architecture.
 * Note that unlike the GP register accessors, the values returned
 * by the read functions must be manually freed.
 */
static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
{
    TCGv_i64 v = tcg_temp_new_i64();

    /* low 64 bits of Qn == Dn; caller frees v */
    tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(reg, MO_64));
    return v;
}
385
/* Read Sn (low 32 bits of Qn) into a new i32; caller must free it. */
static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
{
    TCGv_i32 v = tcg_temp_new_i32();

    tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(reg, MO_32));
    return v;
}
393
/* Write 'v' to Dn and zero the high 64 bits of Qn, as the architecture
 * requires for scalar FP writes.
 */
static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
{
    TCGv_i64 tcg_zero = tcg_const_i64(0);

    tcg_gen_st_i64(v, cpu_env, fp_reg_offset(reg, MO_64));
    tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(reg));
    tcg_temp_free_i64(tcg_zero);
}
402
/* Write 'v' to Sn; zero-extending to 64 bits and using write_fp_dreg()
 * clears both bits [63:32] and the high half of Qn.
 */
static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
{
    TCGv_i64 tmp = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp, v);
    write_fp_dreg(s, reg, tmp);
    tcg_temp_free_i64(tmp);
}
411
/* Return a TCGv_ptr to the FP status block; caller frees the pointer. */
static TCGv_ptr get_fpstatus_ptr(void)
{
    TCGv_ptr statusptr = tcg_temp_new_ptr();
    int offset;

    /* In A64 all instructions (both FP and Neon) use the FPCR;
     * there is no equivalent of the A32 Neon "standard FPSCR value"
     * and all operations use vfp.fp_status.
     */
    offset = offsetof(CPUARMState, vfp.fp_status);
    tcg_gen_addi_ptr(statusptr, cpu_env, offset);
    return statusptr;
}
425
832ffa1c
AG
/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    TCGv_i64 flag = tcg_temp_new_i64();

    /* ZF holds nonzero iff result != 0 (Z set when ZF == 0) */
    tcg_gen_setcondi_i64(TCG_COND_NE, flag, result, 0);
    tcg_gen_trunc_i64_i32(cpu_ZF, flag);
    /* NF takes the top 32 bits; its sign bit is the result's bit 63 */
    tcg_gen_shri_i64(flag, result, 32);
    tcg_gen_trunc_i64_i32(cpu_NF, flag);
    tcg_temp_free_i64(flag);
}
439
/* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
static inline void gen_logic_CC(int sf, TCGv_i64 result)
{
    if (sf) {
        gen_set_NZ64(result);
    } else {
        /* 32 bit: NF/ZF are just the low word of the result */
        tcg_gen_trunc_i64_i32(cpu_ZF, result);
        tcg_gen_trunc_i64_i32(cpu_NF, result);
    }
    tcg_gen_movi_i32(cpu_CF, 0);
    tcg_gen_movi_i32(cpu_VF, 0);
}
452
b0ff21b4
AB
/* dest = T0 + T1; compute C, N, V and Z flags */
static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, flag, tmp;
        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tmp = tcg_temp_new_i64();

        /* add2 gives the 128 bit sum; the high word is the carry out */
        tcg_gen_movi_i64(tmp, 0);
        tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

        tcg_gen_trunc_i64_i32(cpu_CF, flag);

        gen_set_NZ64(result);

        /* V = (result ^ t0) & ~(t0 ^ t1): overflow iff operands agree
         * in sign but the result's sign differs.
         */
        tcg_gen_xor_i64(flag, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_shri_i64(flag, flag, 32);
        tcg_gen_trunc_i64_i32(cpu_VF, flag);

        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(result);
        tcg_temp_free_i64(flag);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();

        tcg_gen_movi_i32(tmp, 0);
        tcg_gen_trunc_i64_i32(t0_32, t0);
        tcg_gen_trunc_i64_i32(t1_32, t1);
        /* NF holds the 32 bit sum, CF its carry out */
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        /* result (zero-extended) doubles as the destination value */
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
    }
}
500
/* dest = T0 - T1; compute C, N, V and Z flags */
static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        /* 64 bit arithmetic */
        TCGv_i64 result, flag, tmp;

        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tcg_gen_sub_i64(result, t0, t1);

        gen_set_NZ64(result);

        /* ARM convention: C set when there is NO borrow, i.e. t0 >= t1 */
        tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
        tcg_gen_trunc_i64_i32(cpu_CF, flag);

        /* V = (result ^ t0) & (t0 ^ t1): overflow iff operands differ
         * in sign and the result's sign differs from t0's.
         */
        tcg_gen_xor_i64(flag, result, t0);
        tmp = tcg_temp_new_i64();
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_and_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_shri_i64(flag, flag, 32);
        tcg_gen_trunc_i64_i32(cpu_VF, flag);
        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(flag);
        tcg_temp_free_i64(result);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp;

        tcg_gen_trunc_i64_i32(t0_32, t0);
        tcg_gen_trunc_i64_i32(t1_32, t1);
        tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
        tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
        tcg_temp_free_i32(tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}
548
643dbb07
CF
/* dest = T0 + T1 + CF; do not compute flags. */
static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    TCGv_i64 flag = tcg_temp_new_i64();
    tcg_gen_extu_i32_i64(flag, cpu_CF);
    tcg_gen_add_i64(dest, t0, t1);
    tcg_gen_add_i64(dest, dest, flag);
    tcg_temp_free_i64(flag);

    /* in 32 bit mode the result is truncated (zero-extended) to 32 bits */
    if (!sf) {
        tcg_gen_ext32u_i64(dest, dest);
    }
}
562
/* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, cf_64, vf_64, tmp;
        result = tcg_temp_new_i64();
        cf_64 = tcg_temp_new_i64();
        vf_64 = tcg_temp_new_i64();
        tmp = tcg_const_i64(0);

        /* two add2 steps: t0 + CF, then + t1, accumulating carry in cf_64 */
        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
        tcg_gen_trunc_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        /* V = (result ^ t0) & ~(t0 ^ t1), as for plain addition */
        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_shri_i64(vf_64, vf_64, 32);
        tcg_gen_trunc_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);

        tcg_temp_free_i64(tmp);
        tcg_temp_free_i64(vf_64);
        tcg_temp_free_i64(cf_64);
        tcg_temp_free_i64(result);
    } else {
        TCGv_i32 t0_32, t1_32, tmp;
        t0_32 = tcg_temp_new_i32();
        t1_32 = tcg_temp_new_i32();
        tmp = tcg_const_i32(0);

        tcg_gen_trunc_i64_i32(t0_32, t0);
        tcg_gen_trunc_i64_i32(t1_32, t1);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t1_32);
        tcg_temp_free_i32(t0_32);
    }
}
613
4a08d475
PM
614/*
615 * Load/Store generators
616 */
617
/*
 * Store from GPR register to memory
 * (size is log2 of the byte count: 0..3, i.e. byte to doubleword)
 */
static void do_gpr_st(DisasContext *s, TCGv_i64 source,
                      TCGv_i64 tcg_addr, int size)
{
    g_assert(size <= 3);
    tcg_gen_qemu_st_i64(source, tcg_addr, get_mem_index(s), MO_TE + size);
}
627
/*
 * Load from memory to GPR register.
 * 'is_signed' sign-extends the loaded value to 64 bits; 'extend' then
 * zero-clears bits [63:32] (the W-register form of a signed load).
 */
static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
                      int size, bool is_signed, bool extend)
{
    TCGMemOp memop = MO_TE + size;

    g_assert(size <= 3);

    if (is_signed) {
        memop += MO_SIGN;
    }

    tcg_gen_qemu_ld_i64(dest, tcg_addr, get_mem_index(s), memop);

    if (extend && is_signed) {
        g_assert(size < 3);
        tcg_gen_ext32u_i64(dest, dest);
    }
}
649
650/*
651 * Store from FP register to memory
652 */
653static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
654{
655 /* This writes the bottom N bits of a 128 bit wide vector to memory */
4a08d475 656 TCGv_i64 tmp = tcg_temp_new_i64();
e2f90565 657 tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(srcidx, MO_64));
4a08d475 658 if (size < 4) {
4a08d475
PM
659 tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TE + size);
660 } else {
661 TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
4a08d475
PM
662 tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TEQ);
663 tcg_gen_qemu_st64(tmp, tcg_addr, get_mem_index(s));
e2f90565 664 tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(srcidx));
4a08d475
PM
665 tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
666 tcg_gen_qemu_st_i64(tmp, tcg_hiaddr, get_mem_index(s), MO_TEQ);
667 tcg_temp_free_i64(tcg_hiaddr);
668 }
669
670 tcg_temp_free_i64(tmp);
671}
672
/*
 * Load from memory to FP register
 */
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
{
    /* This always zero-extends and writes to a full 128 bit wide vector */
    TCGv_i64 tmplo = tcg_temp_new_i64();
    TCGv_i64 tmphi;

    if (size < 4) {
        /* narrow load: high half of Qn becomes zero */
        TCGMemOp memop = MO_TE + size;
        tmphi = tcg_const_i64(0);
        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
    } else {
        /* 128 bit load: low doubleword from addr, high from addr + 8 */
        TCGv_i64 tcg_hiaddr;
        tmphi = tcg_temp_new_i64();
        tcg_hiaddr = tcg_temp_new_i64();

        tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), MO_TEQ);
        tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
        tcg_gen_qemu_ld_i64(tmphi, tcg_hiaddr, get_mem_index(s), MO_TEQ);
        tcg_temp_free_i64(tcg_hiaddr);
    }

    tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(destidx, MO_64));
    tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(destidx));

    tcg_temp_free_i64(tmplo);
    tcg_temp_free_i64(tmphi);
}
703
72430bf5
AB
704/*
705 * Vector load/store helpers.
706 *
707 * The principal difference between this and a FP load is that we don't
708 * zero extend as we are filling a partial chunk of the vector register.
709 * These functions don't support 128 bit loads/stores, which would be
710 * normal load/store operations.
a08582f4
PM
711 *
712 * The _i32 versions are useful when operating on 32 bit quantities
713 * (eg for floating point single or using Neon helper functions).
72430bf5
AB
714 */
715
/* Get value of an element within a vector register, widened (sign- or
 * zero-extended per memop) into an i64.
 */
static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
                             int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32|MO_SIGN:
        tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
        break;
    case MO_64:
    case MO_64|MO_SIGN:
        tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}
748
a08582f4
PM
/* As read_vec_element(), but into an i32 (elements up to 32 bits). */
static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
                                 int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(srcidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_8|MO_SIGN:
        tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_16|MO_SIGN:
        tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
        break;
    case MO_32:
    case MO_32|MO_SIGN:
        tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}
774
72430bf5
AB
/* Set value of an element within a vector register (the low 2^size
 * bytes of tcg_src are stored; sign bits in memop are irrelevant).
 */
static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
                              int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
        break;
    case MO_64:
        tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}
797
1f8a73af
PM
/* As write_vec_element(), but from an i32 source (elements up to 32 bits). */
static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
                                  int destidx, int element, TCGMemOp memop)
{
    int vect_off = vec_reg_offset(destidx, element, memop & MO_SIZE);
    switch (memop) {
    case MO_8:
        tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_16:
        tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
        break;
    case MO_32:
        tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
        break;
    default:
        g_assert_not_reached();
    }
}
816
72430bf5
AB
/* Clear the high 64 bits of a 128 bit vector (in general non-quad
 * vector ops all need to do this).
 */
static void clear_vec_high(DisasContext *s, int rd)
{
    TCGv_i64 tcg_zero = tcg_const_i64(0);

    write_vec_element(s, tcg_zero, rd, 1, MO_64);
    tcg_temp_free_i64(tcg_zero);
}
827
/* Store from vector register to memory (one element of 2^size bytes). */
static void do_vec_st(DisasContext *s, int srcidx, int element,
                      TCGv_i64 tcg_addr, int size)
{
    TCGMemOp memop = MO_TE + size;
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    read_vec_element(s, tcg_tmp, srcidx, element, size);
    tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);

    tcg_temp_free_i64(tcg_tmp);
}
840
/* Load from memory to vector register (one element; other elements of
 * the destination register are left untouched).
 */
static void do_vec_ld(DisasContext *s, int destidx, int element,
                      TCGv_i64 tcg_addr, int size)
{
    TCGMemOp memop = MO_TE + size;
    TCGv_i64 tcg_tmp = tcg_temp_new_i64();

    tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
    write_vec_element(s, tcg_tmp, destidx, element, size);

    tcg_temp_free_i64(tcg_tmp);
}
853
229b7a05
AB
854/*
855 * This utility function is for doing register extension with an
856 * optional shift. You will likely want to pass a temporary for the
857 * destination register. See DecodeRegExtend() in the ARM ARM.
858 */
859static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
860 int option, unsigned int shift)
861{
862 int extsize = extract32(option, 0, 2);
863 bool is_signed = extract32(option, 2, 1);
864
865 if (is_signed) {
866 switch (extsize) {
867 case 0:
868 tcg_gen_ext8s_i64(tcg_out, tcg_in);
869 break;
870 case 1:
871 tcg_gen_ext16s_i64(tcg_out, tcg_in);
872 break;
873 case 2:
874 tcg_gen_ext32s_i64(tcg_out, tcg_in);
875 break;
876 case 3:
877 tcg_gen_mov_i64(tcg_out, tcg_in);
878 break;
879 }
880 } else {
881 switch (extsize) {
882 case 0:
883 tcg_gen_ext8u_i64(tcg_out, tcg_in);
884 break;
885 case 1:
886 tcg_gen_ext16u_i64(tcg_out, tcg_in);
887 break;
888 case 2:
889 tcg_gen_ext32u_i64(tcg_out, tcg_in);
890 break;
891 case 3:
892 tcg_gen_mov_i64(tcg_out, tcg_in);
893 break;
894 }
895 }
896
897 if (shift) {
898 tcg_gen_shli_i64(tcg_out, tcg_out, shift);
899 }
900}
901
4a08d475
PM
/* Hook for the architectural SP alignment check; currently a no-op. */
static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}
914
384b26fb
AB
915/*
916 * This provides a simple table based table lookup decoder. It is
917 * intended to be used when the relevant bits for decode are too
918 * awkwardly placed and switch/if based logic would be confusing and
919 * deeply nested. Since it's a linear search through the table, tables
920 * should be kept small.
921 *
922 * It returns the first handler where insn & mask == pattern, or
923 * NULL if there is no match.
924 * The table is terminated by an empty mask (i.e. 0)
925 */
926static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
927 uint32_t insn)
928{
929 const AArch64DecodeTable *tptr = table;
930
931 while (tptr->mask) {
932 if ((insn & tptr->mask) == tptr->pattern) {
933 return tptr->disas_fn;
934 }
935 tptr++;
936 }
937 return NULL;
938}
939
ad7ee8a2
CF
940/*
941 * the instruction disassembly implemented here matches
942 * the instruction encoding classifications in chapter 3 (C3)
943 * of the ARM Architecture Reference Manual (DDI0487A_a)
944 */
945
11e169de
AG
946/* C3.2.7 Unconditional branch (immediate)
947 * 31 30 26 25 0
948 * +----+-----------+-------------------------------------+
949 * | op | 0 0 1 0 1 | imm26 |
950 * +----+-----------+-------------------------------------+
951 */
ad7ee8a2
CF
952static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
953{
11e169de
AG
954 uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;
955
956 if (insn & (1 << 31)) {
957 /* C5.6.26 BL Branch with link */
958 tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
959 }
960
961 /* C5.6.20 B Branch / C5.6.26 BL Branch with link */
962 gen_goto_tb(s, 0, addr);
ad7ee8a2
CF
963}
964
60e53388
AG
/* C3.2.1 Compare & branch (immediate)
 *   31  30         25  24  23                  5 4      0
 * +----+-------------+----+---------------------+--------+
 * | sf | 0 1 1 0 1 0 | op |        imm19        |   Rt   |
 * +----+-------------+----+---------------------+--------+
 */
static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, rt;
    uint64_t addr;
    int label_match;
    TCGv_i64 tcg_cmp;

    sf = extract32(insn, 31, 1);
    op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
    rt = extract32(insn, 0, 5);
    /* imm19 is a word offset relative to this insn (s->pc is past it) */
    addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;

    tcg_cmp = read_cpu_reg(s, rt, sf);
    label_match = gen_new_label();

    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, label_match);

    /* fall-through path: continue at the next instruction */
    gen_goto_tb(s, 0, s->pc);
    gen_set_label(label_match);
    gen_goto_tb(s, 1, addr);
}
993
db0f7958
AG
/* C3.2.5 Test & branch (immediate)
 *   31  30         25  24  23   19 18          5 4    0
 * +----+-------------+----+-------+-------------+------+
 * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
 * +----+-------------+----+-------+-------------+------+
 */
static void disas_test_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int bit_pos, op, rt;
    uint64_t addr;
    int label_match;
    TCGv_i64 tcg_cmp;

    /* tested bit index is b5:b40 (6 bits, 0..63) */
    bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
    op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
    addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
    rt = extract32(insn, 0, 5);

    tcg_cmp = tcg_temp_new_i64();
    /* isolate the tested bit; branch on it being (non)zero */
    tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
    label_match = gen_new_label();
    tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
                        tcg_cmp, 0, label_match);
    tcg_temp_free_i64(tcg_cmp);
    gen_goto_tb(s, 0, s->pc);
    gen_set_label(label_match);
    gen_goto_tb(s, 1, addr);
}
1022
39fb730a
AG
/* C3.2.2 / C5.6.19 Conditional branch (immediate)
 *  31           25  24  23                  5   4  3    0
 * +---------------+----+---------------------+----+------+
 * | 0 1 0 1 0 1 0 | o1 |        imm19        | o0 | cond |
 * +---------------+----+---------------------+----+------+
 */
static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
{
    unsigned int cond;
    uint64_t addr;

    /* o0 and o1 must both be zero for B.cond */
    if ((insn & (1 << 4)) || (insn & (1 << 24))) {
        unallocated_encoding(s);
        return;
    }
    addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
    cond = extract32(insn, 0, 4);

    if (cond < 0x0e) {
        /* genuinely conditional branches */
        int label_match = gen_new_label();
        arm_gen_test_cc(cond, label_match);
        gen_goto_tb(s, 0, s->pc);
        gen_set_label(label_match);
        gen_goto_tb(s, 1, addr);
    } else {
        /* 0xe and 0xf are both "always" conditions */
        gen_goto_tb(s, 0, addr);
    }
}
1053
87462e0f
CF
1054/* C5.6.68 HINT */
1055static void handle_hint(DisasContext *s, uint32_t insn,
1056 unsigned int op1, unsigned int op2, unsigned int crm)
1057{
1058 unsigned int selector = crm << 3 | op2;
1059
1060 if (op1 != 3) {
1061 unallocated_encoding(s);
1062 return;
1063 }
1064
1065 switch (selector) {
1066 case 0: /* NOP */
1067 return;
1068 case 1: /* YIELD */
1069 case 2: /* WFE */
1070 case 3: /* WFI */
1071 case 4: /* SEV */
1072 case 5: /* SEVL */
1073 /* we treat all as NOP at least for now */
1074 return;
1075 default:
1076 /* default specified as NOP equivalent */
1077 return;
1078 }
1079}
1080
/* Clear the local exclusive monitor: store -1 (an address that can
 * never match a real exclusive access) as the monitored address.
 */
static void gen_clrex(DisasContext *s, uint32_t insn)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
1085
/* CLREX, DSB, DMB, ISB: the "synchronization" group of the System
 * instruction class, selected by op2.
 */
static void handle_sync(DisasContext *s, uint32_t insn,
                        unsigned int op1, unsigned int op2, unsigned int crm)
{
    if (op1 != 3) {
        unallocated_encoding(s);
        return;
    }

    switch (op2) {
    case 2: /* CLREX */
        gen_clrex(s, insn);
        return;
    case 4: /* DSB */
    case 5: /* DMB */
    case 6: /* ISB */
        /* We don't emulate caches so barriers are no-ops */
        return;
    default:
        unallocated_encoding(s);
        return;
    }
}
1109
/* C5.6.130 MSR (immediate) - move immediate to processor state field.
 * Not implemented yet: reported via the "unsupported" diagnostic path.
 */
static void handle_msr_i(DisasContext *s, uint32_t insn,
                         unsigned int op1, unsigned int op2, unsigned int crm)
{
    unsupported_encoding(s, insn);
}
1116
/* Assemble the architectural NZCV value (bits 31..28 of tcg_rt, all
 * other bits zero) from the split-out cpu_NF/ZF/CF/VF flag variables.
 */
static void gen_get_nzcv(TCGv_i64 tcg_rt)
{
    TCGv_i32 tmp = tcg_temp_new_i32();
    TCGv_i32 nzcv = tcg_temp_new_i32();

    /* build bit 31, N: cpu_NF holds N in its sign bit */
    tcg_gen_andi_i32(nzcv, cpu_NF, (1 << 31));
    /* build bit 30, Z: cpu_ZF is zero iff Z is set, so invert via setcond */
    tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
    /* build bit 29, C: cpu_CF is already 0 or 1 */
    tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
    /* build bit 28, V: cpu_VF holds V in its sign bit */
    tcg_gen_shri_i32(tmp, cpu_VF, 31);
    tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
    /* generate result */
    tcg_gen_extu_i32_i64(tcg_rt, nzcv);

    tcg_temp_free_i32(nzcv);
    tcg_temp_free_i32(tmp);
}
1138
/* Scatter an architectural NZCV value (bits 31..28 of tcg_rt) back
 * into the split-out cpu_NF/ZF/CF/VF flag variables.
 */
static void gen_set_nzcv(TCGv_i64 tcg_rt)
{
    TCGv_i32 nzcv = tcg_temp_new_i32();

    /* take NZCV from R[t] */
    tcg_gen_trunc_i64_i32(nzcv, tcg_rt);

    /* bit 31, N: stored directly in the sign bit of cpu_NF */
    tcg_gen_andi_i32(cpu_NF, nzcv, (1 << 31));
    /* bit 30, Z: cpu_ZF must be zero iff Z is set, hence the setcond */
    tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
    tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
    /* bit 29, C: normalize down to 0/1 */
    tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
    tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
    /* bit 28, V: shift up into the sign bit of cpu_VF */
    tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
    tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
    tcg_temp_free_i32(nzcv);
}
1160
/* C5.6.129 MRS - move from system register
 * C5.6.131 MSR (register) - move to system register
 * C5.6.204 SYS
 * C5.6.205 SYSL
 * These are all essentially the same insn in 'read' and 'write'
 * versions, with varying op0 fields.
 */
static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
                       unsigned int op0, unsigned int op1, unsigned int op2,
                       unsigned int crn, unsigned int crm, unsigned int rt)
{
    const ARMCPRegInfo *ri;
    TCGv_i64 tcg_rt;

    /* Look the register up in the AArch64 view of the cpreg table */
    ri = get_arm_cp_reginfo(s->cp_regs,
                            ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
                                               crn, crm, op0, op1, op2));

    if (!ri) {
        /* Unknown register; this might be a guest error or a QEMU
         * unimplemented feature.
         */
        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
                      isread ? "read" : "write", op0, op1, crn, crm, op2);
        unallocated_encoding(s);
        return;
    }

    /* Check access permissions */
    if (!cp_access_ok(s->current_pl, ri, isread)) {
        unallocated_encoding(s);
        return;
    }

    /* Handle special cases first */
    switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
    case ARM_CP_NOP:
        return;
    case ARM_CP_NZCV:
        /* NZCV is a pseudo-register backed by the split flag variables */
        tcg_rt = cpu_reg(s, rt);
        if (isread) {
            gen_get_nzcv(tcg_rt);
        } else {
            gen_set_nzcv(tcg_rt);
        }
        return;
    default:
        break;
    }

    if (use_icount && (ri->type & ARM_CP_IO)) {
        gen_io_start();
    }

    tcg_rt = cpu_reg(s, rt);

    if (isread) {
        if (ri->type & ARM_CP_CONST) {
            tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
        } else if (ri->readfn) {
            TCGv_ptr tmpptr;
            /* Sync the PC so the helper can raise a precise exception */
            gen_a64_set_pc_im(s->pc - 4);
            tmpptr = tcg_const_ptr(ri);
            gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
            tcg_temp_free_ptr(tmpptr);
        } else {
            /* Plain field: load directly from the CPU state struct */
            tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    } else {
        if (ri->type & ARM_CP_CONST) {
            /* If not forbidden by access permissions, treat as WI */
            return;
        } else if (ri->writefn) {
            TCGv_ptr tmpptr;
            gen_a64_set_pc_im(s->pc - 4);
            tmpptr = tcg_const_ptr(ri);
            gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
            tcg_temp_free_ptr(tmpptr);
        } else {
            tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    }

    if (use_icount && (ri->type & ARM_CP_IO)) {
        /* I/O operations must end the TB here (whether read or write) */
        gen_io_end();
        s->is_jmp = DISAS_UPDATE;
    } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
        /* We default to ending the TB on a coprocessor register write,
         * but allow this to be suppressed by the register definition
         * (usually only necessary to work around guest bugs).
         */
        s->is_jmp = DISAS_UPDATE;
    }
}
1257
/* C3.2.4 System
 *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
 * +---------------------+---+-----+-----+-------+-------+-----+------+
 * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
 * +---------------------+---+-----+-----+-------+-------+-----+------+
 */
static void disas_system(DisasContext *s, uint32_t insn)
{
    unsigned int l, op0, op1, crn, crm, op2, rt;
    l = extract32(insn, 21, 1);
    op0 = extract32(insn, 19, 2);
    op1 = extract32(insn, 16, 3);
    crn = extract32(insn, 12, 4);
    crm = extract32(insn, 8, 4);
    op2 = extract32(insn, 5, 3);
    rt = extract32(insn, 0, 5);

    if (op0 == 0) {
        /* op0 == 0 covers the instruction-like forms (HINT, barriers,
         * MSR-immediate), which require L clear and Rt == 31
         */
        if (l || rt != 31) {
            unallocated_encoding(s);
            return;
        }
        switch (crn) {
        case 2: /* C5.6.68 HINT */
            handle_hint(s, insn, op1, op2, crm);
            break;
        case 3: /* CLREX, DSB, DMB, ISB */
            handle_sync(s, insn, op1, op2, crm);
            break;
        case 4: /* C5.6.130 MSR (immediate) */
            handle_msr_i(s, insn, op1, op2, crm);
            break;
        default:
            unallocated_encoding(s);
            break;
        }
        return;
    }
    /* Everything else is an MRS/MSR/SYS/SYSL register access */
    handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
}
1298
/* C3.2.3 Exception generation
 *
 *  31             24 23 21 20                     5 4   2 1  0
 * +-----------------+-----+------------------------+-----+----+
 * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
 * +-----------------------+------------------------+----------+
 */
static void disas_exc(DisasContext *s, uint32_t insn)
{
    int opc = extract32(insn, 21, 3);
    int op2_ll = extract32(insn, 0, 5);

    switch (opc) {
    case 0:
        /* SVC, HVC, SMC; since we don't support the Virtualization
         * or TrustZone extensions these all UNDEF except SVC.
         */
        if (op2_ll != 1) {
            unallocated_encoding(s);
            break;
        }
        gen_exception_insn(s, 0, EXCP_SWI);
        break;
    case 1:
        if (op2_ll != 0) {
            unallocated_encoding(s);
            break;
        }
        /* BRK */
        gen_exception_insn(s, 0, EXCP_BKPT);
        break;
    case 2:
        if (op2_ll != 0) {
            unallocated_encoding(s);
            break;
        }
        /* HLT: external debug halt, not implemented */
        unsupported_encoding(s, insn);
        break;
    case 5:
        if (op2_ll < 1 || op2_ll > 3) {
            unallocated_encoding(s);
            break;
        }
        /* DCPS1, DCPS2, DCPS3: debug state entry, not implemented */
        unsupported_encoding(s, insn);
        break;
    default:
        unallocated_encoding(s);
        break;
    }
}
1351
b001c8c3
AG
1352/* C3.2.7 Unconditional branch (register)
1353 * 31 25 24 21 20 16 15 10 9 5 4 0
1354 * +---------------+-------+-------+-------+------+-------+
1355 * | 1 1 0 1 0 1 1 | opc | op2 | op3 | Rn | op4 |
1356 * +---------------+-------+-------+-------+------+-------+
1357 */
ad7ee8a2
CF
1358static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1359{
b001c8c3
AG
1360 unsigned int opc, op2, op3, rn, op4;
1361
1362 opc = extract32(insn, 21, 4);
1363 op2 = extract32(insn, 16, 5);
1364 op3 = extract32(insn, 10, 6);
1365 rn = extract32(insn, 5, 5);
1366 op4 = extract32(insn, 0, 5);
1367
1368 if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
1369 unallocated_encoding(s);
1370 return;
1371 }
1372
1373 switch (opc) {
1374 case 0: /* BR */
1375 case 2: /* RET */
1376 break;
1377 case 1: /* BLR */
1378 tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1379 break;
1380 case 4: /* ERET */
1381 case 5: /* DRPS */
1382 if (rn != 0x1f) {
1383 unallocated_encoding(s);
1384 } else {
1385 unsupported_encoding(s, insn);
1386 }
1387 return;
1388 default:
1389 unallocated_encoding(s);
1390 return;
1391 }
1392
1393 tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
1394 s->is_jmp = DISAS_JUMP;
ad7ee8a2
CF
1395}
1396
1397/* C3.2 Branches, exception generating and system instructions */
1398static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
1399{
1400 switch (extract32(insn, 25, 7)) {
1401 case 0x0a: case 0x0b:
1402 case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
1403 disas_uncond_b_imm(s, insn);
1404 break;
1405 case 0x1a: case 0x5a: /* Compare & branch (immediate) */
1406 disas_comp_b_imm(s, insn);
1407 break;
1408 case 0x1b: case 0x5b: /* Test & branch (immediate) */
1409 disas_test_b_imm(s, insn);
1410 break;
1411 case 0x2a: /* Conditional branch (immediate) */
1412 disas_cond_b_imm(s, insn);
1413 break;
1414 case 0x6a: /* Exception generation / System */
1415 if (insn & (1 << 24)) {
1416 disas_system(s, insn);
1417 } else {
1418 disas_exc(s, insn);
1419 }
1420 break;
1421 case 0x6b: /* Unconditional branch (register) */
1422 disas_uncond_b_reg(s, insn);
1423 break;
1424 default:
1425 unallocated_encoding(s);
1426 break;
1427 }
1428}
1429
/*
 * Load/Store exclusive instructions are implemented by remembering
 * the value/address loaded, and seeing if these are the same
 * when the store is performed. This is not actually the architecturally
 * mandated semantics, but it works for typical guest code sequences
 * and avoids having to monitor regular stores.
 *
 * In system emulation mode only one CPU will be running at once, so
 * this sequence is effectively atomic. In user emulation mode we
 * throw an exception and handle the atomic operation elsewhere.
 */
static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
                               TCGv_i64 addr, int size, bool is_pair)
{
    TCGv_i64 tmp = tcg_temp_new_i64();
    TCGMemOp memop = MO_TE + size; /* target-endian, 1 << size bytes */

    g_assert(size <= 3);
    tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), memop);

    if (is_pair) {
        TCGv_i64 addr2 = tcg_temp_new_i64();
        TCGv_i64 hitmp = tcg_temp_new_i64();

        /* pair forms only exist for 32/64-bit element sizes */
        g_assert(size >= 2);
        tcg_gen_addi_i64(addr2, addr, 1 << size);
        tcg_gen_qemu_ld_i64(hitmp, addr2, get_mem_index(s), memop);
        tcg_temp_free_i64(addr2);
        /* remember the second value for the store-exclusive comparison */
        tcg_gen_mov_i64(cpu_exclusive_high, hitmp);
        tcg_gen_mov_i64(cpu_reg(s, rt2), hitmp);
        tcg_temp_free_i64(hitmp);
    }

    tcg_gen_mov_i64(cpu_exclusive_val, tmp);
    tcg_gen_mov_i64(cpu_reg(s, rt), tmp);

    tcg_temp_free_i64(tmp);
    /* mark the monitor: record which address is currently "exclusive" */
    tcg_gen_mov_i64(cpu_exclusive_addr, addr);
}
1469
#ifdef CONFIG_USER_ONLY
/* User-mode: the compare-and-store is performed outside the TB by the
 * EXCP_STREX handler; record the address and the operand register
 * numbers for it to use.
 */
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i64 addr, int size, int is_pair)
{
    tcg_gen_mov_i64(cpu_exclusive_test, addr);
    /* pack size/is_pair/rd/rt/rt2 into cpu_exclusive_info for the handler */
    tcg_gen_movi_i32(cpu_exclusive_info,
                     size | is_pair << 2 | (rd << 4) | (rt << 9) | (rt2 << 14));
    gen_exception_insn(s, 4, EXCP_STREX);
}
#else
/* System-mode store-exclusive is not implemented yet: log the access
 * and emit nothing (the store is silently dropped).
 */
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i64 addr, int size, int is_pair)
{
    qemu_log_mask(LOG_UNIMP,
                  "%s:%d: system mode store_exclusive unsupported "
                  "at pc=%016" PRIx64 "\n",
                  __FILE__, __LINE__, s->pc - 4);
}
#endif
1489
1490/* C3.3.6 Load/store exclusive
1491 *
1492 * 31 30 29 24 23 22 21 20 16 15 14 10 9 5 4 0
1493 * +-----+-------------+----+---+----+------+----+-------+------+------+
1494 * | sz | 0 0 1 0 0 0 | o2 | L | o1 | Rs | o0 | Rt2 | Rn | Rt |
1495 * +-----+-------------+----+---+----+------+----+-------+------+------+
1496 *
1497 * sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
1498 * L: 0 -> store, 1 -> load
1499 * o2: 0 -> exclusive, 1 -> not
1500 * o1: 0 -> single register, 1 -> register pair
1501 * o0: 1 -> load-acquire/store-release, 0 -> not
1502 *
1503 * o0 == 0 AND o2 == 1 is un-allocated
1504 * o1 == 1 is un-allocated except for 32 and 64 bit sizes
1505 */
ad7ee8a2
CF
1506static void disas_ldst_excl(DisasContext *s, uint32_t insn)
1507{
fa2ef212
MM
1508 int rt = extract32(insn, 0, 5);
1509 int rn = extract32(insn, 5, 5);
1510 int rt2 = extract32(insn, 10, 5);
1511 int is_lasr = extract32(insn, 15, 1);
1512 int rs = extract32(insn, 16, 5);
1513 int is_pair = extract32(insn, 21, 1);
1514 int is_store = !extract32(insn, 22, 1);
1515 int is_excl = !extract32(insn, 23, 1);
1516 int size = extract32(insn, 30, 2);
1517 TCGv_i64 tcg_addr;
1518
1519 if ((!is_excl && !is_lasr) ||
1520 (is_pair && size < 2)) {
1521 unallocated_encoding(s);
1522 return;
1523 }
1524
1525 if (rn == 31) {
1526 gen_check_sp_alignment(s);
1527 }
1528 tcg_addr = read_cpu_reg_sp(s, rn, 1);
1529
1530 /* Note that since TCG is single threaded load-acquire/store-release
1531 * semantics require no extra if (is_lasr) { ... } handling.
1532 */
1533
1534 if (is_excl) {
1535 if (!is_store) {
1536 gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
1537 } else {
1538 gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
1539 }
1540 } else {
1541 TCGv_i64 tcg_rt = cpu_reg(s, rt);
1542 if (is_store) {
1543 do_gpr_st(s, tcg_rt, tcg_addr, size);
1544 } else {
1545 do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false);
1546 }
1547 if (is_pair) {
1548 TCGv_i64 tcg_rt2 = cpu_reg(s, rt);
1549 tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
1550 if (is_store) {
1551 do_gpr_st(s, tcg_rt2, tcg_addr, size);
1552 } else {
1553 do_gpr_ld(s, tcg_rt2, tcg_addr, size, false, false);
1554 }
1555 }
1556 }
ad7ee8a2
CF
1557}
1558
/*
 * C3.3.5 Load register (literal)
 *
 *  31 30 29   27  26 25 24 23                5 4     0
 * +-----+-------+---+-----+-------------------+-------+
 * | opc | 0 1 1 | V | 0 0 |       imm19       |  Rt   |
 * +-----+-------+---+-----+-------------------+-------+
 *
 * V: 1 -> vector (simd/fp)
 * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
 *                   10-> 32 bit signed, 11 -> prefetch
 * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
 */
static void disas_ld_lit(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int64_t imm = sextract32(insn, 5, 19) << 2; /* word-scaled offset */
    bool is_vector = extract32(insn, 26, 1);
    int opc = extract32(insn, 30, 2);
    bool is_signed = false;
    int size = 2;
    TCGv_i64 tcg_rt, tcg_addr;

    if (is_vector) {
        if (opc == 3) {
            unallocated_encoding(s);
            return;
        }
        size = 2 + opc;
    } else {
        if (opc == 3) {
            /* PRFM (literal) : prefetch, treated as a no-op */
            return;
        }
        size = 2 + extract32(opc, 0, 1);
        is_signed = extract32(opc, 1, 1);
    }

    tcg_rt = cpu_reg(s, rt);

    /* the literal is addressed relative to this insn (s->pc - 4) */
    tcg_addr = tcg_const_i64((s->pc - 4) + imm);
    if (is_vector) {
        do_fp_ld(s, rt, tcg_addr, size);
    } else {
        do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
    }
    tcg_temp_free_i64(tcg_addr);
}
1607
/*
 * C5.6.80 LDNP (Load Pair - non-temporal hint)
 * C5.6.81 LDP (Load Pair - non vector)
 * C5.6.82 LDPSW (Load Pair Signed Word - non vector)
 * C5.6.176 STNP (Store Pair - non-temporal hint)
 * C5.6.177 STP (Store Pair - non vector)
 * C6.3.165 LDNP (Load Pair of SIMD&FP - non-temporal hint)
 * C6.3.165 LDP (Load Pair of SIMD&FP)
 * C6.3.284 STNP (Store Pair of SIMD&FP - non-temporal hint)
 * C6.3.284 STP (Store Pair of SIMD&FP)
 *
 *  31 30 29   27  26  25 24   23 22 21   15 14   10 9    5 4    0
 * +-----+-------+---+---+-------+---+-----------------------------+
 * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
 * +-----+-------+---+---+-------+---+-------+-------+------+------+
 *
 * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
 *      LDPSW                    01
 *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
 *   V: 0 -> GPR, 1 -> Vector
 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
 *      10 -> signed offset, 11 -> pre-index
 *   L: 0 -> Store 1 -> Load
 *
 * Rt, Rt2 = GPR or SIMD registers to be stored
 * Rn = general purpose register containing address
 * imm7 = signed offset (multiple of 4 or 8 depending on size)
 */
static void disas_ldst_pair(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rt2 = extract32(insn, 10, 5);
    int64_t offset = sextract32(insn, 15, 7);
    int index = extract32(insn, 23, 2);
    bool is_vector = extract32(insn, 26, 1);
    bool is_load = extract32(insn, 22, 1);
    int opc = extract32(insn, 30, 2);

    bool is_signed = false;
    bool postindex = false;
    bool wback = false;

    TCGv_i64 tcg_addr; /* calculated address */
    int size;

    if (opc == 3) {
        unallocated_encoding(s);
        return;
    }

    if (is_vector) {
        size = 2 + opc;
    } else {
        size = 2 + extract32(opc, 1, 1);
        is_signed = extract32(opc, 0, 1);
        if (!is_load && is_signed) {
            /* there is no store-pair-signed-word */
            unallocated_encoding(s);
            return;
        }
    }

    switch (index) {
    case 1: /* post-index */
        postindex = true;
        wback = true;
        break;
    case 0:
        /* signed offset with "non-temporal" hint. Since we don't emulate
         * caches we don't care about hints to the cache system about
         * data access patterns, and handle this identically to plain
         * signed offset.
         */
        if (is_signed) {
            /* There is no non-temporal-hint version of LDPSW */
            unallocated_encoding(s);
            return;
        }
        postindex = false;
        break;
    case 2: /* signed offset, rn not updated */
        postindex = false;
        break;
    case 3: /* pre-index */
        postindex = false;
        wback = true;
        break;
    }

    /* imm7 is scaled by the element size */
    offset <<= size;

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    tcg_addr = read_cpu_reg_sp(s, rn, 1);

    if (!postindex) {
        tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
    }

    /* first element of the pair */
    if (is_vector) {
        if (is_load) {
            do_fp_ld(s, rt, tcg_addr, size);
        } else {
            do_fp_st(s, rt, tcg_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        if (is_load) {
            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
        } else {
            do_gpr_st(s, tcg_rt, tcg_addr, size);
        }
    }
    /* second element lives one element-size further on */
    tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
    if (is_vector) {
        if (is_load) {
            do_fp_ld(s, rt2, tcg_addr, size);
        } else {
            do_fp_st(s, rt2, tcg_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
        if (is_load) {
            do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false);
        } else {
            do_gpr_st(s, tcg_rt2, tcg_addr, size);
        }
    }

    if (wback) {
        /* tcg_addr currently points at the second element; adjust it
         * back to the value Rn should be updated with.
         */
        if (postindex) {
            tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
        } else {
            tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
        }
        tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
    }
}
1748
/*
 * C3.3.8 Load/store (immediate post-indexed)
 * C3.3.9 Load/store (immediate pre-indexed)
 * C3.3.12 Load/store (unscaled immediate)
 *
 *  31 30 29   27 26 25 24 23 22 21 20    12 11 10 9    5 4    0
 * +----+-------+---+-----+-----+---+--------+-----+------+------+
 * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
 * +----+-------+---+-----+-----+---+--------+-----+------+------+
 *
 * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
 * V = 0 -> non-vector
 * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
 * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
 */
static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int imm9 = sextract32(insn, 12, 9);
    int opc = extract32(insn, 22, 2);
    int size = extract32(insn, 30, 2);
    int idx = extract32(insn, 10, 2);
    bool is_signed = false;
    bool is_store = false;
    bool is_extended = false;
    bool is_vector = extract32(insn, 26, 1);
    bool post_index;
    bool writeback;

    TCGv_i64 tcg_addr;

    if (is_vector) {
        /* vector element size is opc<1>:size, giving up to 128 bit */
        size |= (opc & 2) << 1;
        if (size > 4) {
            unallocated_encoding(s);
            return;
        }
        is_store = ((opc & 1) == 0);
    } else {
        if (size == 3 && opc == 2) {
            /* PRFM - prefetch, treated as a no-op */
            return;
        }
        if (opc == 3 && size > 1) {
            unallocated_encoding(s);
            return;
        }
        is_store = (opc == 0);
        is_signed = opc & (1<<1);
        /* signed 32-bit loads sign-extend into the 64-bit register */
        is_extended = (size < 3) && (opc & 1);
    }

    switch (idx) {
    case 0:
        post_index = false;
        writeback = false;
        break;
    case 1:
        post_index = true;
        writeback = true;
        break;
    case 3:
        post_index = false;
        writeback = true;
        break;
    case 2:
        /* idx == 2 is the "unprivileged" form, routed elsewhere by the
         * caller (disas_ldst_reg_imm), so it cannot reach here.
         */
        g_assert(false);
        break;
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }
    tcg_addr = read_cpu_reg_sp(s, rn, 1);

    if (!post_index) {
        tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
    }

    if (is_vector) {
        if (is_store) {
            do_fp_st(s, rt, tcg_addr, size);
        } else {
            do_fp_ld(s, rt, tcg_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        if (is_store) {
            do_gpr_st(s, tcg_rt, tcg_addr, size);
        } else {
            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
        }
    }

    if (writeback) {
        TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
        if (post_index) {
            tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
        }
        tcg_gen_mov_i64(tcg_rn, tcg_addr);
    }
}
1852
/*
 * C3.3.10 Load/store (register offset)
 *
 *  31 30 29   27 26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
 * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
 * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
 *
 * For non-vector:
 *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
 *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
 * For vector:
 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
 *   opc<0>: 0 -> store, 1 -> load
 * V: 1 -> vector/simd
 * opt: extend encoding (see DecodeRegExtend)
 * S: if S=1 then scale (essentially index by sizeof(size))
 * Rt: register to transfer into/out of
 * Rn: address register or SP for base
 * Rm: offset register or ZR for offset
 */
static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int shift = extract32(insn, 12, 1);
    int rm = extract32(insn, 16, 5);
    int opc = extract32(insn, 22, 2);
    int opt = extract32(insn, 13, 3);
    int size = extract32(insn, 30, 2);
    bool is_signed = false;
    bool is_store = false;
    bool is_extended = false;
    bool is_vector = extract32(insn, 26, 1);

    TCGv_i64 tcg_rm;
    TCGv_i64 tcg_addr;

    /* opt<1> must be set: only UXTW/UXTX/SXTW/SXTX extends are valid */
    if (extract32(opt, 1, 1) == 0) {
        unallocated_encoding(s);
        return;
    }

    if (is_vector) {
        size |= (opc & 2) << 1;
        if (size > 4) {
            unallocated_encoding(s);
            return;
        }
        is_store = !extract32(opc, 0, 1);
    } else {
        if (size == 3 && opc == 2) {
            /* PRFM - prefetch, treated as a no-op */
            return;
        }
        if (opc == 3 && size > 1) {
            unallocated_encoding(s);
            return;
        }
        is_store = (opc == 0);
        is_signed = extract32(opc, 1, 1);
        is_extended = (size < 3) && extract32(opc, 0, 1);
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }
    tcg_addr = read_cpu_reg_sp(s, rn, 1);

    /* offset = extend(Rm), optionally scaled by the element size */
    tcg_rm = read_cpu_reg(s, rm, 1);
    ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);

    tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);

    if (is_vector) {
        if (is_store) {
            do_fp_st(s, rt, tcg_addr, size);
        } else {
            do_fp_ld(s, rt, tcg_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        if (is_store) {
            do_gpr_st(s, tcg_rt, tcg_addr, size);
        } else {
            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
        }
    }
}
1942
/*
 * C3.3.13 Load/store (unsigned immediate)
 *
 *  31 30 29   27  26 25 24 23 22 21        10 9     5
 * +----+-------+---+-----+-----+------------+-------+------+
 * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
 * +----+-------+---+-----+-----+------------+-------+------+
 *
 * For non-vector:
 *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
 *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
 * For vector:
 *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
 *   opc<0>: 0 -> store, 1 -> load
 * Rn: base address register (inc SP)
 * Rt: target register
 */
static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    unsigned int imm12 = extract32(insn, 10, 12);
    bool is_vector = extract32(insn, 26, 1);
    int size = extract32(insn, 30, 2);
    int opc = extract32(insn, 22, 2);
    unsigned int offset;

    TCGv_i64 tcg_addr;

    bool is_store;
    bool is_signed = false;
    bool is_extended = false;

    if (is_vector) {
        size |= (opc & 2) << 1;
        if (size > 4) {
            unallocated_encoding(s);
            return;
        }
        is_store = !extract32(opc, 0, 1);
    } else {
        if (size == 3 && opc == 2) {
            /* PRFM - prefetch, treated as a no-op */
            return;
        }
        if (opc == 3 && size > 1) {
            unallocated_encoding(s);
            return;
        }
        is_store = (opc == 0);
        is_signed = extract32(opc, 1, 1);
        is_extended = (size < 3) && extract32(opc, 0, 1);
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }
    tcg_addr = read_cpu_reg_sp(s, rn, 1);
    /* the unsigned immediate is scaled by the element size */
    offset = imm12 << size;
    tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);

    if (is_vector) {
        if (is_store) {
            do_fp_st(s, rt, tcg_addr, size);
        } else {
            do_fp_ld(s, rt, tcg_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        if (is_store) {
            do_gpr_st(s, tcg_rt, tcg_addr, size);
        } else {
            do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
        }
    }
}
2019
a5e94a9d
AB
2020/* Load/store register (immediate forms) */
2021static void disas_ldst_reg_imm(DisasContext *s, uint32_t insn)
2022{
2023 switch (extract32(insn, 10, 2)) {
2024 case 0: case 1: case 3:
2025 /* Load/store register (unscaled immediate) */
2026 /* Load/store immediate pre/post-indexed */
2027 disas_ldst_reg_imm9(s, insn);
2028 break;
2029 case 2:
2030 /* Load/store register unprivileged */
2031 unsupported_encoding(s, insn);
2032 break;
2033 default:
2034 unallocated_encoding(s);
2035 break;
2036 }
2037}
2038
ad7ee8a2
CF
2039/* Load/store register (all forms) */
2040static void disas_ldst_reg(DisasContext *s, uint32_t insn)
2041{
d5612f10
AB
2042 switch (extract32(insn, 24, 2)) {
2043 case 0:
229b7a05
AB
2044 if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
2045 disas_ldst_reg_roffset(s, insn);
2046 } else {
a5e94a9d 2047 disas_ldst_reg_imm(s, insn);
229b7a05 2048 }
d5612f10
AB
2049 break;
2050 case 1:
2051 disas_ldst_reg_unsigned_imm(s, insn);
2052 break;
2053 default:
2054 unallocated_encoding(s);
2055 break;
2056 }
ad7ee8a2
CF
2057}
2058
/* C3.3.1 AdvSIMD load/store multiple structures
 *
 *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
 * +---+---+---------------+---+-------------+--------+------+------+------+
 * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
 * +---+---+---------------+---+-------------+--------+------+------+------+
 *
 * C3.3.2 AdvSIMD load/store multiple structures (post-indexed)
 *
 *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
 * +---+---+---------------+---+---+---------+--------+------+------+------+
 * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
 * +---+---+---------------+---+---+---------+--------+------+------+------+
 *
 * Rt: first (or only) SIMD&FP register to be transferred
 * Rn: base address or SP
 * Rm (post-index only): post-index register (when !31) or size dependent #imm
 */
static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int size = extract32(insn, 10, 2);
    int opcode = extract32(insn, 12, 4);
    bool is_store = !extract32(insn, 22, 1);
    bool is_postidx = extract32(insn, 23, 1);
    bool is_q = extract32(insn, 30, 1);
    TCGv_i64 tcg_addr, tcg_rn;

    int ebytes = 1 << size;                     /* bytes per element */
    int elements = (is_q ? 128 : 64) / (8 << size); /* elements per register */
    int rpt;    /* num iterations */
    int selem;  /* structure elements */
    int r;

    if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
        unallocated_encoding(s);
        return;
    }

    /* From the shared decode logic: opcode selects how many registers
     * are repeated (rpt) and how many elements form one structure
     * (selem) -- presumably matching LD/ST1..LD/ST4 register counts.
     */
    switch (opcode) {
    case 0x0:
        rpt = 1;
        selem = 4;
        break;
    case 0x2:
        rpt = 4;
        selem = 1;
        break;
    case 0x4:
        rpt = 1;
        selem = 3;
        break;
    case 0x6:
        rpt = 3;
        selem = 1;
        break;
    case 0x7:
        rpt = 1;
        selem = 1;
        break;
    case 0x8:
        rpt = 1;
        selem = 2;
        break;
    case 0xa:
        rpt = 2;
        selem = 1;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (size == 3 && !is_q && selem != 1) {
        /* reserved */
        unallocated_encoding(s);
        return;
    }

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    /* Work on a copy of the address so Rn is only updated at the end */
    tcg_rn = cpu_reg_sp(s, rn);
    tcg_addr = tcg_temp_new_i64();
    tcg_gen_mov_i64(tcg_addr, tcg_rn);

    for (r = 0; r < rpt; r++) {
        int e;
        for (e = 0; e < elements; e++) {
            int tt = (rt + r) % 32;
            int xs;
            for (xs = 0; xs < selem; xs++) {
                if (is_store) {
                    do_vec_st(s, tt, e, tcg_addr, size);
                } else {
                    do_vec_ld(s, tt, e, tcg_addr, size);

                    /* For non-quad operations, setting a slice of the low
                     * 64 bits of the register clears the high 64 bits (in
                     * the ARM ARM pseudocode this is implicit in the fact
                     * that 'rval' is a 64 bit wide variable). We optimize
                     * by noticing that we only need to do this the first
                     * time we touch a register.
                     */
                    if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) {
                        clear_vec_high(s, tt);
                    }
                }
                tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
                tt = (tt + 1) % 32;
            }
        }
    }

    if (is_postidx) {
        int rm = extract32(insn, 16, 5);
        if (rm == 31) {
            /* Rm == 31 means "post-index by the total transfer size",
             * which tcg_addr has already accumulated.
             */
            tcg_gen_mov_i64(tcg_rn, tcg_addr);
        } else {
            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
        }
    }
    tcg_temp_free_i64(tcg_addr);
}
2186
df54e47d
PM
2187/* C3.3.3 AdvSIMD load/store single structure
2188 *
2189 * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0
2190 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2191 * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size | Rn | Rt |
2192 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2193 *
2194 * C3.3.4 AdvSIMD load/store single structure (post-indexed)
2195 *
2196 * 31 30 29 23 22 21 20 16 15 13 12 11 10 9 5 4 0
2197 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2198 * | 0 | Q | 0 0 1 1 0 1 1 | L R | Rm | opc | S | size | Rn | Rt |
2199 * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2200 *
2201 * Rt: first (or only) SIMD&FP register to be transferred
2202 * Rn: base address or SP
2203 * Rm (post-index only): post-index register (when !31) or size dependent #imm
2204 * index = encoded in Q:S:size dependent on size
2205 *
2206 * lane_size = encoded in R, opc
2207 * transfer width = encoded in opc, S, size
2208 */
ad7ee8a2
CF
static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);          /* first SIMD&FP register */
    int rn = extract32(insn, 5, 5);          /* base register (31 == SP) */
    int size = extract32(insn, 10, 2);
    int S = extract32(insn, 12, 1);
    int opc = extract32(insn, 13, 3);
    int R = extract32(insn, 21, 1);
    int is_load = extract32(insn, 22, 1);
    int is_postidx = extract32(insn, 23, 1);
    int is_q = extract32(insn, 30, 1);

    int scale = extract32(opc, 1, 2);        /* log2(lane size); adjusted below */
    int selem = (extract32(opc, 0, 1) << 1 | R) + 1; /* structure elements 1..4 */
    bool replicate = false;                  /* true for LD*R (load+replicate) */
    int index = is_q << 3 | S << 2 | size;   /* lane index, narrowed per scale */
    int ebytes, xs;
    TCGv_i64 tcg_addr, tcg_rn;

    /* Validate the opc/S/size combination and derive the final lane
     * size and index; invalid combinations are unallocated.
     */
    switch (scale) {
    case 3:
        /* opc 11x is the load-and-replicate (LD*R) group: loads only, S==0 */
        if (!is_load || S) {
            unallocated_encoding(s);
            return;
        }
        scale = size;
        replicate = true;
        break;
    case 0:
        break;
    case 1:
        if (extract32(size, 0, 1)) {
            unallocated_encoding(s);
            return;
        }
        index >>= 1;
        break;
    case 2:
        if (extract32(size, 1, 1)) {
            unallocated_encoding(s);
            return;
        }
        if (!extract32(size, 0, 1)) {
            index >>= 2;
        } else {
            /* size == x1 selects a 64-bit lane */
            if (S) {
                unallocated_encoding(s);
                return;
            }
            index >>= 3;
            scale = 3;
        }
        break;
    default:
        g_assert_not_reached();
    }

    ebytes = 1 << scale;                     /* bytes per lane transferred */

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    /* Work on a copy of the address; the base register is only written
     * back by the explicit post-index handling at the end.
     */
    tcg_rn = cpu_reg_sp(s, rn);
    tcg_addr = tcg_temp_new_i64();
    tcg_gen_mov_i64(tcg_addr, tcg_rn);

    for (xs = 0; xs < selem; xs++) {
        if (replicate) {
            /* Load and replicate to all elements */
            uint64_t mulconst;
            TCGv_i64 tcg_tmp = tcg_temp_new_i64();

            tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
                                get_mem_index(s), MO_TE + scale);
            /* Multiplying the loaded (zero-extended) lane by the right
             * 0x...01 pattern copies it into every lane of a 64-bit word.
             */
            switch (scale) {
            case 0:
                mulconst = 0x0101010101010101ULL;
                break;
            case 1:
                mulconst = 0x0001000100010001ULL;
                break;
            case 2:
                mulconst = 0x0000000100000001ULL;
                break;
            case 3:
                mulconst = 0; /* 64-bit lane: already fills the word */
                break;
            default:
                g_assert_not_reached();
            }
            if (mulconst) {
                tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
            }
            write_vec_element(s, tcg_tmp, rt, 0, MO_64);
            if (is_q) {
                write_vec_element(s, tcg_tmp, rt, 1, MO_64);
            } else {
                clear_vec_high(s, rt);
            }
            tcg_temp_free_i64(tcg_tmp);
        } else {
            /* Load/store one element per register */
            if (is_load) {
                do_vec_ld(s, rt, index, tcg_addr, MO_TE + scale);
            } else {
                do_vec_st(s, rt, index, tcg_addr, MO_TE + scale);
            }
        }
        tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
        rt = (rt + 1) % 32;
    }

    if (is_postidx) {
        int rm = extract32(insn, 16, 5);
        if (rm == 31) {
            /* Rm == 31: post-index by immediate == total transfer size,
             * already accumulated in tcg_addr.
             */
            tcg_gen_mov_i64(tcg_rn, tcg_addr);
        } else {
            tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
        }
    }
    tcg_temp_free_i64(tcg_addr);
}
2332
2333/* C3.3 Loads and stores */
2334static void disas_ldst(DisasContext *s, uint32_t insn)
2335{
2336 switch (extract32(insn, 24, 6)) {
2337 case 0x08: /* Load/store exclusive */
2338 disas_ldst_excl(s, insn);
2339 break;
2340 case 0x18: case 0x1c: /* Load register (literal) */
2341 disas_ld_lit(s, insn);
2342 break;
2343 case 0x28: case 0x29:
2344 case 0x2c: case 0x2d: /* Load/store pair (all forms) */
2345 disas_ldst_pair(s, insn);
2346 break;
2347 case 0x38: case 0x39:
2348 case 0x3c: case 0x3d: /* Load/store register (all forms) */
2349 disas_ldst_reg(s, insn);
2350 break;
2351 case 0x0c: /* AdvSIMD load/store multiple structures */
2352 disas_ldst_multiple_struct(s, insn);
2353 break;
2354 case 0x0d: /* AdvSIMD load/store single structure */
2355 disas_ldst_single_struct(s, insn);
2356 break;
2357 default:
2358 unallocated_encoding(s);
2359 break;
2360 }
2361}
2362
15bfe8b6
AG
2363/* C3.4.6 PC-rel. addressing
2364 * 31 30 29 28 24 23 5 4 0
2365 * +----+-------+-----------+-------------------+------+
2366 * | op | immlo | 1 0 0 0 0 | immhi | Rd |
2367 * +----+-------+-----------+-------------------+------+
2368 */
ad7ee8a2
CF
2369static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
2370{
15bfe8b6
AG
2371 unsigned int page, rd;
2372 uint64_t base;
2373 int64_t offset;
2374
2375 page = extract32(insn, 31, 1);
2376 /* SignExtend(immhi:immlo) -> offset */
2377 offset = ((int64_t)sextract32(insn, 5, 19) << 2) | extract32(insn, 29, 2);
2378 rd = extract32(insn, 0, 5);
2379 base = s->pc - 4;
2380
2381 if (page) {
2382 /* ADRP (page based) */
2383 base &= ~0xfff;
2384 offset <<= 12;
2385 }
2386
2387 tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
ad7ee8a2
CF
2388}
2389
b0ff21b4
AB
2390/*
2391 * C3.4.1 Add/subtract (immediate)
2392 *
2393 * 31 30 29 28 24 23 22 21 10 9 5 4 0
2394 * +--+--+--+-----------+-----+-------------+-----+-----+
2395 * |sf|op| S| 1 0 0 0 1 |shift| imm12 | Rn | Rd |
2396 * +--+--+--+-----------+-----+-------------+-----+-----+
2397 *
2398 * sf: 0 -> 32bit, 1 -> 64bit
2399 * op: 0 -> add , 1 -> sub
2400 * S: 1 -> set flags
2401 * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
2402 */
ad7ee8a2
CF
static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    uint64_t imm = extract32(insn, 10, 12);
    int shift = extract32(insn, 22, 2);
    bool setflags = extract32(insn, 29, 1);
    bool sub_op = extract32(insn, 30, 1);
    bool is_64bit = extract32(insn, 31, 1);

    /* Rn is always read as SP-or-register; Rd is SP only for the
     * non-flag-setting forms (ADDS/SUBS write the ZR-encoded register).
     */
    TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
    TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
    TCGv_i64 tcg_result;

    switch (shift) {
    case 0x0: /* LSL #0 */
        break;
    case 0x1: /* LSL #12 */
        imm <<= 12;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    /* Compute into a temp so a partial result never reaches Rd */
    tcg_result = tcg_temp_new_i64();
    if (!setflags) {
        if (sub_op) {
            tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
        } else {
            tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
        }
    } else {
        /* The CC helpers take a TCGv operand, not an immediate */
        TCGv_i64 tcg_imm = tcg_const_i64(imm);
        if (sub_op) {
            gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
        } else {
            gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
        }
        tcg_temp_free_i64(tcg_imm);
    }

    if (is_64bit) {
        tcg_gen_mov_i64(tcg_rd, tcg_result);
    } else {
        /* 32-bit form: write the result zero-extended to 64 bits */
        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
    }

    tcg_temp_free_i64(tcg_result);
}
2453
71b46089
AG
2454/* The input should be a value in the bottom e bits (with higher
2455 * bits zero); returns that value replicated into every element
2456 * of size e in a 64 bit integer.
2457 */
/* The input should be a value in the bottom e bits (with higher
 * bits zero); returns that value replicated into every element
 * of size e in a 64 bit integer.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    unsigned int width;

    assert(e != 0);
    /* Double the populated width until all 64 bits are filled */
    for (width = e; width < 64; width *= 2) {
        mask |= mask << width;
    }
    return mask;
}
2467
2468/* Return a value with the bottom len bits set (where 0 < len <= 64) */
/* Return a value with the bottom len bits set (where 0 < len <= 64) */
static inline uint64_t bitmask64(unsigned int length)
{
    assert(length > 0 && length <= 64);
    /* 1ULL << 64 would be undefined, so the full-width case is special */
    return length == 64 ? ~0ULL : (1ULL << length) - 1;
}
2474
2475/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
2476 * only require the wmask. Returns false if the imms/immr/immn are a reserved
2477 * value (ie should cause a guest UNDEF exception), and true if they are
2478 * valid, in which case the decoded bit pattern is written to result.
2479 */
static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
                                   unsigned int imms, unsigned int immr)
{
    uint64_t mask;
    unsigned e, levels, s, r;
    int len;

    assert(immn < 2 && imms < 64 && immr < 64);

    /* The bit patterns we create here are 64 bit patterns which
     * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
     * 64 bits each. Each element contains the same value: a run
     * of between 1 and e-1 non-zero bits, rotated within the
     * element by between 0 and e-1 bits.
     *
     * The element size and run length are encoded into immn (1 bit)
     * and imms (6 bits) as follows:
     * 64 bit elements: immn = 1, imms = <length of run - 1>
     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
     * 8 bit elements: immn = 0, imms = 110 : <length of run - 1>
     * 4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
     * 2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
     * Notice that immn = 0, imms = 11111x is the only combination
     * not covered by one of the above options; this is reserved.
     * Further, <length of run - 1> all-ones is a reserved pattern.
     *
     * In all cases the rotation is by immr % e (and immr is 6 bits).
     */

    /* First determine the element size: the position of the highest
     * set bit of immn:NOT(imms) gives log2(e).
     */
    len = 31 - clz32((immn << 6) | (~imms & 0x3f));
    if (len < 1) {
        /* This is the immn == 0, imms == 0x11111x case */
        return false;
    }
    e = 1 << len;

    levels = e - 1;
    s = imms & levels;  /* run length - 1, within the element */
    r = immr & levels;  /* rotation amount, within the element */

    if (s == levels) {
        /* <length of run - 1> mustn't be all-ones. */
        return false;
    }

    /* Create the value of one element: s+1 set bits rotated
     * by r within the element (which is e bits wide)...
     */
    mask = bitmask64(s + 1);
    mask = (mask >> r) | (mask << (e - r));
    /* ...then replicate the element over the whole 64 bit value */
    mask = bitfield_replicate(mask, e);
    *result = mask;
    return true;
}
2537
2538/* C3.4.4 Logical (immediate)
2539 * 31 30 29 28 23 22 21 16 15 10 9 5 4 0
2540 * +----+-----+-------------+---+------+------+------+------+
2541 * | sf | opc | 1 0 0 1 0 0 | N | immr | imms | Rn | Rd |
2542 * +----+-----+-------------+---+------+------+------+------+
2543 */
ad7ee8a2
CF
2544static void disas_logic_imm(DisasContext *s, uint32_t insn)
2545{
71b46089
AG
2546 unsigned int sf, opc, is_n, immr, imms, rn, rd;
2547 TCGv_i64 tcg_rd, tcg_rn;
2548 uint64_t wmask;
2549 bool is_and = false;
2550
2551 sf = extract32(insn, 31, 1);
2552 opc = extract32(insn, 29, 2);
2553 is_n = extract32(insn, 22, 1);
2554 immr = extract32(insn, 16, 6);
2555 imms = extract32(insn, 10, 6);
2556 rn = extract32(insn, 5, 5);
2557 rd = extract32(insn, 0, 5);
2558
2559 if (!sf && is_n) {
2560 unallocated_encoding(s);
2561 return;
2562 }
2563
2564 if (opc == 0x3) { /* ANDS */
2565 tcg_rd = cpu_reg(s, rd);
2566 } else {
2567 tcg_rd = cpu_reg_sp(s, rd);
2568 }
2569 tcg_rn = cpu_reg(s, rn);
2570
2571 if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
2572 /* some immediate field values are reserved */
2573 unallocated_encoding(s);
2574 return;
2575 }
2576
2577 if (!sf) {
2578 wmask &= 0xffffffff;
2579 }
2580
2581 switch (opc) {
2582 case 0x3: /* ANDS */
2583 case 0x0: /* AND */
2584 tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
2585 is_and = true;
2586 break;
2587 case 0x1: /* ORR */
2588 tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
2589 break;
2590 case 0x2: /* EOR */
2591 tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
2592 break;
2593 default:
2594 assert(FALSE); /* must handle all above */
2595 break;
2596 }
2597
2598 if (!sf && !is_and) {
2599 /* zero extend final result; we know we can skip this for AND
2600 * since the immediate had the high 32 bits clear.
2601 */
2602 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2603 }
2604
2605 if (opc == 3) { /* ANDS */
2606 gen_logic_CC(sf, tcg_rd);
2607 }
ad7ee8a2
CF
2608}
2609
ed6ec679
AB
2610/*
2611 * C3.4.5 Move wide (immediate)
2612 *
2613 * 31 30 29 28 23 22 21 20 5 4 0
2614 * +--+-----+-------------+-----+----------------+------+
2615 * |sf| opc | 1 0 0 1 0 1 | hw | imm16 | Rd |
2616 * +--+-----+-------------+-----+----------------+------+
2617 *
2618 * sf: 0 -> 32 bit, 1 -> 64 bit
2619 * opc: 00 -> N, 10 -> Z, 11 -> K
2620 * hw: shift/16 (0,16, and sf only 32, 48)
2621 */
ad7ee8a2
CF
static void disas_movw_imm(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    uint64_t imm = extract32(insn, 5, 16);
    int sf = extract32(insn, 31, 1);
    int opc = extract32(insn, 29, 2);
    int pos = extract32(insn, 21, 2) << 4;  /* hw field: shift in bit units */
    TCGv_i64 tcg_rd = cpu_reg(s, rd);
    TCGv_i64 tcg_imm;

    if (!sf && (pos >= 32)) {
        /* 32-bit form only allows shifts of 0 or 16 */
        unallocated_encoding(s);
        return;
    }

    switch (opc) {
    case 0: /* MOVN */
    case 2: /* MOVZ */
        imm <<= pos;
        if (opc == 0) {
            /* MOVN writes the inverse of the shifted immediate */
            imm = ~imm;
        }
        if (!sf) {
            imm &= 0xffffffffu;
        }
        tcg_gen_movi_i64(tcg_rd, imm);
        break;
    case 3: /* MOVK */
        /* Insert the 16-bit immediate into Rd, leaving other bits alone */
        tcg_imm = tcg_const_i64(imm);
        tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
        tcg_temp_free_i64(tcg_imm);
        if (!sf) {
            tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
        }
        break;
    default:
        /* opc == 1 is unallocated */
        unallocated_encoding(s);
        break;
    }
}
2662
88077742
CF
2663/* C3.4.2 Bitfield
2664 * 31 30 29 28 23 22 21 16 15 10 9 5 4 0
2665 * +----+-----+-------------+---+------+------+------+------+
2666 * | sf | opc | 1 0 0 1 1 0 | N | immr | imms | Rn | Rd |
2667 * +----+-----+-------------+---+------+------+------+------+
2668 */
ad7ee8a2
CF
static void disas_bitfield(DisasContext *s, uint32_t insn)
{
    unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
    TCGv_i64 tcg_rd, tcg_tmp;

    sf = extract32(insn, 31, 1);
    opc = extract32(insn, 29, 2);   /* 0: SBFM, 1: BFM, 2: UBFM */
    n = extract32(insn, 22, 1);
    ri = extract32(insn, 16, 6);    /* immr: rotate/source position */
    si = extract32(insn, 10, 6);    /* imms: source field end bit */
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);
    bitsize = sf ? 64 : 32;

    /* N must match sf, the immediates must fit the register width,
     * and opc == 3 is unallocated.
     */
    if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
        unallocated_encoding(s);
        return;
    }

    tcg_rd = cpu_reg(s, rd);
    tcg_tmp = read_cpu_reg(s, rn, sf);

    /* OPTME: probably worth recognizing common cases of ext{8,16,32}{u,s} */

    if (opc != 1) { /* SBFM or UBFM */
        /* Non-BFM forms replace Rd entirely, so start from zero;
         * BFM (opc == 1) deposits into the existing Rd value.
         */
        tcg_gen_movi_i64(tcg_rd, 0);
    }

    /* do the bit move operation */
    if (si >= ri) {
        /* Wd<s-r:0> = Wn<s:r> */
        tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
        pos = 0;
        len = (si - ri) + 1;
    } else {
        /* Wd<32+s-r,32-r> = Wn<s:0> */
        pos = bitsize - ri;
        len = si + 1;
    }

    tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);

    if (opc == 0) { /* SBFM - sign extend the destination field */
        /* Shift the field to the top and arithmetic-shift it back down */
        tcg_gen_shli_i64(tcg_rd, tcg_rd, 64 - (pos + len));
        tcg_gen_sari_i64(tcg_rd, tcg_rd, 64 - (pos + len));
    }

    if (!sf) { /* zero extend final result */
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
}
2720
e801de93
AG
2721/* C3.4.3 Extract
2722 * 31 30 29 28 23 22 21 20 16 15 10 9 5 4 0
2723 * +----+------+-------------+---+----+------+--------+------+------+
2724 * | sf | op21 | 1 0 0 1 1 1 | N | o0 | Rm | imms | Rn | Rd |
2725 * +----+------+-------------+---+----+------+--------+------+------+
2726 */
ad7ee8a2
CF
static void disas_extract(DisasContext *s, uint32_t insn)
{
    unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;

    sf = extract32(insn, 31, 1);
    n = extract32(insn, 22, 1);
    rm = extract32(insn, 16, 5);
    imm = extract32(insn, 10, 6);   /* LSB position of the extracted field */
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);
    op21 = extract32(insn, 29, 2);
    op0 = extract32(insn, 21, 1);
    bitsize = sf ? 64 : 32;

    /* Only EXTR is allocated: N must equal sf, op21/op0 must be zero,
     * and the bit position must fit the register width.
     */
    if (sf != n || op21 || op0 || imm >= bitsize) {
        unallocated_encoding(s);
    } else {
        TCGv_i64 tcg_rd, tcg_rm, tcg_rn;

        tcg_rd = cpu_reg(s, rd);

        if (imm) {
            /* OPTME: we can special case rm==rn as a rotate */
            /* Rd = Rm >> imm | Rn << (bitsize - imm): the low part of the
             * Rn:Rm concatenation starting at bit position imm.
             */
            tcg_rm = read_cpu_reg(s, rm, sf);
            tcg_rn = read_cpu_reg(s, rn, sf);
            tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
            tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
            tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
            if (!sf) {
                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
            }
        } else {
            /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
             * so an extract from bit 0 is a special case.
             */
            if (sf) {
                tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
            } else {
                tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
            }
        }

    }
}
2771
2772/* C3.4 Data processing - immediate */
2773static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
2774{
2775 switch (extract32(insn, 23, 6)) {
2776 case 0x20: case 0x21: /* PC-rel. addressing */
2777 disas_pc_rel_adr(s, insn);
2778 break;
2779 case 0x22: case 0x23: /* Add/subtract (immediate) */
2780 disas_add_sub_imm(s, insn);
2781 break;
2782 case 0x24: /* Logical (immediate) */
2783 disas_logic_imm(s, insn);
2784 break;
2785 case 0x25: /* Move wide (immediate) */
2786 disas_movw_imm(s, insn);
2787 break;
2788 case 0x26: /* Bitfield */
2789 disas_bitfield(s, insn);
2790 break;
2791 case 0x27: /* Extract */
2792 disas_extract(s, insn);
2793 break;
2794 default:
2795 unallocated_encoding(s);
2796 break;
2797 }
2798}
2799
832ffa1c
AG
2800/* Shift a TCGv src by TCGv shift_amount, put result in dst.
2801 * Note that it is the caller's responsibility to ensure that the
2802 * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
2803 * mandated semantics for out of range shifts.
2804 */
2805static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
2806 enum a64_shift_type shift_type, TCGv_i64 shift_amount)
2807{
2808 switch (shift_type) {
2809 case A64_SHIFT_TYPE_LSL:
2810 tcg_gen_shl_i64(dst, src, shift_amount);
2811 break;
2812 case A64_SHIFT_TYPE_LSR:
2813 tcg_gen_shr_i64(dst, src, shift_amount);
2814 break;
2815 case A64_SHIFT_TYPE_ASR:
2816 if (!sf) {
2817 tcg_gen_ext32s_i64(dst, src);
2818 }
2819 tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
2820 break;
2821 case A64_SHIFT_TYPE_ROR:
2822 if (sf) {
2823 tcg_gen_rotr_i64(dst, src, shift_amount);
2824 } else {
2825 TCGv_i32 t0, t1;
2826 t0 = tcg_temp_new_i32();
2827 t1 = tcg_temp_new_i32();
2828 tcg_gen_trunc_i64_i32(t0, src);
2829 tcg_gen_trunc_i64_i32(t1, shift_amount);
2830 tcg_gen_rotr_i32(t0, t0, t1);
2831 tcg_gen_extu_i32_i64(dst, t0);
2832 tcg_temp_free_i32(t0);
2833 tcg_temp_free_i32(t1);
2834 }
2835 break;
2836 default:
2837 assert(FALSE); /* all shift types should be handled */
2838 break;
2839 }
2840
2841 if (!sf) { /* zero extend final result */
2842 tcg_gen_ext32u_i64(dst, dst);
2843 }
2844}
2845
2846/* Shift a TCGv src by immediate, put result in dst.
2847 * The shift amount must be in range (this should always be true as the
2848 * relevant instructions will UNDEF on bad shift immediates).
2849 */
2850static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
2851 enum a64_shift_type shift_type, unsigned int shift_i)
2852{
2853 assert(shift_i < (sf ? 64 : 32));
2854
2855 if (shift_i == 0) {
2856 tcg_gen_mov_i64(dst, src);
2857 } else {
2858 TCGv_i64 shift_const;
2859
2860 shift_const = tcg_const_i64(shift_i);
2861 shift_reg(dst, src, sf, shift_type, shift_const);
2862 tcg_temp_free_i64(shift_const);
2863 }
2864}
2865
2866/* C3.5.10 Logical (shifted register)
2867 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0
2868 * +----+-----+-----------+-------+---+------+--------+------+------+
2869 * | sf | opc | 0 1 0 1 0 | shift | N | Rm | imm6 | Rn | Rd |
2870 * +----+-----+-----------+-------+---+------+--------+------+------+
2871 */
ad7ee8a2
CF
2872static void disas_logic_reg(DisasContext *s, uint32_t insn)
2873{
832ffa1c
AG
2874 TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
2875 unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
2876
2877 sf = extract32(insn, 31, 1);
2878 opc = extract32(insn, 29, 2);
2879 shift_type = extract32(insn, 22, 2);
2880 invert = extract32(insn, 21, 1);
2881 rm = extract32(insn, 16, 5);
2882 shift_amount = extract32(insn, 10, 6);
2883 rn = extract32(insn, 5, 5);
2884 rd = extract32(insn, 0, 5);
2885
2886 if (!sf && (shift_amount & (1 << 5))) {
2887 unallocated_encoding(s);
2888 return;
2889 }
2890
2891 tcg_rd = cpu_reg(s, rd);
2892
2893 if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
2894 /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
2895 * register-register MOV and MVN, so it is worth special casing.
2896 */
2897 tcg_rm = cpu_reg(s, rm);
2898 if (invert) {
2899 tcg_gen_not_i64(tcg_rd, tcg_rm);
2900 if (!sf) {
2901 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2902 }
2903 } else {
2904 if (sf) {
2905 tcg_gen_mov_i64(tcg_rd, tcg_rm);
2906 } else {
2907 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
2908 }
2909 }
2910 return;
2911 }
2912
2913 tcg_rm = read_cpu_reg(s, rm, sf);
2914
2915 if (shift_amount) {
2916 shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
2917 }
2918
2919 tcg_rn = cpu_reg(s, rn);
2920
2921 switch (opc | (invert << 2)) {
2922 case 0: /* AND */
2923 case 3: /* ANDS */
2924 tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
2925 break;
2926 case 1: /* ORR */
2927 tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
2928 break;
2929 case 2: /* EOR */
2930 tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
2931 break;
2932 case 4: /* BIC */
2933 case 7: /* BICS */
2934 tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
2935 break;
2936 case 5: /* ORN */
2937 tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
2938 break;
2939 case 6: /* EON */
2940 tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
2941 break;
2942 default:
2943 assert(FALSE);
2944 break;
2945 }
2946
2947 if (!sf) {
2948 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2949 }
2950
2951 if (opc == 3) {
2952 gen_logic_CC(sf, tcg_rd);
2953 }
ad7ee8a2
CF
2954}
2955
b0ff21b4
AB
2956/*
2957 * C3.5.1 Add/subtract (extended register)
2958 *
2959 * 31|30|29|28 24|23 22|21|20 16|15 13|12 10|9 5|4 0|
2960 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
2961 * |sf|op| S| 0 1 0 1 1 | opt | 1| Rm |option| imm3 | Rn | Rd |
2962 * +--+--+--+-----------+-----+--+-------+------+------+----+----+
2963 *
2964 * sf: 0 -> 32bit, 1 -> 64bit
2965 * op: 0 -> add , 1 -> sub
2966 * S: 1 -> set flags
2967 * opt: 00
2968 * option: extension type (see DecodeRegExtend)
2969 * imm3: optional shift to Rm
2970 *
2971 * Rd = Rn + LSL(extend(Rm), amount)
2972 */
ad7ee8a2
CF
static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int imm3 = extract32(insn, 10, 3);    /* shift applied after extension */
    int option = extract32(insn, 13, 3);  /* extension type */
    int rm = extract32(insn, 16, 5);
    bool setflags = extract32(insn, 29, 1);
    bool sub_op = extract32(insn, 30, 1);
    bool sf = extract32(insn, 31, 1);

    TCGv_i64 tcg_rm, tcg_rn; /* temps */
    TCGv_i64 tcg_rd;
    TCGv_i64 tcg_result;

    if (imm3 > 4) {
        /* shift amounts of 5..7 are reserved */
        unallocated_encoding(s);
        return;
    }

    /* non-flag setting ops may use SP */
    if (!setflags) {
        tcg_rn = read_cpu_reg_sp(s, rn, sf);
        tcg_rd = cpu_reg_sp(s, rd);
    } else {
        tcg_rn = read_cpu_reg(s, rn, sf);
        tcg_rd = cpu_reg(s, rd);
    }

    /* Extend Rm per 'option' then left-shift by imm3, in place */
    tcg_rm = read_cpu_reg(s, rm, sf);
    ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);

    /* Compute into a temp so Rd is only written once at the end */
    tcg_result = tcg_temp_new_i64();

    if (!setflags) {
        if (sub_op) {
            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
        } else {
            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
        }
    } else {
        if (sub_op) {
            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
        } else {
            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
        }
    }

    if (sf) {
        tcg_gen_mov_i64(tcg_rd, tcg_result);
    } else {
        /* 32-bit form writes the result zero-extended */
        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
    }

    tcg_temp_free_i64(tcg_result);
}
3029
b0ff21b4
AB
3030/*
3031 * C3.5.2 Add/subtract (shifted register)
3032 *
3033 * 31 30 29 28 24 23 22 21 20 16 15 10 9 5 4 0
3034 * +--+--+--+-----------+-----+--+-------+---------+------+------+
3035 * |sf|op| S| 0 1 0 1 1 |shift| 0| Rm | imm6 | Rn | Rd |
3036 * +--+--+--+-----------+-----+--+-------+---------+------+------+
3037 *
3038 * sf: 0 -> 32bit, 1 -> 64bit
3039 * op: 0 -> add , 1 -> sub
3040 * S: 1 -> set flags
3041 * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
3042 * imm6: Shift amount to apply to Rm before the add/sub
3043 */
ad7ee8a2
CF
static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int imm6 = extract32(insn, 10, 6);       /* shift amount for Rm */
    int rm = extract32(insn, 16, 5);
    int shift_type = extract32(insn, 22, 2); /* LSL/LSR/ASR; 3 reserved */
    bool setflags = extract32(insn, 29, 1);
    bool sub_op = extract32(insn, 30, 1);
    bool sf = extract32(insn, 31, 1);

    TCGv_i64 tcg_rd = cpu_reg(s, rd);
    TCGv_i64 tcg_rn, tcg_rm;
    TCGv_i64 tcg_result;

    /* shift type 3 (ROR) is reserved here, as are 32-bit shifts >= 32 */
    if ((shift_type == 3) || (!sf && (imm6 > 31))) {
        unallocated_encoding(s);
        return;
    }

    tcg_rn = read_cpu_reg(s, rn, sf);
    tcg_rm = read_cpu_reg(s, rm, sf);

    /* Apply the shift to the Rm copy in place */
    shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);

    /* Compute into a temp so Rd is only written once at the end */
    tcg_result = tcg_temp_new_i64();

    if (!setflags) {
        if (sub_op) {
            tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
        } else {
            tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
        }
    } else {
        if (sub_op) {
            gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
        } else {
            gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
        }
    }

    if (sf) {
        tcg_gen_mov_i64(tcg_rd, tcg_result);
    } else {
        /* 32-bit form writes the result zero-extended */
        tcg_gen_ext32u_i64(tcg_rd, tcg_result);
    }

    tcg_temp_free_i64(tcg_result);
}
3093
52c8b9af
AG
3094/* C3.5.9 Data-processing (3 source)
3095
3096 31 30 29 28 24 23 21 20 16 15 14 10 9 5 4 0
3097 +--+------+-----------+------+------+----+------+------+------+
3098 |sf| op54 | 1 1 0 1 1 | op31 | Rm | o0 | Ra | Rn | Rd |
3099 +--+------+-----------+------+------+----+------+------+------+
3100
3101 */
ad7ee8a2
CF
static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int ra = extract32(insn, 10, 5);  /* addend register */
    int rm = extract32(insn, 16, 5);
    /* Pack sf:op54:op31:o0 into a single value for the opcode switch */
    int op_id = (extract32(insn, 29, 3) << 4) |
        (extract32(insn, 21, 3) << 1) |
        extract32(insn, 15, 1);
    bool sf = extract32(insn, 31, 1);
    bool is_sub = extract32(op_id, 0, 1);   /* o0: accumulate vs subtract */
    bool is_high = extract32(op_id, 2, 1);  /* *MULH: want high 64 bits */
    bool is_signed = false;
    TCGv_i64 tcg_op1;
    TCGv_i64 tcg_op2;
    TCGv_i64 tcg_tmp;

    /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
    switch (op_id) {
    case 0x42: /* SMADDL */
    case 0x43: /* SMSUBL */
    case 0x44: /* SMULH */
        is_signed = true;
        break;
    case 0x0: /* MADD (32bit) */
    case 0x1: /* MSUB (32bit) */
    case 0x40: /* MADD (64bit) */
    case 0x41: /* MSUB (64bit) */
    case 0x4a: /* UMADDL */
    case 0x4b: /* UMSUBL */
    case 0x4c: /* UMULH */
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (is_high) {
        /* SMULH/UMULH: 64x64->128 multiply, keep only the high half */
        TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
        TCGv_i64 tcg_rd = cpu_reg(s, rd);
        TCGv_i64 tcg_rn = cpu_reg(s, rn);
        TCGv_i64 tcg_rm = cpu_reg(s, rm);

        if (is_signed) {
            tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
        } else {
            tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
        }

        tcg_temp_free_i64(low_bits);
        return;
    }

    tcg_op1 = tcg_temp_new_i64();
    tcg_op2 = tcg_temp_new_i64();
    tcg_tmp = tcg_temp_new_i64();

    if (op_id < 0x42) {
        /* MADD/MSUB: operands used at full width */
        tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
        tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
    } else {
        /* Widening forms ([SU]MADDL/[SU]MSUBL): extend 32-bit sources */
        if (is_signed) {
            tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
            tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
        } else {
            tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
            tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
        }
    }

    if (ra == 31 && !is_sub) {
        /* Special-case MADD with rA == XZR; it is the standard MUL alias */
        tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
    } else {
        tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
        if (is_sub) {
            tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
        } else {
            tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
        }
    }

    if (!sf) {
        /* 32-bit forms write the result zero-extended */
        tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
    }

    tcg_temp_free_i64(tcg_op1);
    tcg_temp_free_i64(tcg_op2);
    tcg_temp_free_i64(tcg_tmp);
}
3192
643dbb07
CF
3193/* C3.5.3 - Add/subtract (with carry)
3194 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 10 9 5 4 0
3195 * +--+--+--+------------------------+------+---------+------+-----+
3196 * |sf|op| S| 1 1 0 1 0 0 0 0 | rm | opcode2 | Rn | Rd |
3197 * +--+--+--+------------------------+------+---------+------+-----+
3198 * [000000]
3199 */
3200
ad7ee8a2
CF
static void disas_adc_sbc(DisasContext *s, uint32_t insn)
{
    unsigned int sf, op, setflags, rm, rn, rd;
    TCGv_i64 tcg_y, tcg_rn, tcg_rd;

    /* opcode2 [15:10] must be all-zero */
    if (extract32(insn, 10, 6) != 0) {
        unallocated_encoding(s);
        return;
    }

    sf = extract32(insn, 31, 1);
    op = extract32(insn, 30, 1);    /* 0: ADC, 1: SBC */
    setflags = extract32(insn, 29, 1);
    rm = extract32(insn, 16, 5);
    rn = extract32(insn, 5, 5);
    rd = extract32(insn, 0, 5);

    tcg_rd = cpu_reg(s, rd);
    tcg_rn = cpu_reg(s, rn);

    if (op) {
        /* SBC is implemented as ADC with the second operand inverted */
        tcg_y = new_tmp_a64(s);
        tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
    } else {
        tcg_y = cpu_reg(s, rm);
    }

    if (setflags) {
        gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
    } else {
        gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
    }
}
3234
750813cf
CF
3235/* C3.5.4 - C3.5.5 Conditional compare (immediate / register)
3236 * 31 30 29 28 27 26 25 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0
3237 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3238 * |sf|op| S| 1 1 0 1 0 0 1 0 |imm5/rm | cond |i/r |o2| Rn |o3|nzcv |
3239 * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3240 * [1] y [0] [0]
3241 */
3242static void disas_cc(DisasContext *s, uint32_t insn)
ad7ee8a2 3243{
750813cf
CF
3244 unsigned int sf, op, y, cond, rn, nzcv, is_imm;
3245 int label_continue = -1;
3246 TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
ad7ee8a2 3247
750813cf
CF
3248 if (!extract32(insn, 29, 1)) {
3249 unallocated_encoding(s);
3250 return;
3251 }
3252 if (insn & (1 << 10 | 1 << 4)) {
3253 unallocated_encoding(s);
3254 return;
3255 }
3256 sf = extract32(insn, 31, 1);
3257 op = extract32(insn, 30, 1);
3258 is_imm = extract32(insn, 11, 1);
3259 y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
3260 cond = extract32(insn, 12, 4);
3261 rn = extract32(insn, 5, 5);
3262 nzcv = extract32(insn, 0, 4);
3263
3264 if (cond < 0x0e) { /* not always */
3265 int label_match = gen_new_label();
3266 label_continue = gen_new_label();
3267 arm_gen_test_cc(cond, label_match);
3268 /* nomatch: */
3269 tcg_tmp = tcg_temp_new_i64();
3270 tcg_gen_movi_i64(tcg_tmp, nzcv << 28);
3271 gen_set_nzcv(tcg_tmp);
3272 tcg_temp_free_i64(tcg_tmp);
3273 tcg_gen_br(label_continue);
3274 gen_set_label(label_match);
3275 }
3276 /* match, or condition is always */
3277 if (is_imm) {
3278 tcg_y = new_tmp_a64(s);
3279 tcg_gen_movi_i64(tcg_y, y);
3280 } else {
3281 tcg_y = cpu_reg(s, y);
3282 }
3283 tcg_rn = cpu_reg(s, rn);
3284
3285 tcg_tmp = tcg_temp_new_i64();
3286 if (op) {
3287 gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3288 } else {
3289 gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3290 }
3291 tcg_temp_free_i64(tcg_tmp);
3292
3293 if (cond < 0x0e) { /* continue */
3294 gen_set_label(label_continue);
3295 }
ad7ee8a2
CF
3296}
3297
e952d8c7
CF
3298/* C3.5.6 Conditional select
3299 * 31 30 29 28 21 20 16 15 12 11 10 9 5 4 0
3300 * +----+----+---+-----------------+------+------+-----+------+------+
3301 * | sf | op | S | 1 1 0 1 0 1 0 0 | Rm | cond | op2 | Rn | Rd |
3302 * +----+----+---+-----------------+------+------+-----+------+------+
3303 */
ad7ee8a2
CF
3304static void disas_cond_select(DisasContext *s, uint32_t insn)
3305{
e952d8c7
CF
3306 unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
3307 TCGv_i64 tcg_rd, tcg_src;
3308
3309 if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
3310 /* S == 1 or op2<1> == 1 */
3311 unallocated_encoding(s);
3312 return;
3313 }
3314 sf = extract32(insn, 31, 1);
3315 else_inv = extract32(insn, 30, 1);
3316 rm = extract32(insn, 16, 5);
3317 cond = extract32(insn, 12, 4);
3318 else_inc = extract32(insn, 10, 1);
3319 rn = extract32(insn, 5, 5);
3320 rd = extract32(insn, 0, 5);
3321
3322 if (rd == 31) {
3323 /* silly no-op write; until we use movcond we must special-case
3324 * this to avoid a dead temporary across basic blocks.
3325 */
3326 return;
3327 }
3328
3329 tcg_rd = cpu_reg(s, rd);
3330
3331 if (cond >= 0x0e) { /* condition "always" */
3332 tcg_src = read_cpu_reg(s, rn, sf);
3333 tcg_gen_mov_i64(tcg_rd, tcg_src);
3334 } else {
3335 /* OPTME: we could use movcond here, at the cost of duplicating
3336 * a lot of the arm_gen_test_cc() logic.
3337 */
3338 int label_match = gen_new_label();
3339 int label_continue = gen_new_label();
3340
3341 arm_gen_test_cc(cond, label_match);
3342 /* nomatch: */
3343 tcg_src = cpu_reg(s, rm);
3344
3345 if (else_inv && else_inc) {
3346 tcg_gen_neg_i64(tcg_rd, tcg_src);
3347 } else if (else_inv) {
3348 tcg_gen_not_i64(tcg_rd, tcg_src);
3349 } else if (else_inc) {
3350 tcg_gen_addi_i64(tcg_rd, tcg_src, 1);
3351 } else {
3352 tcg_gen_mov_i64(tcg_rd, tcg_src);
3353 }
3354 if (!sf) {
3355 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3356 }
3357 tcg_gen_br(label_continue);
3358 /* match: */
3359 gen_set_label(label_match);
3360 tcg_src = read_cpu_reg(s, rn, sf);
3361 tcg_gen_mov_i64(tcg_rd, tcg_src);
3362 /* continue: */
3363 gen_set_label(label_continue);
3364 }
ad7ee8a2
CF
3365}
3366
680ead21
CF
3367static void handle_clz(DisasContext *s, unsigned int sf,
3368 unsigned int rn, unsigned int rd)
3369{
3370 TCGv_i64 tcg_rd, tcg_rn;
3371 tcg_rd = cpu_reg(s, rd);
3372 tcg_rn = cpu_reg(s, rn);
3373
3374 if (sf) {
3375 gen_helper_clz64(tcg_rd, tcg_rn);
3376 } else {
3377 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3378 tcg_gen_trunc_i64_i32(tcg_tmp32, tcg_rn);
3379 gen_helper_clz(tcg_tmp32, tcg_tmp32);
3380 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3381 tcg_temp_free_i32(tcg_tmp32);
3382 }
3383}
3384
e80c5020
CF
3385static void handle_cls(DisasContext *s, unsigned int sf,
3386 unsigned int rn, unsigned int rd)
3387{
3388 TCGv_i64 tcg_rd, tcg_rn;
3389 tcg_rd = cpu_reg(s, rd);
3390 tcg_rn = cpu_reg(s, rn);
3391
3392 if (sf) {
3393 gen_helper_cls64(tcg_rd, tcg_rn);
3394 } else {
3395 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3396 tcg_gen_trunc_i64_i32(tcg_tmp32, tcg_rn);
3397 gen_helper_cls32(tcg_tmp32, tcg_tmp32);
3398 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3399 tcg_temp_free_i32(tcg_tmp32);
3400 }
3401}
3402
82e14b02
AG
3403static void handle_rbit(DisasContext *s, unsigned int sf,
3404 unsigned int rn, unsigned int rd)
3405{
3406 TCGv_i64 tcg_rd, tcg_rn;
3407 tcg_rd = cpu_reg(s, rd);
3408 tcg_rn = cpu_reg(s, rn);
3409
3410 if (sf) {
3411 gen_helper_rbit64(tcg_rd, tcg_rn);
3412 } else {
3413 TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3414 tcg_gen_trunc_i64_i32(tcg_tmp32, tcg_rn);
3415 gen_helper_rbit(tcg_tmp32, tcg_tmp32);
3416 tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3417 tcg_temp_free_i32(tcg_tmp32);
3418 }
3419}
3420
45323209
CF
3421/* C5.6.149 REV with sf==1, opcode==3 ("REV64") */
3422static void handle_rev64(DisasContext *s, unsigned int sf,
3423 unsigned int rn, unsigned int rd)
3424{
3425 if (!sf) {
3426 unallocated_encoding(s);
3427 return;
3428 }
3429 tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
3430}
3431
3432/* C5.6.149 REV with sf==0, opcode==2
3433 * C5.6.151 REV32 (sf==1, opcode==2)
3434 */
3435static void handle_rev32(DisasContext *s, unsigned int sf,
3436 unsigned int rn, unsigned int rd)
3437{
3438 TCGv_i64 tcg_rd = cpu_reg(s, rd);
3439
3440 if (sf) {
3441 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3442 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3443
3444 /* bswap32_i64 requires zero high word */
3445 tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
3446 tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
3447 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
3448 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
3449 tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
3450
3451 tcg_temp_free_i64(tcg_tmp);
3452 } else {
3453 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
3454 tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
3455 }
3456}
3457
3458/* C5.6.150 REV16 (opcode==1) */
3459static void handle_rev16(DisasContext *s, unsigned int sf,
3460 unsigned int rn, unsigned int rd)
3461{
3462 TCGv_i64 tcg_rd = cpu_reg(s, rd);
3463 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3464 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3465
3466 tcg_gen_andi_i64(tcg_tmp, tcg_rn, 0xffff);
3467 tcg_gen_bswap16_i64(tcg_rd, tcg_tmp);
3468
3469 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 16);
3470 tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
3471 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3472 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 16, 16);
3473
3474 if (sf) {
3475 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
3476 tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
3477 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3478 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 32, 16);
3479
3480 tcg_gen_shri_i64(tcg_tmp, tcg_rn, 48);
3481 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3482 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 48, 16);
3483 }
3484
3485 tcg_temp_free_i64(tcg_tmp);
3486}
3487
680ead21
CF
3488/* C3.5.7 Data-processing (1 source)
3489 * 31 30 29 28 21 20 16 15 10 9 5 4 0
3490 * +----+---+---+-----------------+---------+--------+------+------+
3491 * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode | Rn | Rd |
3492 * +----+---+---+-----------------+---------+--------+------+------+
3493 */
ad7ee8a2
CF
3494static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
3495{
680ead21
CF
3496 unsigned int sf, opcode, rn, rd;
3497
3498 if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
3499 unallocated_encoding(s);
3500 return;
3501 }
3502
3503 sf = extract32(insn, 31, 1);
3504 opcode = extract32(insn, 10, 6);
3505 rn = extract32(insn, 5, 5);
3506 rd = extract32(insn, 0, 5);
3507
3508 switch (opcode) {
3509 case 0: /* RBIT */
82e14b02
AG
3510 handle_rbit(s, sf, rn, rd);
3511 break;
680ead21 3512 case 1: /* REV16 */
45323209
CF
3513 handle_rev16(s, sf, rn, rd);
3514 break;
680ead21 3515 case 2: /* REV32 */
45323209
CF
3516 handle_rev32(s, sf, rn, rd);
3517 break;
680ead21 3518 case 3: /* REV64 */
45323209 3519 handle_rev64(s, sf, rn, rd);
680ead21
CF
3520 break;
3521 case 4: /* CLZ */
3522 handle_clz(s, sf, rn, rd);
3523 break;
3524 case 5: /* CLS */
e80c5020 3525 handle_cls(s, sf, rn, rd);
680ead21
CF
3526 break;
3527 }
ad7ee8a2
CF
3528}
3529
8220e911
AG
3530static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
3531 unsigned int rm, unsigned int rn, unsigned int rd)
3532{
3533 TCGv_i64 tcg_n, tcg_m, tcg_rd;
3534 tcg_rd = cpu_reg(s, rd);
3535
3536 if (!sf && is_signed) {
3537 tcg_n = new_tmp_a64(s);
3538 tcg_m = new_tmp_a64(s);
3539 tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
3540 tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
3541 } else {
3542 tcg_n = read_cpu_reg(s, rn, sf);
3543 tcg_m = read_cpu_reg(s, rm, sf);
3544 }
3545
3546 if (is_signed) {
3547 gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
3548 } else {
3549 gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
3550 }
3551
3552 if (!sf) { /* zero extend final result */
3553 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3554 }
3555}
3556
6c1adc91
AG
3557/* C5.6.115 LSLV, C5.6.118 LSRV, C5.6.17 ASRV, C5.6.154 RORV */
3558static void handle_shift_reg(DisasContext *s,
3559 enum a64_shift_type shift_type, unsigned int sf,
3560 unsigned int rm, unsigned int rn, unsigned int rd)
3561{
3562 TCGv_i64 tcg_shift = tcg_temp_new_i64();
3563 TCGv_i64 tcg_rd = cpu_reg(s, rd);
3564 TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3565
3566 tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
3567 shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
3568 tcg_temp_free_i64(tcg_shift);
3569}
3570
8220e911
AG
3571/* C3.5.8 Data-processing (2 source)
3572 * 31 30 29 28 21 20 16 15 10 9 5 4 0
3573 * +----+---+---+-----------------+------+--------+------+------+
3574 * | sf | 0 | S | 1 1 0 1 0 1 1 0 | Rm | opcode | Rn | Rd |
3575 * +----+---+---+-----------------+------+--------+------+------+
3576 */
ad7ee8a2
CF
3577static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
3578{
8220e911
AG
3579 unsigned int sf, rm, opcode, rn, rd;
3580 sf = extract32(insn, 31, 1);
3581 rm = extract32(insn, 16, 5);
3582 opcode = extract32(insn, 10, 6);
3583 rn = extract32(insn, 5, 5);
3584 rd = extract32(insn, 0, 5);
3585
3586 if (extract32(insn, 29, 1)) {
3587 unallocated_encoding(s);
3588 return;
3589 }
3590
3591 switch (opcode) {
3592 case 2: /* UDIV */
3593 handle_div(s, false, sf, rm, rn, rd);
3594 break;
3595 case 3: /* SDIV */
3596 handle_div(s, true, sf, rm, rn, rd);
3597 break;
3598 case 8: /* LSLV */
6c1adc91
AG
3599 handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
3600 break;
8220e911 3601 case 9: /* LSRV */
6c1adc91
AG
3602 handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
3603 break;
8220e911 3604 case 10: /* ASRV */
6c1adc91
AG
3605 handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
3606 break;
8220e911 3607 case 11: /* RORV */
6c1adc91
AG
3608 handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
3609 break;
8220e911
AG
3610 case 16:
3611 case 17:
3612 case 18:
3613 case 19:
3614 case 20:
3615 case 21:
3616 case 22:
3617 case 23: /* CRC32 */
3618 unsupported_encoding(s, insn);
3619 break;
3620 default:
3621 unallocated_encoding(s);
3622 break;
3623 }
ad7ee8a2
CF
3624}
3625
3626/* C3.5 Data processing - register */
3627static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
3628{
3629 switch (extract32(insn, 24, 5)) {
3630 case 0x0a: /* Logical (shifted register) */
3631 disas_logic_reg(s, insn);
3632 break;
3633 case 0x0b: /* Add/subtract */
3634 if (insn & (1 << 21)) { /* (extended register) */
3635 disas_add_sub_ext_reg(s, insn);
3636 } else {
3637 disas_add_sub_reg(s, insn);
3638 }
3639 break;
3640 case 0x1b: /* Data-processing (3 source) */
3641 disas_data_proc_3src(s, insn);
3642 break;
3643 case 0x1a:
3644 switch (extract32(insn, 21, 3)) {
3645 case 0x0: /* Add/subtract (with carry) */
3646 disas_adc_sbc(s, insn);
3647 break;
3648 case 0x2: /* Conditional compare */
750813cf 3649 disas_cc(s, insn); /* both imm and reg forms */
ad7ee8a2
CF
3650 break;
3651 case 0x4: /* Conditional select */
3652 disas_cond_select(s, insn);
3653 break;
3654 case 0x6: /* Data-processing */
3655 if (insn & (1 << 30)) { /* (1 source) */
3656 disas_data_proc_1src(s, insn);
3657 } else { /* (2 source) */
3658 disas_data_proc_2src(s, insn);
3659 }
3660 break;
3661 default:
3662 unallocated_encoding(s);
3663 break;
3664 }
3665 break;
3666 default:
3667 unallocated_encoding(s);
3668 break;
3669 }
3670}
3671
da7dafe7
CF
3672static void handle_fp_compare(DisasContext *s, bool is_double,
3673 unsigned int rn, unsigned int rm,
3674 bool cmp_with_zero, bool signal_all_nans)
3675{
3676 TCGv_i64 tcg_flags = tcg_temp_new_i64();
3677 TCGv_ptr fpst = get_fpstatus_ptr();
3678
3679 if (is_double) {
3680 TCGv_i64 tcg_vn, tcg_vm;
3681
3682 tcg_vn = read_fp_dreg(s, rn);
3683 if (cmp_with_zero) {
3684 tcg_vm = tcg_const_i64(0);
3685 } else {
3686 tcg_vm = read_fp_dreg(s, rm);
3687 }
3688 if (signal_all_nans) {
3689 gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
3690 } else {
3691 gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
3692 }
3693 tcg_temp_free_i64(tcg_vn);
3694 tcg_temp_free_i64(tcg_vm);
3695 } else {
3696 TCGv_i32 tcg_vn, tcg_vm;
3697
3698 tcg_vn = read_fp_sreg(s, rn);
3699 if (cmp_with_zero) {
3700 tcg_vm = tcg_const_i32(0);
3701 } else {
3702 tcg_vm = read_fp_sreg(s, rm);
3703 }
3704 if (signal_all_nans) {
3705 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
3706 } else {
3707 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
3708 }
3709 tcg_temp_free_i32(tcg_vn);
3710 tcg_temp_free_i32(tcg_vm);
3711 }
3712
3713 tcg_temp_free_ptr(fpst);
3714
3715 gen_set_nzcv(tcg_flags);
3716
3717 tcg_temp_free_i64(tcg_flags);
3718}
3719
faa0ba46
PM
3720/* C3.6.22 Floating point compare
3721 * 31 30 29 28 24 23 22 21 20 16 15 14 13 10 9 5 4 0
3722 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
3723 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | op | 1 0 0 0 | Rn | op2 |
3724 * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
3725 */
3726static void disas_fp_compare(DisasContext *s, uint32_t insn)
3727{
da7dafe7
CF
3728 unsigned int mos, type, rm, op, rn, opc, op2r;
3729
3730 mos = extract32(insn, 29, 3);
3731 type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
3732 rm = extract32(insn, 16, 5);
3733 op = extract32(insn, 14, 2);
3734 rn = extract32(insn, 5, 5);
3735 opc = extract32(insn, 3, 2);
3736 op2r = extract32(insn, 0, 3);
3737
3738 if (mos || op || op2r || type > 1) {
3739 unallocated_encoding(s);
3740 return;
3741 }
3742
3743 handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
faa0ba46
PM
3744}
3745
3746/* C3.6.23 Floating point conditional compare
3747 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 3 0
3748 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
3749 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 0 1 | Rn | op | nzcv |
3750 * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
3751 */
3752static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
3753{
513f1d76
CF
3754 unsigned int mos, type, rm, cond, rn, op, nzcv;
3755 TCGv_i64 tcg_flags;
3756 int label_continue = -1;
3757
3758 mos = extract32(insn, 29, 3);
3759 type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
3760 rm = extract32(insn, 16, 5);
3761 cond = extract32(insn, 12, 4);
3762 rn = extract32(insn, 5, 5);
3763 op = extract32(insn, 4, 1);
3764 nzcv = extract32(insn, 0, 4);
3765
3766 if (mos || type > 1) {
3767 unallocated_encoding(s);
3768 return;
3769 }
3770
3771 if (cond < 0x0e) { /* not always */
3772 int label_match = gen_new_label();
3773 label_continue = gen_new_label();
3774 arm_gen_test_cc(cond, label_match);
3775 /* nomatch: */
3776 tcg_flags = tcg_const_i64(nzcv << 28);
3777 gen_set_nzcv(tcg_flags);
3778 tcg_temp_free_i64(tcg_flags);
3779 tcg_gen_br(label_continue);
3780 gen_set_label(label_match);
3781 }
3782
3783 handle_fp_compare(s, type, rn, rm, false, op);
3784
3785 if (cond < 0x0e) {
3786 gen_set_label(label_continue);
3787 }
faa0ba46
PM
3788}
3789
5640ff62
CF
3790/* copy src FP register to dst FP register; type specifies single or double */
3791static void gen_mov_fp2fp(DisasContext *s, int type, int dst, int src)
3792{
3793 if (type) {
3794 TCGv_i64 v = read_fp_dreg(s, src);
3795 write_fp_dreg(s, dst, v);
3796 tcg_temp_free_i64(v);
3797 } else {
3798 TCGv_i32 v = read_fp_sreg(s, src);
3799 write_fp_sreg(s, dst, v);
3800 tcg_temp_free_i32(v);
3801 }
3802}
3803
faa0ba46
PM
3804/* C3.6.24 Floating point conditional select
3805 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
3806 * +---+---+---+-----------+------+---+------+------+-----+------+------+
3807 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 1 1 | Rn | Rd |
3808 * +---+---+---+-----------+------+---+------+------+-----+------+------+
3809 */
3810static void disas_fp_csel(DisasContext *s, uint32_t insn)
3811{
5640ff62
CF
3812 unsigned int mos, type, rm, cond, rn, rd;
3813 int label_continue = -1;
3814
3815 mos = extract32(insn, 29, 3);
3816 type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
3817 rm = extract32(insn, 16, 5);
3818 cond = extract32(insn, 12, 4);
3819 rn = extract32(insn, 5, 5);
3820 rd = extract32(insn, 0, 5);
3821
3822 if (mos || type > 1) {
3823 unallocated_encoding(s);
3824 return;
3825 }
3826
3827 if (cond < 0x0e) { /* not always */
3828 int label_match = gen_new_label();
3829 label_continue = gen_new_label();
3830 arm_gen_test_cc(cond, label_match);
3831 /* nomatch: */
3832 gen_mov_fp2fp(s, type, rd, rm);
3833 tcg_gen_br(label_continue);
3834 gen_set_label(label_match);
3835 }
3836
3837 gen_mov_fp2fp(s, type, rd, rn);
3838
3839 if (cond < 0x0e) { /* continue */
3840 gen_set_label(label_continue);
3841 }
faa0ba46
PM
3842}
3843
d9b0848d
PM
3844/* C3.6.25 Floating-point data-processing (1 source) - single precision */
3845static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
3846{
3847 TCGv_ptr fpst;
3848 TCGv_i32 tcg_op;
3849 TCGv_i32 tcg_res;
3850
3851 fpst = get_fpstatus_ptr();
3852 tcg_op = read_fp_sreg(s, rn);
3853 tcg_res = tcg_temp_new_i32();
3854
3855 switch (opcode) {
3856 case 0x0: /* FMOV */
3857 tcg_gen_mov_i32(tcg_res, tcg_op);
3858 break;
3859 case 0x1: /* FABS */
3860 gen_helper_vfp_abss(tcg_res, tcg_op);
3861 break;
3862 case 0x2: /* FNEG */
3863 gen_helper_vfp_negs(tcg_res, tcg_op);
3864 break;
3865 case 0x3: /* FSQRT */
3866 gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
3867 break;
3868 case 0x8: /* FRINTN */
3869 case 0x9: /* FRINTP */
3870 case 0xa: /* FRINTM */
3871 case 0xb: /* FRINTZ */
3872 case 0xc: /* FRINTA */
3873 {
3874 TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
3875
3876 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
3877 gen_helper_rints(tcg_res, tcg_op, fpst);
3878
3879 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
3880 tcg_temp_free_i32(tcg_rmode);
3881 break;
3882 }
3883 case 0xe: /* FRINTX */
3884 gen_helper_rints_exact(tcg_res, tcg_op, fpst);
3885 break;
3886 case 0xf: /* FRINTI */
3887 gen_helper_rints(tcg_res, tcg_op, fpst);
3888 break;
3889 default:
3890 abort();
3891 }
3892
3893 write_fp_sreg(s, rd, tcg_res);
3894
3895 tcg_temp_free_ptr(fpst);
3896 tcg_temp_free_i32(tcg_op);
3897 tcg_temp_free_i32(tcg_res);
3898}
3899
3900/* C3.6.25 Floating-point data-processing (1 source) - double precision */
3901static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
3902{
3903 TCGv_ptr fpst;
3904 TCGv_i64 tcg_op;
3905 TCGv_i64 tcg_res;
3906
3907 fpst = get_fpstatus_ptr();
3908 tcg_op = read_fp_dreg(s, rn);
3909 tcg_res = tcg_temp_new_i64();
3910
3911 switch (opcode) {
3912 case 0x0: /* FMOV */
3913 tcg_gen_mov_i64(tcg_res, tcg_op);
3914 break;
3915 case 0x1: /* FABS */
3916 gen_helper_vfp_absd(tcg_res, tcg_op);
3917 break;
3918 case 0x2: /* FNEG */
3919 gen_helper_vfp_negd(tcg_res, tcg_op);
3920 break;
3921 case 0x3: /* FSQRT */
3922 gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
3923 break;
3924 case 0x8: /* FRINTN */
3925 case 0x9: /* FRINTP */
3926 case 0xa: /* FRINTM */
3927 case 0xb: /* FRINTZ */
3928 case 0xc: /* FRINTA */
3929 {
3930 TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
3931
3932 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
3933 gen_helper_rintd(tcg_res, tcg_op, fpst);
3934
3935 gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
3936 tcg_temp_free_i32(tcg_rmode);
3937 break;
3938 }
3939 case 0xe: /* FRINTX */
3940 gen_helper_rintd_exact(tcg_res, tcg_op, fpst);
3941 break;
3942 case 0xf: /* FRINTI */
3943 gen_helper_rintd(tcg_res, tcg_op, fpst);
3944 break;
3945 default:
3946 abort();
3947 }
3948
3949 write_fp_dreg(s, rd, tcg_res);
3950
3951 tcg_temp_free_ptr(fpst);
3952 tcg_temp_free_i64(tcg_op);
3953 tcg_temp_free_i64(tcg_res);
3954}
3955
8900aad2
PM
3956static void handle_fp_fcvt(DisasContext *s, int opcode,
3957 int rd, int rn, int dtype, int ntype)
3958{
3959 switch (ntype) {
3960 case 0x0:
3961 {
3962 TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
3963 if (dtype == 1) {
3964 /* Single to double */
3965 TCGv_i64 tcg_rd = tcg_temp_new_i64();
3966 gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
3967 write_fp_dreg(s, rd, tcg_rd);
3968 tcg_temp_free_i64(tcg_rd);
3969 } else {
3970 /* Single to half */
3971 TCGv_i32 tcg_rd = tcg_temp_new_i32();
3972 gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, cpu_env);
3973 /* write_fp_sreg is OK here because top half of tcg_rd is zero */
3974 write_fp_sreg(s, rd, tcg_rd);
3975 tcg_temp_free_i32(tcg_rd);
3976 }
3977 tcg_temp_free_i32(tcg_rn);
3978 break;
3979 }
3980 case 0x1:
3981 {
3982 TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
3983 TCGv_i32 tcg_rd = tcg_temp_new_i32();
3984 if (dtype == 0) {
3985 /* Double to single */
3986 gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
3987 } else {
3988 /* Double to half */
3989 gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, cpu_env);
3990 /* write_fp_sreg is OK here because top half of tcg_rd is zero */
3991 }
3992 write_fp_sreg(s, rd, tcg_rd);
3993 tcg_temp_free_i32(tcg_rd);
3994 tcg_temp_free_i64(tcg_rn);
3995 break;
3996 }
3997 case 0x3:
3998 {
3999 TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4000 tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
4001 if (dtype == 0) {
4002 /* Half to single */
4003 TCGv_i32 tcg_rd = tcg_temp_new_i32();
4004 gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, cpu_env);
4005 write_fp_sreg(s, rd, tcg_rd);
4006 tcg_temp_free_i32(tcg_rd);
4007 } else {
4008 /* Half to double */
4009 TCGv_i64 tcg_rd = tcg_temp_new_i64();
4010 gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, cpu_env);
4011 write_fp_dreg(s, rd, tcg_rd);
4012 tcg_temp_free_i64(tcg_rd);
4013 }
4014 tcg_temp_free_i32(tcg_rn);
4015 break;
4016 }
4017 default:
4018 abort();
4019 }
4020}
4021
faa0ba46
PM
4022/* C3.6.25 Floating point data-processing (1 source)
4023 * 31 30 29 28 24 23 22 21 20 15 14 10 9 5 4 0
4024 * +---+---+---+-----------+------+---+--------+-----------+------+------+
4025 * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 | Rn | Rd |
4026 * +---+---+---+-----------+------+---+--------+-----------+------+------+
4027 */
4028static void disas_fp_1src(DisasContext *s, uint32_t insn)
4029{
d9b0848d
PM
4030 int type = extract32(insn, 22, 2);
4031 int opcode = extract32(insn, 15, 6);
4032 int rn = extract32(insn, 5, 5);
4033 int rd = extract32(insn, 0, 5);
4034
4035 switch (opcode) {
4036 case 0x4: case 0x5: case 0x7:
8900aad2 4037 {
d9b0848d 4038 /* FCVT between half, single and double precision */
8900aad2
PM
4039 int dtype = extract32(opcode, 0, 2);
4040 if (type == 2 || dtype == type) {
4041 unallocated_encoding(s);
4042 return;
4043 }
4044 handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
d9b0848d 4045 break;
8900aad2 4046 }
d9b0848d
PM
4047 case 0x0 ... 0x3:
4048 case 0x8 ... 0xc:
4049 case 0xe ... 0xf:
4050 /* 32-to-32 and 64-to-64 ops */
4051 switch (type) {
4052 case 0:
4053 handle_fp_1src_single(s, opcode, rd, rn);
4054 break;
4055 case 1:
4056 handle_fp_1src_double(s, opcode, rd, rn);
4057 break;
4058 default:
4059 unallocated_encoding(s);
4060 }
4061 break;
4062 default:
4063 unallocated_encoding(s);
4064 break;
4065 }
faa0ba46
PM
4066}
4067
ec73d2e0
AG
4068/* C3.6.26 Floating-point data-processing (2 source) - single precision */
4069static void handle_fp_2src_single(DisasContext *s, int opcode,
4070 int rd, int rn, int rm)
4071{
4072 TCGv_i32 tcg_op1;
4073 TCGv_i32 tcg_op2;
4074 TCGv_i32 tcg_res;
4075 TCGv_ptr fpst;
4076
4077 tcg_res = tcg_temp_new_i32();
4078 fpst = get_fpstatus_ptr();
4079 tcg_op1 = read_fp_sreg(s, rn);
4080 tcg_op2 = read_fp_sreg(s, rm);
4081
4082 switch (opcode) {
4083 case 0x0: /* FMUL */
4084 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4085 break;
4086 case 0x1: /* FDIV */
4087 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
4088 break;
4089 case 0x2: /* FADD */
4090 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
4091 break;
4092 case 0x3: /* FSUB */
4093 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
4094 break;
4095 case 0x4: /* FMAX */
4096 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
4097 break;
4098 case 0x5: /* FMIN */
4099 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
4100 break;
4101 case 0x6: /* FMAXNM */
4102 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
4103 break;
4104 case 0x7: /* FMINNM */
4105 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
4106 break;
4107 case 0x8: /* FNMUL */
4108 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4109 gen_helper_vfp_negs(tcg_res, tcg_res);
4110 break;
4111 }
4112
4113 write_fp_sreg(s, rd, tcg_res);
4114
4115 tcg_temp_free_ptr(fpst);
4116 tcg_temp_free_i32(tcg_op1);
4117 tcg_temp_free_i32(tcg_op2);
4118 tcg_temp_free_i32(tcg_res);
4119}
4120
4121/* C3.6.26 Floating-point data-processing (2 source) - double precision */
4122static void handle_fp_2src_double(DisasContext *s, int opcode,
4123 int rd, int rn, int rm)
4124{
4125 TCGv_i64 tcg_op1;
4126 TCGv_i64 tcg_op2;
4127 TCGv_i64 tcg_res;
4128 TCGv_ptr fpst;
4129
4130 tcg_res = tcg_temp_new_i64();
4131 fpst = get_fpstatus_ptr();
4132 tcg_op1 = read_fp_dreg(s, rn);
4133 tcg_op2 = read_fp_dreg(s, rm);
4134
4135 switch (opcode) {
4136 case 0x0: /* FMUL */
4137 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4138 break;
4139 case 0x1: /* FDIV */
4140 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
4141 break;
4142 case 0x2: /* FADD */
4143 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
4144 break;
4145 case 0x3: /* FSUB */
4146 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
4147 break;
4148 case 0x4: /* FMAX */
4149 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
4150 break;
4151 case 0x5: /* FMIN */
4152 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
4153 break;
4154 case 0x6: /* FMAXNM */
4155 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4156 break;
4157 case 0x7: /* FMINNM */
4158 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4159 break;
4160 case 0x8: /* FNMUL */
4161 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4162 gen_helper_vfp_negd(tcg_res, tcg_res);
4163 break;
4164 }
4165
4166 write_fp_dreg(s, rd, tcg_res);
4167
4168 tcg_temp_free_ptr(fpst);
4169 tcg_temp_free_i64(tcg_op1);
4170 tcg_temp_free_i64(tcg_op2);
4171 tcg_temp_free_i64(tcg_res);
4172}
4173
faa0ba46
PM
4174/* C3.6.26 Floating point data-processing (2 source)
4175 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
4176 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4177 * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | opcode | 1 0 | Rn | Rd |
4178 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4179 */
4180static void disas_fp_2src(DisasContext *s, uint32_t insn)
4181{
ec73d2e0
AG
4182 int type = extract32(insn, 22, 2);
4183 int rd = extract32(insn, 0, 5);
4184 int rn = extract32(insn, 5, 5);
4185 int rm = extract32(insn, 16, 5);
4186 int opcode = extract32(insn, 12, 4);
4187
4188 if (opcode > 8) {
4189 unallocated_encoding(s);
4190 return;
4191 }
4192
4193 switch (type) {
4194 case 0:
4195 handle_fp_2src_single(s, opcode, rd, rn, rm);
4196 break;
4197 case 1:
4198 handle_fp_2src_double(s, opcode, rd, rn, rm);
4199 break;
4200 default:
4201 unallocated_encoding(s);
4202 }
faa0ba46
PM
4203}
4204
6a30667f
AG
4205/* C3.6.27 Floating-point data-processing (3 source) - single precision */
4206static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
4207 int rd, int rn, int rm, int ra)
4208{
4209 TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
4210 TCGv_i32 tcg_res = tcg_temp_new_i32();
4211 TCGv_ptr fpst = get_fpstatus_ptr();
4212
4213 tcg_op1 = read_fp_sreg(s, rn);
4214 tcg_op2 = read_fp_sreg(s, rm);
4215 tcg_op3 = read_fp_sreg(s, ra);
4216
4217 /* These are fused multiply-add, and must be done as one
4218 * floating point operation with no rounding between the
4219 * multiplication and addition steps.
4220 * NB that doing the negations here as separate steps is
4221 * correct : an input NaN should come out with its sign bit
4222 * flipped if it is a negated-input.
4223 */
4224 if (o1 == true) {
4225 gen_helper_vfp_negs(tcg_op3, tcg_op3);
4226 }
4227
4228 if (o0 != o1) {
4229 gen_helper_vfp_negs(tcg_op1, tcg_op1);
4230 }
4231
4232 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4233
4234 write_fp_sreg(s, rd, tcg_res);
4235
4236 tcg_temp_free_ptr(fpst);
4237 tcg_temp_free_i32(tcg_op1);
4238 tcg_temp_free_i32(tcg_op2);
4239 tcg_temp_free_i32(tcg_op3);
4240 tcg_temp_free_i32(tcg_res);
4241}
4242
4243/* C3.6.27 Floating-point data-processing (3 source) - double precision */
4244static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
4245 int rd, int rn, int rm, int ra)
4246{
4247 TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
4248 TCGv_i64 tcg_res = tcg_temp_new_i64();
4249 TCGv_ptr fpst = get_fpstatus_ptr();
4250
4251 tcg_op1 = read_fp_dreg(s, rn);
4252 tcg_op2 = read_fp_dreg(s, rm);
4253 tcg_op3 = read_fp_dreg(s, ra);
4254
4255 /* These are fused multiply-add, and must be done as one
4256 * floating point operation with no rounding between the
4257 * multiplication and addition steps.
4258 * NB that doing the negations here as separate steps is
4259 * correct : an input NaN should come out with its sign bit
4260 * flipped if it is a negated-input.
4261 */
4262 if (o1 == true) {
4263 gen_helper_vfp_negd(tcg_op3, tcg_op3);
4264 }
4265
4266 if (o0 != o1) {
4267 gen_helper_vfp_negd(tcg_op1, tcg_op1);
4268 }
4269
4270 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4271
4272 write_fp_dreg(s, rd, tcg_res);
4273
4274 tcg_temp_free_ptr(fpst);
4275 tcg_temp_free_i64(tcg_op1);
4276 tcg_temp_free_i64(tcg_op2);
4277 tcg_temp_free_i64(tcg_op3);
4278 tcg_temp_free_i64(tcg_res);
4279}
4280
faa0ba46
PM
4281/* C3.6.27 Floating point data-processing (3 source)
4282 * 31 30 29 28 24 23 22 21 20 16 15 14 10 9 5 4 0
4283 * +---+---+---+-----------+------+----+------+----+------+------+------+
4284 * | M | 0 | S | 1 1 1 1 1 | type | o1 | Rm | o0 | Ra | Rn | Rd |
4285 * +---+---+---+-----------+------+----+------+----+------+------+------+
4286 */
4287static void disas_fp_3src(DisasContext *s, uint32_t insn)
4288{
6a30667f
AG
4289 int type = extract32(insn, 22, 2);
4290 int rd = extract32(insn, 0, 5);
4291 int rn = extract32(insn, 5, 5);
4292 int ra = extract32(insn, 10, 5);
4293 int rm = extract32(insn, 16, 5);
4294 bool o0 = extract32(insn, 15, 1);
4295 bool o1 = extract32(insn, 21, 1);
4296
4297 switch (type) {
4298 case 0:
4299 handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
4300 break;
4301 case 1:
4302 handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
4303 break;
4304 default:
4305 unallocated_encoding(s);
4306 }
faa0ba46
PM
4307}
4308
4309/* C3.6.28 Floating point immediate
4310 * 31 30 29 28 24 23 22 21 20 13 12 10 9 5 4 0
4311 * +---+---+---+-----------+------+---+------------+-------+------+------+
4312 * | M | 0 | S | 1 1 1 1 0 | type | 1 | imm8 | 1 0 0 | imm5 | Rd |
4313 * +---+---+---+-----------+------+---+------------+-------+------+------+
4314 */
4315static void disas_fp_imm(DisasContext *s, uint32_t insn)
4316{
6163f868
AG
4317 int rd = extract32(insn, 0, 5);
4318 int imm8 = extract32(insn, 13, 8);
4319 int is_double = extract32(insn, 22, 2);
4320 uint64_t imm;
4321 TCGv_i64 tcg_res;
4322
4323 if (is_double > 1) {
4324 unallocated_encoding(s);
4325 return;
4326 }
4327
4328 /* The imm8 encodes the sign bit, enough bits to represent
4329 * an exponent in the range 01....1xx to 10....0xx,
4330 * and the most significant 4 bits of the mantissa; see
4331 * VFPExpandImm() in the v8 ARM ARM.
4332 */
4333 if (is_double) {
4334 imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4335 (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
4336 extract32(imm8, 0, 6);
4337 imm <<= 48;
4338 } else {
4339 imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4340 (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
4341 (extract32(imm8, 0, 6) << 3);
4342 imm <<= 16;
4343 }
4344
4345 tcg_res = tcg_const_i64(imm);
4346 write_fp_dreg(s, rd, tcg_res);
4347 tcg_temp_free_i64(tcg_res);
faa0ba46
PM
4348}
4349
52a1f6a3
AG
/* Handle floating point <=> fixed point conversions. Note that we can
 * also deal with fp <=> integer conversions as a special case (scale == 64)
 * OPTME: consider handling that special case specially or at least skipping
 * the call to scalbn in the helpers for zero shifts.
 *
 * rd/rn: destination/source register numbers
 * opcode: bit 0 clear means signed ([SU]CVTF / FCVTxS), set means unsigned;
 *         bit 2 set selects the FCVTA round-to-nearest-ties-away forms
 * itof: true for int/fixed -> fp, false for fp -> int/fixed
 * rmode: FPRounding value used for the fp -> int direction
 * scale: number of fractional bits (64 == plain integer conversion)
 * sf: 64-bit (1) vs 32-bit (0) general register
 * type: double (1) vs single (0) precision
 */
static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
                           bool itof, int rmode, int scale, int sf, int type)
{
    bool is_signed = !(opcode & 1);
    bool is_double = type;
    TCGv_ptr tcg_fpstatus;
    TCGv_i32 tcg_shift;

    tcg_fpstatus = get_fpstatus_ptr();

    /* The helpers take the number of fractional bits as a scalbn-style
     * shift; 64 - scale so that scale == 64 means shift 0 (pure int).
     */
    tcg_shift = tcg_const_i32(64 - scale);

    if (itof) {
        TCGv_i64 tcg_int = cpu_reg(s, rn);
        if (!sf) {
            /* 32-bit source: widen to 64 bits with the correct signedness
             * before handing it to the 64-bit input helpers.
             */
            TCGv_i64 tcg_extend = new_tmp_a64(s);

            if (is_signed) {
                tcg_gen_ext32s_i64(tcg_extend, tcg_int);
            } else {
                tcg_gen_ext32u_i64(tcg_extend, tcg_int);
            }

            tcg_int = tcg_extend;
        }

        if (is_double) {
            TCGv_i64 tcg_double = tcg_temp_new_i64();
            if (is_signed) {
                gen_helper_vfp_sqtod(tcg_double, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_uqtod(tcg_double, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            }
            write_fp_dreg(s, rd, tcg_double);
            tcg_temp_free_i64(tcg_double);
        } else {
            TCGv_i32 tcg_single = tcg_temp_new_i32();
            if (is_signed) {
                gen_helper_vfp_sqtos(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_uqtos(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            }
            write_fp_sreg(s, rd, tcg_single);
            tcg_temp_free_i32(tcg_single);
        }
    } else {
        TCGv_i64 tcg_int = cpu_reg(s, rd);
        TCGv_i32 tcg_rmode;

        if (extract32(opcode, 2, 1)) {
            /* There are too many rounding modes to all fit into rmode,
             * so FCVTA[US] is a special case.
             */
            rmode = FPROUNDING_TIEAWAY;
        }

        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));

        /* NOTE(review): the set_rmode helper appears to write the previous
         * rounding mode back into tcg_rmode, so the second call below
         * restores it — confirm against the helper's definition.
         */
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);

        if (is_double) {
            TCGv_i64 tcg_double = read_fp_dreg(s, rn);
            if (is_signed) {
                if (!sf) {
                    gen_helper_vfp_tosld(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_tosqd(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                }
            } else {
                if (!sf) {
                    gen_helper_vfp_tould(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touqd(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                }
            }
            tcg_temp_free_i64(tcg_double);
        } else {
            TCGv_i32 tcg_single = read_fp_sreg(s, rn);
            if (sf) {
                if (is_signed) {
                    gen_helper_vfp_tosqs(tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touqs(tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
            } else {
                /* 32-bit result: convert into an i32 temp, then widen */
                TCGv_i32 tcg_dest = tcg_temp_new_i32();
                if (is_signed) {
                    gen_helper_vfp_tosls(tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touls(tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
                tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
                tcg_temp_free_i32(tcg_dest);
            }
            tcg_temp_free_i32(tcg_single);
        }

        /* restore the rounding mode saved above */
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
        tcg_temp_free_i32(tcg_rmode);

        if (!sf) {
            /* W-register destination: zero the high 32 bits */
            tcg_gen_ext32u_i64(tcg_int, tcg_int);
        }
    }

    tcg_temp_free_ptr(tcg_fpstatus);
    tcg_temp_free_i32(tcg_shift);
}
4475
faa0ba46
PM
4476/* C3.6.29 Floating point <-> fixed point conversions
4477 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0
4478 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
4479 * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale | Rn | Rd |
4480 * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
4481 */
4482static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
4483{
52a1f6a3
AG
4484 int rd = extract32(insn, 0, 5);
4485 int rn = extract32(insn, 5, 5);
4486 int scale = extract32(insn, 10, 6);
4487 int opcode = extract32(insn, 16, 3);
4488 int rmode = extract32(insn, 19, 2);
4489 int type = extract32(insn, 22, 2);
4490 bool sbit = extract32(insn, 29, 1);
4491 bool sf = extract32(insn, 31, 1);
4492 bool itof;
4493
4494 if (sbit || (type > 1)
4495 || (!sf && scale < 32)) {
4496 unallocated_encoding(s);
4497 return;
4498 }
4499
4500 switch ((rmode << 3) | opcode) {
4501 case 0x2: /* SCVTF */
4502 case 0x3: /* UCVTF */
4503 itof = true;
4504 break;
4505 case 0x18: /* FCVTZS */
4506 case 0x19: /* FCVTZU */
4507 itof = false;
4508 break;
4509 default:
4510 unallocated_encoding(s);
4511 return;
4512 }
4513
4514 handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
faa0ba46
PM
4515}
4516
ce5458e8
PM
/* FMOV between a general purpose register and an FP register, without
 * any value conversion.
 * type 0: 32 bits; type 1: 64 bits; type 2: 64 bits to/from the top
 * half of a quad FP register.
 * NOTE(review): there is no default case below — this assumes the caller
 * (disas_fp_int_conv) has already rejected any other type value; confirm.
 */
static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
{
    /* FMOV: gpr to or from float, double, or top half of quad fp reg,
     * without conversion.
     */

    if (itof) {
        TCGv_i64 tcg_rn = cpu_reg(s, rn);

        switch (type) {
        case 0:
        {
            /* 32 bit: store the zero-extended word to the low half of
             * the FP register and clear the high 64 bits.
             */
            TCGv_i64 tmp = tcg_temp_new_i64();
            tcg_gen_ext32u_i64(tmp, tcg_rn);
            tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(rd, MO_64));
            tcg_gen_movi_i64(tmp, 0);
            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(rd));
            tcg_temp_free_i64(tmp);
            break;
        }
        case 1:
        {
            /* 64 bit: store to the low half, zero the high 64 bits */
            TCGv_i64 tmp = tcg_const_i64(0);
            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(rd, MO_64));
            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(rd));
            tcg_temp_free_i64(tmp);
            break;
        }
        case 2:
            /* 64 bit to top half: the low 64 bits are left unchanged */
            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(rd));
            break;
        }
    } else {
        TCGv_i64 tcg_rd = cpu_reg(s, rd);

        switch (type) {
        case 0:
            /* 32 bit: zero-extending load of the low word */
            tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(rn, MO_32));
            break;
        case 1:
            /* 64 bit */
            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(rn, MO_64));
            break;
        case 2:
            /* 64 bits from top half */
            tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(rn));
            break;
        }
    }
}
4571
faa0ba46
PM
4572/* C3.6.30 Floating point <-> integer conversions
4573 * 31 30 29 28 24 23 22 21 20 19 18 16 15 10 9 5 4 0
4574 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
c436d406 4575 * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
faa0ba46
PM
4576 * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
4577 */
4578static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
4579{
ce5458e8
PM
4580 int rd = extract32(insn, 0, 5);
4581 int rn = extract32(insn, 5, 5);
4582 int opcode = extract32(insn, 16, 3);
4583 int rmode = extract32(insn, 19, 2);
4584 int type = extract32(insn, 22, 2);
4585 bool sbit = extract32(insn, 29, 1);
4586 bool sf = extract32(insn, 31, 1);
4587
c436d406
WN
4588 if (sbit) {
4589 unallocated_encoding(s);
4590 return;
4591 }
4592
4593 if (opcode > 5) {
ce5458e8
PM
4594 /* FMOV */
4595 bool itof = opcode & 1;
4596
c436d406
WN
4597 if (rmode >= 2) {
4598 unallocated_encoding(s);
4599 return;
4600 }
4601
ce5458e8
PM
4602 switch (sf << 3 | type << 1 | rmode) {
4603 case 0x0: /* 32 bit */
4604 case 0xa: /* 64 bit */
4605 case 0xd: /* 64 bit to top half of quad */
4606 break;
4607 default:
4608 /* all other sf/type/rmode combinations are invalid */
4609 unallocated_encoding(s);
4610 break;
4611 }
4612
4613 handle_fmov(s, rd, rn, type, itof);
4614 } else {
4615 /* actual FP conversions */
c436d406
WN
4616 bool itof = extract32(opcode, 1, 1);
4617
4618 if (type > 1 || (rmode != 0 && opcode > 1)) {
4619 unallocated_encoding(s);
4620 return;
4621 }
4622
4623 handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
ce5458e8 4624 }
faa0ba46
PM
4625}
4626
/* FP-specific subcases of table C3-6 (SIMD and FP data processing)
 *   31  30  29 28     25 24                          0
 * +---+---+---+---------+-----------------------------+
 * |   | 0 |   | 1 1 1 1 |                             |
 * +---+---+---+---------+-----------------------------+
 *
 * Second-level decode of the scalar FP group: routes the insn to the
 * per-class disassembly functions above.
 */
static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
{
    if (extract32(insn, 24, 1)) {
        /* Floating point data-processing (3 source) */
        disas_fp_3src(s, insn);
    } else if (extract32(insn, 21, 1) == 0) {
        /* Floating point to fixed point conversions */
        disas_fp_fixed_conv(s, insn);
    } else {
        switch (extract32(insn, 10, 2)) {
        case 1:
            /* Floating point conditional compare */
            disas_fp_ccomp(s, insn);
            break;
        case 2:
            /* Floating point data-processing (2 source) */
            disas_fp_2src(s, insn);
            break;
        case 3:
            /* Floating point conditional select */
            disas_fp_csel(s, insn);
            break;
        case 0:
            /* Dispatch on the position of the lowest set bit of
             * insn[15:12]; presumably ctz32(0) returns 32 here (QEMU
             * host-utils convention) so the all-zeroes pattern lands in
             * the default arm — confirm against ctz32's definition.
             */
            switch (ctz32(extract32(insn, 12, 4))) {
            case 0: /* [15:12] == xxx1 */
                /* Floating point immediate */
                disas_fp_imm(s, insn);
                break;
            case 1: /* [15:12] == xx10 */
                /* Floating point compare */
                disas_fp_compare(s, insn);
                break;
            case 2: /* [15:12] == x100 */
                /* Floating point data-processing (1 source) */
                disas_fp_1src(s, insn);
                break;
            case 3: /* [15:12] == 1000 */
                unallocated_encoding(s);
                break;
            default: /* [15:12] == 0000 */
                /* Floating point <-> integer conversions */
                disas_fp_int_conv(s, insn);
                break;
            }
            break;
        }
    }
}
4681
5c73747f
PM
4682static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
4683 int pos)
4684{
4685 /* Extract 64 bits from the middle of two concatenated 64 bit
4686 * vector register slices left:right. The extracted bits start
4687 * at 'pos' bits into the right (least significant) side.
4688 * We return the result in tcg_right, and guarantee not to
4689 * trash tcg_left.
4690 */
4691 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4692 assert(pos > 0 && pos < 64);
4693
4694 tcg_gen_shri_i64(tcg_right, tcg_right, pos);
4695 tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
4696 tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
4697
4698 tcg_temp_free_i64(tcg_tmp);
4699}
4700
384b26fb
AB
/* C3.6.1 EXT
 *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
 * +---+---+-------------+-----+---+------+---+------+---+------+------+
 * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
 * +---+---+-------------+-----+---+------+---+------+---+------+------+
 *
 * Extracts a (64 or 128 bit) vector from the byte-wise concatenation
 * Vm:Vn, starting imm4 bytes into Vn.
 */
static void disas_simd_ext(DisasContext *s, uint32_t insn)
{
    int is_q = extract32(insn, 30, 1);
    int op2 = extract32(insn, 22, 2);
    int imm4 = extract32(insn, 11, 4);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    int pos = imm4 << 3; /* extraction start, in bits */
    TCGv_i64 tcg_resl, tcg_resh;

    /* op2 must be zero; with a 64-bit vector the index must be < 8 */
    if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
        unallocated_encoding(s);
        return;
    }

    tcg_resh = tcg_temp_new_i64();
    tcg_resl = tcg_temp_new_i64();

    /* Vd gets bits starting at pos bits into Vm:Vn. This is
     * either extracting 128 bits from a 128:128 concatenation, or
     * extracting 64 bits from a 64:64 concatenation.
     */
    if (!is_q) {
        read_vec_element(s, tcg_resl, rn, 0, MO_64);
        if (pos != 0) {
            read_vec_element(s, tcg_resh, rm, 0, MO_64);
            do_ext64(s, tcg_resh, tcg_resl, pos);
        }
        /* upper half of the result is always zero in the 64-bit case */
        tcg_gen_movi_i64(tcg_resh, 0);
    } else {
        TCGv_i64 tcg_hh;
        typedef struct {
            int reg;
            int elt;
        } EltPosns;
        /* the four 64-bit slices of Vm:Vn, lowest first */
        EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
        EltPosns *elt = eltposns;

        /* a start point of 64 or more just skips the first slice */
        if (pos >= 64) {
            elt++;
            pos -= 64;
        }

        read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
        elt++;
        read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
        elt++;
        if (pos != 0) {
            /* shift each 64-bit result half, pulling in bits from the
             * next-higher slice via do_ext64
             */
            do_ext64(s, tcg_resh, tcg_resl, pos);
            tcg_hh = tcg_temp_new_i64();
            read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
            do_ext64(s, tcg_hh, tcg_resh, pos);
            tcg_temp_free_i64(tcg_hh);
        }
    }

    write_vec_element(s, tcg_resl, rd, 0, MO_64);
    tcg_temp_free_i64(tcg_resl);
    write_vec_element(s, tcg_resh, rd, 1, MO_64);
    tcg_temp_free_i64(tcg_resh);
}
4769
4770/* C3.6.2 TBL/TBX
4771 * 31 30 29 24 23 22 21 20 16 15 14 13 12 11 10 9 5 4 0
4772 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
4773 * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 | Rm | 0 | len | op | 0 0 | Rn | Rd |
4774 * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
4775 */
4776static void disas_simd_tb(DisasContext *s, uint32_t insn)
4777{
7c51048f
MM
4778 int op2 = extract32(insn, 22, 2);
4779 int is_q = extract32(insn, 30, 1);
4780 int rm = extract32(insn, 16, 5);
4781 int rn = extract32(insn, 5, 5);
4782 int rd = extract32(insn, 0, 5);
4783 int is_tblx = extract32(insn, 12, 1);
4784 int len = extract32(insn, 13, 2);
4785 TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
4786 TCGv_i32 tcg_regno, tcg_numregs;
4787
4788 if (op2 != 0) {
4789 unallocated_encoding(s);
4790 return;
4791 }
4792
4793 /* This does a table lookup: for every byte element in the input
4794 * we index into a table formed from up to four vector registers,
4795 * and then the output is the result of the lookups. Our helper
4796 * function does the lookup operation for a single 64 bit part of
4797 * the input.
4798 */
4799 tcg_resl = tcg_temp_new_i64();
4800 tcg_resh = tcg_temp_new_i64();
4801
4802 if (is_tblx) {
4803 read_vec_element(s, tcg_resl, rd, 0, MO_64);
4804 } else {
4805 tcg_gen_movi_i64(tcg_resl, 0);
4806 }
4807 if (is_tblx && is_q) {
4808 read_vec_element(s, tcg_resh, rd, 1, MO_64);
4809 } else {
4810 tcg_gen_movi_i64(tcg_resh, 0);
4811 }
4812
4813 tcg_idx = tcg_temp_new_i64();
4814 tcg_regno = tcg_const_i32(rn);
4815 tcg_numregs = tcg_const_i32(len + 1);
4816 read_vec_element(s, tcg_idx, rm, 0, MO_64);
4817 gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
4818 tcg_regno, tcg_numregs);
4819 if (is_q) {
4820 read_vec_element(s, tcg_idx, rm, 1, MO_64);
4821 gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
4822 tcg_regno, tcg_numregs);
4823 }
4824 tcg_temp_free_i64(tcg_idx);
4825 tcg_temp_free_i32(tcg_regno);
4826 tcg_temp_free_i32(tcg_numregs);
4827
4828 write_vec_element(s, tcg_resl, rd, 0, MO_64);
4829 tcg_temp_free_i64(tcg_resl);
4830 write_vec_element(s, tcg_resh, rd, 1, MO_64);
4831 tcg_temp_free_i64(tcg_resh);
384b26fb
AB
4832}
4833
/* C3.6.3 ZIP/UZP/TRN
 *   31  30 29         24 23  22  21 20  16 15 14 12 11 10 9    5 4    0
 * +---+---+-------------+------+---+------+---+------------------+------+
 * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
 * +---+---+-------------+------+---+------+---+------------------+------+
 *
 * Permute instructions: build the result element by element, gathering
 * from Rn/Rm according to the permutation, then write it back as two
 * 64-bit halves.
 */
static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    /* opc field bits [1:0] indicate ZIP/UZP/TRN;
     * bit 2 indicates 1 vs 2 variant of the insn.
     */
    int opcode = extract32(insn, 12, 2);
    bool part = extract32(insn, 14, 1);
    bool is_q = extract32(insn, 30, 1);
    int esize = 8 << size;      /* element size in bits */
    int i, ofs;
    int datasize = is_q ? 128 : 64;
    int elements = datasize / esize;
    TCGv_i64 tcg_res, tcg_resl, tcg_resh;

    /* opc == 0 is unallocated; 64-bit elements require Q */
    if (opcode == 0 || (size == 3 && !is_q)) {
        unallocated_encoding(s);
        return;
    }

    /* accumulate the result in a low/high pair of 64-bit temps so the
     * source registers may be read after Rd would be overwritten
     */
    tcg_resl = tcg_const_i64(0);
    tcg_resh = tcg_const_i64(0);
    tcg_res = tcg_temp_new_i64();

    for (i = 0; i < elements; i++) {
        /* pick the source element for result element i */
        switch (opcode) {
        case 1: /* UZP1/2 */
        {
            /* even (part=0) or odd (part=1) elements of Rn then Rm */
            int midpoint = elements / 2;
            if (i < midpoint) {
                read_vec_element(s, tcg_res, rn, 2 * i + part, size);
            } else {
                read_vec_element(s, tcg_res, rm,
                                 2 * (i - midpoint) + part, size);
            }
            break;
        }
        case 2: /* TRN1/2 */
            /* interleave element pairs from Rn (even i) and Rm (odd i) */
            if (i & 1) {
                read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
            } else {
                read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
            }
            break;
        case 3: /* ZIP1/2 */
        {
            /* interleave the lower (part=0) or upper (part=1) halves */
            int base = part * elements / 2;
            if (i & 1) {
                read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
            } else {
                read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
            }
            break;
        }
        default:
            g_assert_not_reached();
        }

        /* OR the element into its slot in the low or high result half */
        ofs = i * esize;
        if (ofs < 64) {
            tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
            tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
        } else {
            tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
            tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
        }
    }

    tcg_temp_free_i64(tcg_res);

    write_vec_element(s, tcg_resl, rd, 0, MO_64);
    tcg_temp_free_i64(tcg_resl);
    write_vec_element(s, tcg_resh, rd, 1, MO_64);
    tcg_temp_free_i64(tcg_resh);
}
4918
4a0ff1ce
MM
4919static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2,
4920 int opc, bool is_min, TCGv_ptr fpst)
4921{
4922 /* Helper function for disas_simd_across_lanes: do a single precision
4923 * min/max operation on the specified two inputs,
4924 * and return the result in tcg_elt1.
4925 */
4926 if (opc == 0xc) {
4927 if (is_min) {
4928 gen_helper_vfp_minnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
4929 } else {
4930 gen_helper_vfp_maxnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
4931 }
4932 } else {
4933 assert(opc == 0xf);
4934 if (is_min) {
4935 gen_helper_vfp_mins(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
4936 } else {
4937 gen_helper_vfp_maxs(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
4938 }
4939 }
4940}
4941
384b26fb
AB
4942/* C3.6.4 AdvSIMD across lanes
4943 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
4944 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
4945 * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd |
4946 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
4947 */
4948static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
4949{
4a0ff1ce
MM
4950 int rd = extract32(insn, 0, 5);
4951 int rn = extract32(insn, 5, 5);
4952 int size = extract32(insn, 22, 2);
4953 int opcode = extract32(insn, 12, 5);
4954 bool is_q = extract32(insn, 30, 1);
4955 bool is_u = extract32(insn, 29, 1);
4956 bool is_fp = false;
4957 bool is_min = false;
4958 int esize;
4959 int elements;
4960 int i;
4961 TCGv_i64 tcg_res, tcg_elt;
4962
4963 switch (opcode) {
4964 case 0x1b: /* ADDV */
4965 if (is_u) {
4966 unallocated_encoding(s);
4967 return;
4968 }
4969 /* fall through */
4970 case 0x3: /* SADDLV, UADDLV */
4971 case 0xa: /* SMAXV, UMAXV */
4972 case 0x1a: /* SMINV, UMINV */
4973 if (size == 3 || (size == 2 && !is_q)) {
4974 unallocated_encoding(s);
4975 return;
4976 }
4977 break;
4978 case 0xc: /* FMAXNMV, FMINNMV */
4979 case 0xf: /* FMAXV, FMINV */
4980 if (!is_u || !is_q || extract32(size, 0, 1)) {
4981 unallocated_encoding(s);
4982 return;
4983 }
4984 /* Bit 1 of size field encodes min vs max, and actual size is always
4985 * 32 bits: adjust the size variable so following code can rely on it
4986 */
4987 is_min = extract32(size, 1, 1);
4988 is_fp = true;
4989 size = 2;
4990 break;
4991 default:
4992 unallocated_encoding(s);
4993 return;
4994 }
4995
4996 esize = 8 << size;
4997 elements = (is_q ? 128 : 64) / esize;
4998
4999 tcg_res = tcg_temp_new_i64();
5000 tcg_elt = tcg_temp_new_i64();
5001
5002 /* These instructions operate across all lanes of a vector
5003 * to produce a single result. We can guarantee that a 64
5004 * bit intermediate is sufficient:
5005 * + for [US]ADDLV the maximum element size is 32 bits, and
5006 * the result type is 64 bits
5007 * + for FMAX*V, FMIN*V, ADDV the intermediate type is the
5008 * same as the element size, which is 32 bits at most
5009 * For the integer operations we can choose to work at 64
5010 * or 32 bits and truncate at the end; for simplicity
5011 * we use 64 bits always. The floating point
5012 * ops do require 32 bit intermediates, though.
5013 */
5014 if (!is_fp) {
5015 read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
5016
5017 for (i = 1; i < elements; i++) {
5018 read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
5019
5020 switch (opcode) {
5021 case 0x03: /* SADDLV / UADDLV */
5022 case 0x1b: /* ADDV */
5023 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
5024 break;
5025 case 0x0a: /* SMAXV / UMAXV */
5026 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
5027 tcg_res,
5028 tcg_res, tcg_elt, tcg_res, tcg_elt);
5029 break;
5030 case 0x1a: /* SMINV / UMINV */
5031 tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
5032 tcg_res,
5033 tcg_res, tcg_elt, tcg_res, tcg_elt);
5034 break;
5035 break;
5036 default:
5037 g_assert_not_reached();
5038 }
5039
5040 }
5041 } else {
5042 /* Floating point ops which work on 32 bit (single) intermediates.
5043 * Note that correct NaN propagation requires that we do these
5044 * operations in exactly the order specified by the pseudocode.
5045 */
5046 TCGv_i32 tcg_elt1 = tcg_temp_new_i32();
5047 TCGv_i32 tcg_elt2 = tcg_temp_new_i32();
5048 TCGv_i32 tcg_elt3 = tcg_temp_new_i32();
5049 TCGv_ptr fpst = get_fpstatus_ptr();
5050
5051 assert(esize == 32);
5052 assert(elements == 4);
5053
5054 read_vec_element(s, tcg_elt, rn, 0, MO_32);
5055 tcg_gen_trunc_i64_i32(tcg_elt1, tcg_elt);
5056 read_vec_element(s, tcg_elt, rn, 1, MO_32);
5057 tcg_gen_trunc_i64_i32(tcg_elt2, tcg_elt);
5058
5059 do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5060
5061 read_vec_element(s, tcg_elt, rn, 2, MO_32);
5062 tcg_gen_trunc_i64_i32(tcg_elt2, tcg_elt);
5063 read_vec_element(s, tcg_elt, rn, 3, MO_32);
5064 tcg_gen_trunc_i64_i32(tcg_elt3, tcg_elt);
5065
5066 do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst);
5067
5068 do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5069
5070 tcg_gen_extu_i32_i64(tcg_res, tcg_elt1);
5071 tcg_temp_free_i32(tcg_elt1);
5072 tcg_temp_free_i32(tcg_elt2);
5073 tcg_temp_free_i32(tcg_elt3);
5074 tcg_temp_free_ptr(fpst);
5075 }
5076
5077 tcg_temp_free_i64(tcg_elt);
5078
5079 /* Now truncate the result to the width required for the final output */
5080 if (opcode == 0x03) {
5081 /* SADDLV, UADDLV: result is 2*esize */
5082 size++;
5083 }
5084
5085 switch (size) {
5086 case 0:
5087 tcg_gen_ext8u_i64(tcg_res, tcg_res);
5088 break;
5089 case 1:
5090 tcg_gen_ext16u_i64(tcg_res, tcg_res);
5091 break;
5092 case 2:
5093 tcg_gen_ext32u_i64(tcg_res, tcg_res);
5094 break;
5095 case 3:
5096 break;
5097 default:
5098 g_assert_not_reached();
5099 }
5100
5101 write_fp_dreg(s, rd, tcg_res);
5102 tcg_temp_free_i64(tcg_res);
384b26fb
AB
5103}
5104
67bb9389
AB
5105/* C6.3.31 DUP (Element, Vector)
5106 *
5107 * 31 30 29 21 20 16 15 10 9 5 4 0
5108 * +---+---+-------------------+--------+-------------+------+------+
5109 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd |
5110 * +---+---+-------------------+--------+-------------+------+------+
5111 *
5112 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5113 */
5114static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
5115 int imm5)
5116{
5117 int size = ctz32(imm5);
5118 int esize = 8 << size;
5119 int elements = (is_q ? 128 : 64) / esize;
5120 int index, i;
5121 TCGv_i64 tmp;
5122
5123 if (size > 3 || (size == 3 && !is_q)) {
5124 unallocated_encoding(s);
5125 return;
5126 }
5127
5128 index = imm5 >> (size + 1);
5129
5130 tmp = tcg_temp_new_i64();
5131 read_vec_element(s, tmp, rn, index, size);
5132
5133 for (i = 0; i < elements; i++) {
5134 write_vec_element(s, tmp, rd, i, size);
5135 }
5136
5137 if (!is_q) {
5138 clear_vec_high(s, rd);
5139 }
5140
5141 tcg_temp_free_i64(tmp);
5142}
5143
360a6f2d
PM
5144/* C6.3.31 DUP (element, scalar)
5145 * 31 21 20 16 15 10 9 5 4 0
5146 * +-----------------------+--------+-------------+------+------+
5147 * | 0 1 0 1 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd |
5148 * +-----------------------+--------+-------------+------+------+
5149 */
5150static void handle_simd_dupes(DisasContext *s, int rd, int rn,
5151 int imm5)
5152{
5153 int size = ctz32(imm5);
5154 int index;
5155 TCGv_i64 tmp;
5156
5157 if (size > 3) {
5158 unallocated_encoding(s);
5159 return;
5160 }
5161
5162 index = imm5 >> (size + 1);
5163
5164 /* This instruction just extracts the specified element and
5165 * zero-extends it into the bottom of the destination register.
5166 */
5167 tmp = tcg_temp_new_i64();
5168 read_vec_element(s, tmp, rn, index, size);
5169 write_fp_dreg(s, rd, tmp);
5170 tcg_temp_free_i64(tmp);
5171}
5172
67bb9389
AB
5173/* C6.3.32 DUP (General)
5174 *
5175 * 31 30 29 21 20 16 15 10 9 5 4 0
5176 * +---+---+-------------------+--------+-------------+------+------+
5177 * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 1 1 | Rn | Rd |
5178 * +---+---+-------------------+--------+-------------+------+------+
5179 *
5180 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5181 */
5182static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
5183 int imm5)
5184{
5185 int size = ctz32(imm5);
5186 int esize = 8 << size;
5187 int elements = (is_q ? 128 : 64)/esize;
5188 int i = 0;
5189
5190 if (size > 3 || ((size == 3) && !is_q)) {
5191 unallocated_encoding(s);
5192 return;
5193 }
5194 for (i = 0; i < elements; i++) {
5195 write_vec_element(s, cpu_reg(s, rn), rd, i, size);
5196 }
5197 if (!is_q) {
5198 clear_vec_high(s, rd);
5199 }
5200}
5201
5202/* C6.3.150 INS (Element)
5203 *
5204 * 31 21 20 16 15 14 11 10 9 5 4 0
5205 * +-----------------------+--------+------------+---+------+------+
5206 * | 0 1 1 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
5207 * +-----------------------+--------+------------+---+------+------+
5208 *
5209 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5210 * index: encoded in imm5<4:size+1>
5211 */
5212static void handle_simd_inse(DisasContext *s, int rd, int rn,
5213 int imm4, int imm5)
5214{
5215 int size = ctz32(imm5);
5216 int src_index, dst_index;
5217 TCGv_i64 tmp;
5218
5219 if (size > 3) {
5220 unallocated_encoding(s);
5221 return;
5222 }
5223 dst_index = extract32(imm5, 1+size, 5);
5224 src_index = extract32(imm4, size, 4);
5225
5226 tmp = tcg_temp_new_i64();
5227
5228 read_vec_element(s, tmp, rn, src_index, size);
5229 write_vec_element(s, tmp, rd, dst_index, size);
5230
5231 tcg_temp_free_i64(tmp);
5232}
5233
5234
5235/* C6.3.151 INS (General)
5236 *
5237 * 31 21 20 16 15 10 9 5 4 0
5238 * +-----------------------+--------+-------------+------+------+
5239 * | 0 1 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 1 1 1 | Rn | Rd |
5240 * +-----------------------+--------+-------------+------+------+
5241 *
5242 * size: encoded in imm5 (see ARM ARM LowestSetBit())
5243 * index: encoded in imm5<4:size+1>
5244 */
5245static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
5246{
5247 int size = ctz32(imm5);
5248 int idx;
5249
5250 if (size > 3) {
5251 unallocated_encoding(s);
5252 return;
5253 }
5254
5255 idx = extract32(imm5, 1 + size, 4 - size);
5256 write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
5257}
5258
/*
 * C6.3.321 UMOV (General)
 * C6.3.237 SMOV (General)
 *
 *  31 30   29              21 20    16 15    12   10 9    5 4    0
 * +---+---+-------------------+--------+-------------+------+------+
 * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
 * +---+---+-------------------+--------+-------------+------+------+
 *
 * U: unsigned when set
 * size: encoded in imm5 (see ARM ARM LowestSetBit())
 *
 * Moves one vector element of Vn into Xd/Wd, sign- or zero-extending.
 */
static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
                                  int rn, int rd, int imm5)
{
    int size = ctz32(imm5);
    int element;
    TCGv_i64 tcg_rd;

    /* Check for UnallocatedEncodings */
    if (is_signed) {
        /* SMOV: byte/half to W or X, word only to X */
        if (size > 2 || (size == 2 && !is_q)) {
            unallocated_encoding(s);
            return;
        }
    } else {
        /* UMOV: byte/half/word to W (Q=0), doubleword to X (Q=1) only */
        if (size > 3
            || (size < 3 && is_q)
            || (size == 3 && !is_q)) {
            unallocated_encoding(s);
            return;
        }
    }
    element = extract32(imm5, 1+size, 4);

    tcg_rd = cpu_reg(s, rd);
    read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
    if (is_signed && !is_q) {
        /* SMOV to a W register: high 32 bits of Xd must read as zero */
        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
    }
}
5300
384b26fb
AB
5301/* C3.6.5 AdvSIMD copy
5302 * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0
5303 * +---+---+----+-----------------+------+---+------+---+------+------+
5304 * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
5305 * +---+---+----+-----------------+------+---+------+---+------+------+
5306 */
5307static void disas_simd_copy(DisasContext *s, uint32_t insn)
5308{
67bb9389
AB
5309 int rd = extract32(insn, 0, 5);
5310 int rn = extract32(insn, 5, 5);
5311 int imm4 = extract32(insn, 11, 4);
5312 int op = extract32(insn, 29, 1);
5313 int is_q = extract32(insn, 30, 1);
5314 int imm5 = extract32(insn, 16, 5);
5315
5316 if (op) {
5317 if (is_q) {
5318 /* INS (element) */
5319 handle_simd_inse(s, rd, rn, imm4, imm5);
5320 } else {
5321 unallocated_encoding(s);
5322 }
5323 } else {
5324 switch (imm4) {
5325 case 0:
5326 /* DUP (element - vector) */
5327 handle_simd_dupe(s, is_q, rd, rn, imm5);
5328 break;
5329 case 1:
5330 /* DUP (general) */
5331 handle_simd_dupg(s, is_q, rd, rn, imm5);
5332 break;
5333 case 3:
5334 if (is_q) {
5335 /* INS (general) */
5336 handle_simd_insg(s, rd, rn, imm5);
5337 } else {
5338 unallocated_encoding(s);
5339 }
5340 break;
5341 case 5:
5342 case 7:
5343 /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
5344 handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
5345 break;
5346 default:
5347 unallocated_encoding(s);
5348 break;
5349 }
5350 }
384b26fb
AB
5351}
5352
/* C3.6.6 AdvSIMD modified immediate
 *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
 * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
 * +---+---+----+---------------------+-----+-------+----+---+-------+------+
 *
 * There are a number of operations that can be carried out here:
 * MOVI - move (shifted) imm into register
 * MVNI - move inverted (shifted) imm into register
 * ORR  - bitwise OR of (shifted) imm with register
 * BIC  - bitwise clear of (shifted) imm with register
 */
static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int cmode = extract32(insn, 12, 4);
    int cmode_3_1 = extract32(cmode, 1, 3); /* top 3 bits select expansion */
    int cmode_0 = extract32(cmode, 0, 1);
    int o2 = extract32(insn, 11, 1);
    /* imm8 = abc:defgh assembled from the two immediate fields */
    uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
    bool is_neg = extract32(insn, 29, 1);   /* "op" bit: inverted/clear forms */
    bool is_q = extract32(insn, 30, 1);
    uint64_t imm = 0;
    TCGv_i64 tcg_rd, tcg_imm;
    int i;

    /* o2 must be zero; FMOV (cmode == 0xf, op == 1) exists only as Q == 1 */
    if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
        unallocated_encoding(s);
        return;
    }

    /* See AdvSIMDExpandImm() in ARM ARM */
    switch (cmode_3_1) {
    case 0: /* Replicate(Zeros(24):imm8, 2) */
    case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
    case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
    case 3: /* Replicate(imm8:Zeros(24), 2) */
    {
        /* 32-bit lanes: imm8 shifted by 0/8/16/24 then replicated */
        int shift = cmode_3_1 * 8;
        imm = bitfield_replicate(abcdefgh << shift, 32);
        break;
    }
    case 4: /* Replicate(Zeros(8):imm8, 4) */
    case 5: /* Replicate(imm8:Zeros(8), 4) */
    {
        /* 16-bit lanes: imm8 shifted by 0/8 then replicated */
        int shift = (cmode_3_1 & 0x1) * 8;
        imm = bitfield_replicate(abcdefgh << shift, 16);
        break;
    }
    case 6:
        /* 32-bit lanes with trailing ones ("MSL" shifting-ones forms) */
        if (cmode_0) {
            /* Replicate(Zeros(8):imm8:Ones(16), 2) */
            imm = (abcdefgh << 16) | 0xffff;
        } else {
            /* Replicate(Zeros(16):imm8:Ones(8), 2) */
            imm = (abcdefgh << 8) | 0xff;
        }
        imm = bitfield_replicate(imm, 32);
        break;
    case 7:
        if (!cmode_0 && !is_neg) {
            /* Replicate imm8 to every byte */
            imm = bitfield_replicate(abcdefgh, 8);
        } else if (!cmode_0 && is_neg) {
            /* Per-byte expansion: bit i of imm8 selects 0xff/0x00
             * for byte i of the 64-bit immediate.
             */
            int i;
            imm = 0;
            for (i = 0; i < 8; i++) {
                if ((abcdefgh) & (1 << i)) {
                    imm |= 0xffULL << (i * 8);
                }
            }
        } else if (cmode_0) {
            if (is_neg) {
                /* FMOV (vector, immediate), double-precision expansion:
                 * sign:NOT(b6):Replicate(b6,8):fraction(6):Zeros(48)
                 */
                imm = (abcdefgh & 0x3f) << 48;
                if (abcdefgh & 0x80) {
                    imm |= 0x8000000000000000ULL;
                }
                if (abcdefgh & 0x40) {
                    imm |= 0x3fc0000000000000ULL;
                } else {
                    imm |= 0x4000000000000000ULL;
                }
            } else {
                /* FMOV (vector, immediate), single-precision expansion,
                 * replicated to both 32-bit halves.
                 */
                imm = (abcdefgh & 0x3f) << 19;
                if (abcdefgh & 0x80) {
                    imm |= 0x80000000;
                }
                if (abcdefgh & 0x40) {
                    imm |= 0x3e000000;
                } else {
                    imm |= 0x40000000;
                }
                imm |= (imm << 32);
            }
        }
        break;
    }

    /* MVNI/BIC invert the expanded immediate (not for the cmode==7 group) */
    if (cmode_3_1 != 7 && is_neg) {
        imm = ~imm;
    }

    tcg_imm = tcg_const_i64(imm);
    tcg_rd = new_tmp_a64(s);

    /* Operate on the low and high 64-bit halves of the vector register */
    for (i = 0; i < 2; i++) {
        int foffs = i ? fp_reg_hi_offset(rd) : fp_reg_offset(rd, MO_64);

        if (i == 1 && !is_q) {
            /* non-quad ops clear high half of vector */
            tcg_gen_movi_i64(tcg_rd, 0);
        } else if ((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9) {
            /* cmode 0xx1 (32-bit) or 10x1 (16-bit): ORR/BIC forms,
             * which read-modify-write the destination.
             */
            tcg_gen_ld_i64(tcg_rd, cpu_env, foffs);
            if (is_neg) {
                /* AND (BIC) */
                tcg_gen_and_i64(tcg_rd, tcg_rd, tcg_imm);
            } else {
                /* ORR */
                tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_imm);
            }
        } else {
            /* MOVI */
            tcg_gen_mov_i64(tcg_rd, tcg_imm);
        }
        tcg_gen_st_i64(tcg_rd, cpu_env, foffs);
    }

    tcg_temp_free_i64(tcg_imm);
}
5481
5482/* C3.6.7 AdvSIMD scalar copy
5483 * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0
5484 * +-----+----+-----------------+------+---+------+---+------+------+
5485 * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd |
5486 * +-----+----+-----------------+------+---+------+---+------+------+
5487 */
5488static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
5489{
360a6f2d
PM
5490 int rd = extract32(insn, 0, 5);
5491 int rn = extract32(insn, 5, 5);
5492 int imm4 = extract32(insn, 11, 4);
5493 int imm5 = extract32(insn, 16, 5);
5494 int op = extract32(insn, 29, 1);
5495
5496 if (op != 0 || imm4 != 0) {
5497 unallocated_encoding(s);
5498 return;
5499 }
5500
5501 /* DUP (element, scalar) */
5502 handle_simd_dupes(s, rd, rn, imm5);
384b26fb
AB
5503}
5504
/* C3.6.8 AdvSIMD scalar pairwise
 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 *
 * Reduces the two elements of the source vector pairwise into a scalar.
 */
static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
{
    int u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    TCGv_ptr fpst;

    /* For some ops (the FP ones), size[1] is part of the encoding.
     * For ADDP strictly it is not but size[1] is always 1 for valid
     * encodings.
     */
    opcode |= (extract32(size, 1, 1) << 5);

    switch (opcode) {
    case 0x3b: /* ADDP */
        /* Integer op: only the 64-bit, U == 0 form is allocated */
        if (u || size != 3) {
            unallocated_encoding(s);
            return;
        }
        /* ADDP needs no FP status; mark fpst unused so we can tell below */
        TCGV_UNUSED_PTR(fpst);
        break;
    case 0xc: /* FMAXNMP */
    case 0xd: /* FADDP */
    case 0xf: /* FMAXP */
    case 0x2c: /* FMINNMP */
    case 0x2f: /* FMINP */
        /* FP op, size[0] is 32 or 64 bit */
        if (!u) {
            unallocated_encoding(s);
            return;
        }
        /* Re-encode size as the MemOp-style element size (2 or 3) */
        size = extract32(size, 0, 1) ? 3 : 2;
        fpst = get_fpstatus_ptr();
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (size == 3) {
        /* 64-bit elements: operate on the two doublewords of Vn */
        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
        TCGv_i64 tcg_res = tcg_temp_new_i64();

        read_vec_element(s, tcg_op1, rn, 0, MO_64);
        read_vec_element(s, tcg_op2, rn, 1, MO_64);

        switch (opcode) {
        case 0x3b: /* ADDP */
            tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
            break;
        case 0xc: /* FMAXNMP */
            gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0xd: /* FADDP */
            gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0xf: /* FMAXP */
            gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0x2c: /* FMINNMP */
            gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0x2f: /* FMINP */
            gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        default:
            g_assert_not_reached();
        }

        /* write_fp_dreg also zeroes the high bits of the destination */
        write_fp_dreg(s, rd, tcg_res);

        tcg_temp_free_i64(tcg_op1);
        tcg_temp_free_i64(tcg_op2);
        tcg_temp_free_i64(tcg_res);
    } else {
        /* 32-bit elements: only the FP ops can get here */
        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
        TCGv_i32 tcg_res = tcg_temp_new_i32();

        read_vec_element_i32(s, tcg_op1, rn, 0, MO_32);
        read_vec_element_i32(s, tcg_op2, rn, 1, MO_32);

        switch (opcode) {
        case 0xc: /* FMAXNMP */
            gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0xd: /* FADDP */
            gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0xf: /* FMAXP */
            gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0x2c: /* FMINNMP */
            gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        case 0x2f: /* FMINP */
            gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
            break;
        default:
            g_assert_not_reached();
        }

        write_fp_sreg(s, rd, tcg_res);

        tcg_temp_free_i32(tcg_op1);
        tcg_temp_free_i32(tcg_op2);
        tcg_temp_free_i32(tcg_res);
    }

    /* fpst was only allocated for the FP opcodes */
    if (!TCGV_IS_UNUSED_PTR(fpst)) {
        tcg_temp_free_ptr(fpst);
    }
}
5627
/*
 * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
 *
 * This handles the common shift-right code and is used by both
 * the vector and scalar code.
 *
 * tcg_res: destination (also the accumulator input when accumulate)
 * tcg_src: value to shift; clobbered by this function
 * tcg_rnd: rounding constant (1 << (shift - 1)), or TCGV unused for no
 *          rounding
 * size:    element size (3 == 64-bit)
 * shift:   shift amount, 1..(8 << size) inclusive
 */
static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
                                    TCGv_i64 tcg_rnd, bool accumulate,
                                    bool is_u, int size, int shift)
{
    bool extended_result = false;
    bool round = !TCGV_IS_UNUSED_I64(tcg_rnd);
    int ext_lshift = 0;
    TCGv_i64 tcg_src_hi;

    if (round && size == 3) {
        /* Rounding a 64-bit value can carry into bit 64, so we need
         * 128-bit intermediate precision (tcg_src_hi:tcg_src).
         */
        extended_result = true;
        ext_lshift = 64 - shift;
        tcg_src_hi = tcg_temp_new_i64();
    } else if (shift == 64) {
        if (!accumulate && is_u) {
            /* result is zero */
            tcg_gen_movi_i64(tcg_res, 0);
            return;
        }
    }

    /* Deal with the rounding step */
    if (round) {
        if (extended_result) {
            TCGv_i64 tcg_zero = tcg_const_i64(0);
            if (!is_u) {
                /* take care of sign extending tcg_res */
                tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
                                 tcg_src, tcg_src_hi,
                                 tcg_rnd, tcg_zero);
            } else {
                tcg_gen_add2_i64(tcg_src, tcg_src_hi,
                                 tcg_src, tcg_zero,
                                 tcg_rnd, tcg_zero);
            }
            tcg_temp_free_i64(tcg_zero);
        } else {
            /* narrow enough that the add cannot overflow 64 bits */
            tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
        }
    }

    /* Now do the shift right */
    if (round && extended_result) {
        /* extended case, >64 bit precision required */
        if (ext_lshift == 0) {
            /* special case, only high bits matter */
            tcg_gen_mov_i64(tcg_src, tcg_src_hi);
        } else {
            /* combine low bits shifted down with high bits shifted up */
            tcg_gen_shri_i64(tcg_src, tcg_src, shift);
            tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
            tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
        }
    } else {
        if (is_u) {
            if (shift == 64) {
                /* essentially shifting in 64 zeros */
                tcg_gen_movi_i64(tcg_src, 0);
            } else {
                tcg_gen_shri_i64(tcg_src, tcg_src, shift);
            }
        } else {
            if (shift == 64) {
                /* effectively extending the sign-bit */
                tcg_gen_sari_i64(tcg_src, tcg_src, 63);
            } else {
                tcg_gen_sari_i64(tcg_src, tcg_src, shift);
            }
        }
    }

    if (accumulate) {
        tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
    } else {
        tcg_gen_mov_i64(tcg_res, tcg_src);
    }

    if (extended_result) {
        tcg_temp_free_i64(tcg_src_hi);
    }
}
5715
5716/* Common SHL/SLI - Shift left with an optional insert */
5717static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
5718 bool insert, int shift)
5719{
5720 if (insert) { /* SLI */
5721 tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift);
5722 } else { /* SHL */
5723 tcg_gen_shli_i64(tcg_res, tcg_src, shift);
5724 }
5725}
5726
5727/* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
5728static void handle_scalar_simd_shri(DisasContext *s,
5729 bool is_u, int immh, int immb,
5730 int opcode, int rn, int rd)
5731{
5732 const int size = 3;
5733 int immhb = immh << 3 | immb;
5734 int shift = 2 * (8 << size) - immhb;
5735 bool accumulate = false;
5736 bool round = false;
5737 TCGv_i64 tcg_rn;
5738 TCGv_i64 tcg_rd;
5739 TCGv_i64 tcg_round;
5740
5741 if (!extract32(immh, 3, 1)) {
5742 unallocated_encoding(s);
5743 return;
5744 }
5745
5746 switch (opcode) {
5747 case 0x02: /* SSRA / USRA (accumulate) */
5748 accumulate = true;
5749 break;
5750 case 0x04: /* SRSHR / URSHR (rounding) */
5751 round = true;
5752 break;
5753 case 0x06: /* SRSRA / URSRA (accum + rounding) */
5754 accumulate = round = true;
5755 break;
5756 }
5757
5758 if (round) {
5759 uint64_t round_const = 1ULL << (shift - 1);
5760 tcg_round = tcg_const_i64(round_const);
5761 } else {
5762 TCGV_UNUSED_I64(tcg_round);
5763 }
5764
5765 tcg_rn = read_fp_dreg(s, rn);
5766 tcg_rd = accumulate ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
5767
5768 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
5769 accumulate, is_u, size, shift);
5770
5771 write_fp_dreg(s, rd, tcg_rd);
5772
5773 tcg_temp_free_i64(tcg_rn);
5774 tcg_temp_free_i64(tcg_rd);
5775 if (round) {
5776 tcg_temp_free_i64(tcg_round);
5777 }
5778}
5779
5780/* SHL/SLI - Scalar shift left */
5781static void handle_scalar_simd_shli(DisasContext *s, bool insert,
5782 int immh, int immb, int opcode,
5783 int rn, int rd)
5784{
5785 int size = 32 - clz32(immh) - 1;
5786 int immhb = immh << 3 | immb;
5787 int shift = immhb - (8 << size);
5788 TCGv_i64 tcg_rn = new_tmp_a64(s);
5789 TCGv_i64 tcg_rd = new_tmp_a64(s);
5790
5791 if (!extract32(immh, 3, 1)) {
5792 unallocated_encoding(s);
5793 return;
5794 }
5795
5796 tcg_rn = read_fp_dreg(s, rn);
5797 tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
5798
5799 handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
5800
5801 write_fp_dreg(s, rd, tcg_rd);
5802
5803 tcg_temp_free_i64(tcg_rn);
5804 tcg_temp_free_i64(tcg_rd);
5805}
5806
384b26fb
AB
5807/* C3.6.9 AdvSIMD scalar shift by immediate
5808 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0
5809 * +-----+---+-------------+------+------+--------+---+------+------+
5810 * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd |
5811 * +-----+---+-------------+------+------+--------+---+------+------+
4d1cef84
AB
5812 *
5813 * This is the scalar version so it works on a fixed sized registers
384b26fb
AB
5814 */
5815static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
5816{
4d1cef84
AB
5817 int rd = extract32(insn, 0, 5);
5818 int rn = extract32(insn, 5, 5);
5819 int opcode = extract32(insn, 11, 5);
5820 int immb = extract32(insn, 16, 3);
5821 int immh = extract32(insn, 19, 4);
5822 bool is_u = extract32(insn, 29, 1);
5823
5824 switch (opcode) {
5825 case 0x00: /* SSHR / USHR */
5826 case 0x02: /* SSRA / USRA */
5827 case 0x04: /* SRSHR / URSHR */
5828 case 0x06: /* SRSRA / URSRA */
5829 handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
5830 break;
5831 case 0x0a: /* SHL / SLI */
5832 handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
5833 break;
5834 default:
5835 unsupported_encoding(s, insn);
5836 break;
5837 }
384b26fb
AB
5838}
5839
/* C3.6.10 AdvSIMD scalar three different
 *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
 * +-----+---+-----------+------+---+------+--------+-----+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
 * +-----+---+-----------+------+---+------+--------+-----+------+------+
 *
 * Widening saturating multiply (accumulate): SQDMLAL, SQDMLSL, SQDMULL.
 */
static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
{
    bool is_u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 4);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    /* U must be 0 for all allocated encodings in this group */
    if (is_u) {
        unallocated_encoding(s);
        return;
    }

    switch (opcode) {
    case 0x9: /* SQDMLAL, SQDMLAL2 */
    case 0xb: /* SQDMLSL, SQDMLSL2 */
    case 0xd: /* SQDMULL, SQDMULL2 */
        /* only 16->32 (size 1) and 32->64 (size 2) are allocated */
        if (size == 0 || size == 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (size == 2) {
        /* 32x32 -> 64 */
        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
        TCGv_i64 tcg_res = tcg_temp_new_i64();

        read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
        read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);

        /* multiply, then saturating double (res + res) */
        tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
        gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);

        switch (opcode) {
        case 0xd: /* SQDMULL, SQDMULL2 */
            break;
        case 0xb: /* SQDMLSL, SQDMLSL2 */
            tcg_gen_neg_i64(tcg_res, tcg_res);
            /* fall through */
        case 0x9: /* SQDMLAL, SQDMLAL2 */
            /* saturating accumulate into the destination */
            read_vec_element(s, tcg_op1, rd, 0, MO_64);
            gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
                                              tcg_res, tcg_op1);
            break;
        default:
            g_assert_not_reached();
        }

        write_fp_dreg(s, rd, tcg_res);

        tcg_temp_free_i64(tcg_op1);
        tcg_temp_free_i64(tcg_op2);
        tcg_temp_free_i64(tcg_res);
    } else {
        /* 16x16 -> 32 */
        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
        TCGv_i64 tcg_res = tcg_temp_new_i64();

        read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
        read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);

        gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
        gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);

        switch (opcode) {
        case 0xd: /* SQDMULL, SQDMULL2 */
            break;
        case 0xb: /* SQDMLSL, SQDMLSL2 */
            gen_helper_neon_negl_u32(tcg_res, tcg_res);
            /* fall through */
        case 0x9: /* SQDMLAL, SQDMLAL2 */
        {
            TCGv_i64 tcg_op3 = tcg_temp_new_i64();
            read_vec_element(s, tcg_op3, rd, 0, MO_32);
            gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
                                              tcg_res, tcg_op3);
            tcg_temp_free_i64(tcg_op3);
            break;
        }
        default:
            g_assert_not_reached();
        }

        /* 32-bit scalar result; clear the upper 32 bits before writing */
        tcg_gen_ext32u_i64(tcg_res, tcg_res);
        write_fp_dreg(s, rd, tcg_res);

        tcg_temp_free_i32(tcg_op1);
        tcg_temp_free_i32(tcg_op2);
        tcg_temp_free_i64(tcg_res);
    }
}
5943
/* Handle 64x64->64 opcodes which are shared between the scalar
 * and vector 3-same groups. We cover every opcode where size == 3
 * is valid in either the three-reg-same (integer, not pairwise)
 * or scalar-three-reg-same groups. (Some opcodes are not yet
 * implemented.)
 *
 * u selects the unsigned/alternate form of each opcode.
 */
static void handle_3same_64(DisasContext *s, int opcode, bool u,
                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
{
    TCGCond cond;

    switch (opcode) {
    case 0x1: /* SQADD */
        if (u) {
            gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        }
        break;
    case 0x5: /* SQSUB */
        if (u) {
            gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        }
        break;
    case 0x6: /* CMGT, CMHI */
        /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
         * We implement this using setcond (test) and then negating.
         */
        cond = u ? TCG_COND_GTU : TCG_COND_GT;
    do_cmop:
        tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
        tcg_gen_neg_i64(tcg_rd, tcg_rd);
        break;
    case 0x7: /* CMGE, CMHS */
        cond = u ? TCG_COND_GEU : TCG_COND_GE;
        goto do_cmop;
    case 0x11: /* CMTST, CMEQ */
        if (u) {
            cond = TCG_COND_EQ;
            goto do_cmop;
        }
        /* CMTST : test is "if (X & Y != 0)". */
        tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
        tcg_gen_setcondi_i64(TCG_COND_NE, tcg_rd, tcg_rd, 0);
        tcg_gen_neg_i64(tcg_rd, tcg_rd);
        break;
    case 0x8: /* SSHL, USHL */
        if (u) {
            gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
        }
        break;
    case 0x9: /* SQSHL, UQSHL */
        if (u) {
            gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        }
        break;
    case 0xa: /* SRSHL, URSHL */
        if (u) {
            gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
        }
        break;
    case 0xb: /* SQRSHL, UQRSHL */
        if (u) {
            gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        } else {
            gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
        }
        break;
    case 0x10: /* ADD, SUB */
        if (u) {
            tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
        } else {
            tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
6031
/* Handle the 3-same-operands float operations; shared by the scalar
 * and vector encodings. The caller must filter out any encodings
 * not allocated for the encoding it is dealing with.
 *
 * size: 0 for single precision, non-zero for double precision
 * elements: number of elements to process (1 for the scalar forms)
 */
static void handle_3same_float(DisasContext *s, int size, int elements,
                               int fpopcode, int rd, int rn, int rm)
{
    int pass;
    TCGv_ptr fpst = get_fpstatus_ptr();

    for (pass = 0; pass < elements; pass++) {
        if (size) {
            /* Double */
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
            TCGv_i64 tcg_res = tcg_temp_new_i64();

            read_vec_element(s, tcg_op1, rn, pass, MO_64);
            read_vec_element(s, tcg_op2, rm, pass, MO_64);

            switch (fpopcode) {
            case 0x39: /* FMLS */
                /* As usual for ARM, separate negation for fused multiply-add */
                gen_helper_vfp_negd(tcg_op1, tcg_op1);
                /* fall through */
            case 0x19: /* FMLA */
                /* fused multiply-add accumulates into the destination */
                read_vec_element(s, tcg_res, rd, pass, MO_64);
                gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
                                       tcg_res, fpst);
                break;
            case 0x18: /* FMAXNM */
                gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1a: /* FADD */
                gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1b: /* FMULX */
                gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1c: /* FCMEQ */
                gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1e: /* FMAX */
                gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1f: /* FRECPS */
                gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x38: /* FMINNM */
                gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3a: /* FSUB */
                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3e: /* FMIN */
                gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3f: /* FRSQRTS */
                gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5b: /* FMUL */
                gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5c: /* FCMGE */
                gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5d: /* FACGE */
                gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5f: /* FDIV */
                gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7a: /* FABD */
                /* implemented as abs(a - b) */
                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
                gen_helper_vfp_absd(tcg_res, tcg_res);
                break;
            case 0x7c: /* FCMGT */
                gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7d: /* FACGT */
                gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }

            write_vec_element(s, tcg_res, rd, pass, MO_64);

            tcg_temp_free_i64(tcg_res);
            tcg_temp_free_i64(tcg_op1);
            tcg_temp_free_i64(tcg_op2);
        } else {
            /* Single */
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            TCGv_i32 tcg_res = tcg_temp_new_i32();

            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);

            switch (fpopcode) {
            case 0x39: /* FMLS */
                /* As usual for ARM, separate negation for fused multiply-add */
                gen_helper_vfp_negs(tcg_op1, tcg_op1);
                /* fall through */
            case 0x19: /* FMLA */
                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
                gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
                                       tcg_res, fpst);
                break;
            case 0x1a: /* FADD */
                gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1b: /* FMULX */
                gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1c: /* FCMEQ */
                gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1e: /* FMAX */
                gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1f: /* FRECPS */
                gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x18: /* FMAXNM */
                gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x38: /* FMINNM */
                gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3a: /* FSUB */
                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3e: /* FMIN */
                gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3f: /* FRSQRTS */
                gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5b: /* FMUL */
                gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5c: /* FCMGE */
                gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5d: /* FACGE */
                gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5f: /* FDIV */
                gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7a: /* FABD */
                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
                gen_helper_vfp_abss(tcg_res, tcg_res);
                break;
            case 0x7c: /* FCMGT */
                gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7d: /* FACGT */
                gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }

            if (elements == 1) {
                /* scalar single so clear high part */
                TCGv_i64 tcg_tmp = tcg_temp_new_i64();

                tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
                write_vec_element(s, tcg_tmp, rd, pass, MO_64);
                tcg_temp_free_i64(tcg_tmp);
            } else {
                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
            }

            tcg_temp_free_i32(tcg_res);
            tcg_temp_free_i32(tcg_op1);
            tcg_temp_free_i32(tcg_op2);
        }
    }

    tcg_temp_free_ptr(fpst);

    /* (elements << size) counts 32-bit units written; fewer than 4
     * means less than 128 bits, so the top half must be zeroed.
     */
    if ((elements << size) < 4) {
        /* scalar, or non-quad vector op */
        clear_vec_high(s, rd);
    }
}
6222
/* C3.6.11 AdvSIMD scalar three same
 *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
 * +-----+---+-----------+------+---+------+--------+---+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
 * +-----+---+-----------+------+---+------+--------+---+------+------+
 */
static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 11, 5);
    int rm = extract32(insn, 16, 5);
    int size = extract32(insn, 22, 2);
    bool u = extract32(insn, 29, 1);
    TCGv_i64 tcg_rd;

    if (opcode >= 0x18) {
        /* Floating point: U, size[1] and opcode indicate operation */
        int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
        switch (fpopcode) {
        case 0x1b: /* FMULX */
        case 0x1f: /* FRECPS */
        case 0x3f: /* FRSQRTS */
        case 0x5d: /* FACGE */
        case 0x7d: /* FACGT */
        case 0x1c: /* FCMEQ */
        case 0x5c: /* FCMGE */
        case 0x7c: /* FCMGT */
        case 0x7a: /* FABD */
            break;
        default:
            unallocated_encoding(s);
            return;
        }

        /* size[0] selects single/double; elements == 1 for scalar */
        handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
        return;
    }

    /* Integer ops: validate which sizes are allocated per opcode */
    switch (opcode) {
    case 0x1: /* SQADD, UQADD */
    case 0x5: /* SQSUB, UQSUB */
    case 0x9: /* SQSHL, UQSHL */
    case 0xb: /* SQRSHL, UQRSHL */
        /* saturating ops exist at every element size */
        break;
    case 0x8: /* SSHL, USHL */
    case 0xa: /* SRSHL, URSHL */
    case 0x6: /* CMGT, CMHI */
    case 0x7: /* CMGE, CMHS */
    case 0x11: /* CMTST, CMEQ */
    case 0x10: /* ADD, SUB (vector) */
        /* these scalar forms are 64-bit only */
        if (size != 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x16: /* SQDMULH, SQRDMULH (vector) */
        /* only 16-bit and 32-bit element sizes are allocated */
        if (size != 1 && size != 2) {
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    tcg_rd = tcg_temp_new_i64();

    if (size == 3) {
        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
        TCGv_i64 tcg_rm = read_fp_dreg(s, rm);

        handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
        tcg_temp_free_i64(tcg_rn);
        tcg_temp_free_i64(tcg_rm);
    } else {
        /* Do a single operation on the lowest element in the vector.
         * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
         * no side effects for all these operations.
         * OPTME: special-purpose helpers would avoid doing some
         * unnecessary work in the helper for the 8 and 16 bit cases.
         */
        NeonGenTwoOpEnvFn *genenvfn;
        TCGv_i32 tcg_rn = tcg_temp_new_i32();
        TCGv_i32 tcg_rm = tcg_temp_new_i32();
        TCGv_i32 tcg_rd32 = tcg_temp_new_i32();

        read_vec_element_i32(s, tcg_rn, rn, 0, size);
        read_vec_element_i32(s, tcg_rm, rm, 0, size);

        /* select the 32-bit helper: fns[size][u] */
        switch (opcode) {
        case 0x1: /* SQADD, UQADD */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
                { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
                { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0x5: /* SQSUB, UQSUB */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
                { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
                { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0x9: /* SQSHL, UQSHL */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
                { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
                { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0xb: /* SQRSHL, UQRSHL */
        {
            static NeonGenTwoOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
                { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
                { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        case 0x16: /* SQDMULH, SQRDMULH */
        {
            /* no 8-bit form, hence fns indexed by size - 1 */
            static NeonGenTwoOpEnvFn * const fns[2][2] = {
                { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
                { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
            };
            assert(size == 1 || size == 2);
            genenvfn = fns[size - 1][u];
            break;
        }
        default:
            g_assert_not_reached();
        }

        genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
        /* zero-extend the 32-bit result into the 64-bit destination */
        tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
        tcg_temp_free_i32(tcg_rd32);
        tcg_temp_free_i32(tcg_rn);
        tcg_temp_free_i32(tcg_rm);
    }

    /* write_fp_dreg also clears the high bits of the vector register */
    write_fp_dreg(s, rd, tcg_rd);

    tcg_temp_free_i64(tcg_rd);
}
6380
/* Handle 64->64 opcodes which are shared between the scalar and
 * vector 2-reg-misc groups. We cover every integer opcode where size == 3
 * is valid in either group and also the double-precision fp ops.
 *
 * u selects the alternate form of each opcode (e.g. NEG vs ABS).
 */
static void handle_2misc_64(DisasContext *s, int opcode, bool u,
                            TCGv_i64 tcg_rd, TCGv_i64 tcg_rn)
{
    TCGCond cond;

    switch (opcode) {
    case 0x5: /* NOT */
        /* This opcode is shared with CNT and RBIT but we have earlier
         * enforced that size == 3 if and only if this is the NOT insn.
         */
        tcg_gen_not_i64(tcg_rd, tcg_rn);
        break;
    case 0xa: /* CMLT */
        /* 64 bit integer comparison against zero, result is
         * test ? (2^64 - 1) : 0. We implement via setcond(!test) and
         * subtracting 1.
         */
        cond = TCG_COND_LT;
    do_cmop:
        tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
        tcg_gen_neg_i64(tcg_rd, tcg_rd);
        break;
    case 0x8: /* CMGT, CMGE */
        cond = u ? TCG_COND_GE : TCG_COND_GT;
        goto do_cmop;
    case 0x9: /* CMEQ, CMLE */
        cond = u ? TCG_COND_LE : TCG_COND_EQ;
        goto do_cmop;
    case 0xb: /* ABS, NEG */
        if (u) {
            tcg_gen_neg_i64(tcg_rd, tcg_rn);
        } else {
            /* ABS via movcond: rd = (rn > 0) ? rn : -rn */
            TCGv_i64 tcg_zero = tcg_const_i64(0);
            tcg_gen_neg_i64(tcg_rd, tcg_rn);
            tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
                                tcg_rn, tcg_rd);
            tcg_temp_free_i64(tcg_zero);
        }
        break;
    case 0x2f: /* FABS */
        gen_helper_vfp_absd(tcg_rd, tcg_rn);
        break;
    case 0x6f: /* FNEG */
        gen_helper_vfp_negd(tcg_rd, tcg_rn);
        break;
    default:
        g_assert_not_reached();
    }
}
6434
8908f4d1
AB
6435static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
6436 bool is_scalar, bool is_u, bool is_q,
6437 int size, int rn, int rd)
6438{
6439 bool is_double = (size == 3);
6440 TCGv_ptr fpst = get_fpstatus_ptr();
6441
6442 if (is_double) {
6443 TCGv_i64 tcg_op = tcg_temp_new_i64();
6444 TCGv_i64 tcg_zero = tcg_const_i64(0);
6445 TCGv_i64 tcg_res = tcg_temp_new_i64();
6446 NeonGenTwoDoubleOPFn *genfn;
6447 bool swap = false;
6448 int pass;
6449
6450 switch (opcode) {
6451 case 0x2e: /* FCMLT (zero) */
6452 swap = true;
6453 /* fallthrough */
6454 case 0x2c: /* FCMGT (zero) */
6455 genfn = gen_helper_neon_cgt_f64;
6456 break;
6457 case 0x2d: /* FCMEQ (zero) */
6458 genfn = gen_helper_neon_ceq_f64;
6459 break;
6460 case 0x6d: /* FCMLE (zero) */
6461 swap = true;
6462 /* fall through */
6463 case 0x6c: /* FCMGE (zero) */
6464 genfn = gen_helper_neon_cge_f64;
6465 break;
6466 default:
6467 g_assert_not_reached();
6468 }
6469
6470 for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
6471 read_vec_element(s, tcg_op, rn, pass, MO_64);
6472 if (swap) {
6473 genfn(tcg_res, tcg_zero, tcg_op, fpst);
6474 } else {
6475 genfn(tcg_res, tcg_op, tcg_zero, fpst);
6476 }
6477 write_vec_element(s, tcg_res, rd, pass, MO_64);
6478 }
6479 if (is_scalar) {
6480 clear_vec_high(s, rd);
6481 }
6482
6483 tcg_temp_free_i64(tcg_res);
6484 tcg_temp_free_i64(tcg_zero);
6485 tcg_temp_free_i64(tcg_op);
6486 } else {
6487 TCGv_i32 tcg_op = tcg_temp_new_i32();
6488 TCGv_i32 tcg_zero = tcg_const_i32(0);
6489 TCGv_i32 tcg_res = tcg_temp_new_i32();
6490 NeonGenTwoSingleOPFn *genfn;
6491 bool swap = false;
6492 int pass, maxpasses;
6493
6494 switch (opcode) {
6495 case 0x2e: /* FCMLT (zero) */
6496 swap = true;
6497 /* fall through */
6498 case 0x2c: /* FCMGT (zero) */
6499 genfn = gen_helper_neon_cgt_f32;
6500 break;
6501 case 0x2d: /* FCMEQ (zero) */
6502 genfn = gen_helper_neon_ceq_f32;
6503 break;
6504 case 0x6d: /* FCMLE (zero) */
6505 swap = true;
6506 /* fall through */
6507 case 0x6c: /* FCMGE (zero) */
6508 genfn = gen_helper_neon_cge_f32;
6509 break;
6510 default:
6511 g_assert_not_reached();
6512 }
6513
6514 if (is_scalar) {
6515 maxpasses = 1;
6516 } else {
6517 maxpasses = is_q ? 4 : 2;
6518 }
6519
6520 for (pass = 0; pass < maxpasses; pass++) {
6521 read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
6522 if (swap) {
6523 genfn(tcg_res, tcg_zero, tcg_op, fpst);
6524 } else {
6525 genfn(tcg_res, tcg_op, tcg_zero, fpst);
6526 }
6527 if (is_scalar) {
6528 write_fp_sreg(s, rd, tcg_res);
6529 } else {
6530 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
6531 }
6532 }
6533 tcg_temp_free_i32(tcg_res);
6534 tcg_temp_free_i32(tcg_zero);
6535 tcg_temp_free_i32(tcg_op);
6536 if (!is_q && !is_scalar) {
6537 clear_vec_high(s, rd);
6538 }
6539 }
6540
6541 tcg_temp_free_ptr(fpst);
6542}
6543
384b26fb
AB
6544/* C3.6.12 AdvSIMD scalar two reg misc
6545 * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
6546 * +-----+---+-----------+------+-----------+--------+-----+------+------+
6547 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 | Rn | Rd |
6548 * +-----+---+-----------+------+-----------+--------+-----+------+------+
6549 */
6550static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
6551{
effa8e06
PM
6552 int rd = extract32(insn, 0, 5);
6553 int rn = extract32(insn, 5, 5);
6554 int opcode = extract32(insn, 12, 5);
6555 int size = extract32(insn, 22, 2);
6556 bool u = extract32(insn, 29, 1);
6557
6558 switch (opcode) {
6559 case 0xa: /* CMLT */
6560 if (u) {
6561 unallocated_encoding(s);
6562 return;
6563 }
6564 /* fall through */
6565 case 0x8: /* CMGT, CMGE */
6566 case 0x9: /* CMEQ, CMLE */
6567 case 0xb: /* ABS, NEG */
6568 if (size != 3) {
6569 unallocated_encoding(s);
6570 return;
6571 }
6572 break;
8908f4d1
AB
6573 case 0xc ... 0xf:
6574 case 0x16 ... 0x1d:
6575 case 0x1f:
6576 /* Floating point: U, size[1] and opcode indicate operation;
6577 * size[0] indicates single or double precision.
6578 */
6579 opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
6580 size = extract32(size, 0, 1) ? 3 : 2;
6581 switch (opcode) {
6582 case 0x2c: /* FCMGT (zero) */
6583 case 0x2d: /* FCMEQ (zero) */
6584 case 0x2e: /* FCMLT (zero) */
6585 case 0x6c: /* FCMGE (zero) */
6586 case 0x6d: /* FCMLE (zero) */
6587 handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
6588 return;
6589 case 0x1a: /* FCVTNS */
6590 case 0x1b: /* FCVTMS */
6591 case 0x1c: /* FCVTAS */
6592 case 0x1d: /* SCVTF */
6593 case 0x3a: /* FCVTPS */
6594 case 0x3b: /* FCVTZS */
6595 case 0x3d: /* FRECPE */
6596 case 0x3f: /* FRECPX */
6597 case 0x56: /* FCVTXN, FCVTXN2 */
6598 case 0x5a: /* FCVTNU */
6599 case 0x5b: /* FCVTMU */
6600 case 0x5c: /* FCVTAU */
6601 case 0x5d: /* UCVTF */
6602 case 0x7a: /* FCVTPU */
6603 case 0x7b: /* FCVTZU */
6604 case 0x7d: /* FRSQRTE */
6605 unsupported_encoding(s, insn);
6606 return;
6607 default:
6608 unallocated_encoding(s);
6609 return;
6610 }
6611 break;
effa8e06
PM
6612 default:
6613 /* Other categories of encoding in this class:
effa8e06
PM
6614 * + SUQADD/USQADD/SQABS/SQNEG : size 8, 16, 32 or 64
6615 * + SQXTN/SQXTN2/SQXTUN/SQXTUN2/UQXTN/UQXTN2:
6616 * narrowing saturate ops: size 64/32/16 -> 32/16/8
6617 */
6618 unsupported_encoding(s, insn);
6619 return;
6620 }
6621
6622 if (size == 3) {
6623 TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
6624 TCGv_i64 tcg_rd = tcg_temp_new_i64();
6625
6626 handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn);
6627 write_fp_dreg(s, rd, tcg_rd);
6628 tcg_temp_free_i64(tcg_rd);
6629 tcg_temp_free_i64(tcg_rn);
6630 } else {
6631 /* the 'size might not be 64' ops aren't implemented yet */
6632 g_assert_not_reached();
6633 }
384b26fb
AB
6634}
6635
4d1cef84
AB
6636/* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
6637static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
6638 int immh, int immb, int opcode, int rn, int rd)
6639{
6640 int size = 32 - clz32(immh) - 1;
6641 int immhb = immh << 3 | immb;
6642 int shift = 2 * (8 << size) - immhb;
6643 bool accumulate = false;
6644 bool round = false;
6645 int dsize = is_q ? 128 : 64;
6646 int esize = 8 << size;
6647 int elements = dsize/esize;
6648 TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
6649 TCGv_i64 tcg_rn = new_tmp_a64(s);
6650 TCGv_i64 tcg_rd = new_tmp_a64(s);
6651 TCGv_i64 tcg_round;
6652 int i;
6653
6654 if (extract32(immh, 3, 1) && !is_q) {
6655 unallocated_encoding(s);
6656 return;
6657 }
6658
6659 if (size > 3 && !is_q) {
6660 unallocated_encoding(s);
6661 return;
6662 }
6663
6664 switch (opcode) {
6665 case 0x02: /* SSRA / USRA (accumulate) */
6666 accumulate = true;
6667 break;
6668 case 0x04: /* SRSHR / URSHR (rounding) */
6669 round = true;
6670 break;
6671 case 0x06: /* SRSRA / URSRA (accum + rounding) */
6672 accumulate = round = true;
6673 break;
6674 }
6675
6676 if (round) {
6677 uint64_t round_const = 1ULL << (shift - 1);
6678 tcg_round = tcg_const_i64(round_const);
6679 } else {
6680 TCGV_UNUSED_I64(tcg_round);
6681 }
6682
6683 for (i = 0; i < elements; i++) {
6684 read_vec_element(s, tcg_rn, rn, i, memop);
6685 if (accumulate) {
6686 read_vec_element(s, tcg_rd, rd, i, memop);
6687 }
6688
6689 handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6690 accumulate, is_u, size, shift);
6691
6692 write_vec_element(s, tcg_rd, rd, i, size);
6693 }
6694
6695 if (!is_q) {
6696 clear_vec_high(s, rd);
6697 }
6698
6699 if (round) {
6700 tcg_temp_free_i64(tcg_round);
6701 }
6702}
6703
6704/* SHL/SLI - Vector shift left */
6705static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
6706 int immh, int immb, int opcode, int rn, int rd)
6707{
6708 int size = 32 - clz32(immh) - 1;
6709 int immhb = immh << 3 | immb;
6710 int shift = immhb - (8 << size);
6711 int dsize = is_q ? 128 : 64;
6712 int esize = 8 << size;
6713 int elements = dsize/esize;
6714 TCGv_i64 tcg_rn = new_tmp_a64(s);
6715 TCGv_i64 tcg_rd = new_tmp_a64(s);
6716 int i;
6717
6718 if (extract32(immh, 3, 1) && !is_q) {
6719 unallocated_encoding(s);
6720 return;
6721 }
6722
6723 if (size > 3 && !is_q) {
6724 unallocated_encoding(s);
6725 return;
6726 }
6727
6728 for (i = 0; i < elements; i++) {
6729 read_vec_element(s, tcg_rn, rn, i, size);
6730 if (insert) {
6731 read_vec_element(s, tcg_rd, rd, i, size);
6732 }
6733
6734 handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
6735
6736 write_vec_element(s, tcg_rd, rd, i, size);
6737 }
6738
6739 if (!is_q) {
6740 clear_vec_high(s, rd);
6741 }
6742}
6743
6744/* USHLL/SHLL - Vector shift left with widening */
6745static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
6746 int immh, int immb, int opcode, int rn, int rd)
6747{
6748 int size = 32 - clz32(immh) - 1;
6749 int immhb = immh << 3 | immb;
6750 int shift = immhb - (8 << size);
6751 int dsize = 64;
6752 int esize = 8 << size;
6753 int elements = dsize/esize;
6754 TCGv_i64 tcg_rn = new_tmp_a64(s);
6755 TCGv_i64 tcg_rd = new_tmp_a64(s);
6756 int i;
6757
6758 if (size >= 3) {
6759 unallocated_encoding(s);
6760 return;
6761 }
6762
6763 /* For the LL variants the store is larger than the load,
6764 * so if rd == rn we would overwrite parts of our input.
6765 * So load everything right now and use shifts in the main loop.
6766 */
6767 read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
6768
6769 for (i = 0; i < elements; i++) {
6770 tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
6771 ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
6772 tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
6773 write_vec_element(s, tcg_rd, rd, i, size + 1);
6774 }
6775}
6776
6777
384b26fb
AB
6778/* C3.6.14 AdvSIMD shift by immediate
6779 * 31 30 29 28 23 22 19 18 16 15 11 10 9 5 4 0
6780 * +---+---+---+-------------+------+------+--------+---+------+------+
6781 * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 | Rn | Rd |
6782 * +---+---+---+-------------+------+------+--------+---+------+------+
6783 */
6784static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
6785{
4d1cef84
AB
6786 int rd = extract32(insn, 0, 5);
6787 int rn = extract32(insn, 5, 5);
6788 int opcode = extract32(insn, 11, 5);
6789 int immb = extract32(insn, 16, 3);
6790 int immh = extract32(insn, 19, 4);
6791 bool is_u = extract32(insn, 29, 1);
6792 bool is_q = extract32(insn, 30, 1);
6793
6794 switch (opcode) {
6795 case 0x00: /* SSHR / USHR */
6796 case 0x02: /* SSRA / USRA (accumulate) */
6797 case 0x04: /* SRSHR / URSHR (rounding) */
6798 case 0x06: /* SRSRA / URSRA (accum + rounding) */
6799 handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
6800 break;
6801 case 0x0a: /* SHL / SLI */
6802 handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
6803 break;
6804 case 0x14: /* SSHLL / USHLL */
6805 handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
6806 break;
6807 default:
6808 /* We don't currently implement any of the Narrow or saturating shifts;
6809 * nor do we implement the fixed-point conversions in this
6810 * encoding group (SCVTF, FCVTZS, UCVTF, FCVTZU).
6811 */
6812 unsupported_encoding(s, insn);
6813 return;
6814 }
384b26fb
AB
6815}
6816
a08582f4
PM
6817static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
6818 int opcode, int rd, int rn, int rm)
6819{
6820 /* 3-reg-different widening insns: 64 x 64 -> 128 */
6821 TCGv_i64 tcg_res[2];
6822 int pass, accop;
6823
6824 tcg_res[0] = tcg_temp_new_i64();
6825 tcg_res[1] = tcg_temp_new_i64();
6826
6827 /* Does this op do an adding accumulate, a subtracting accumulate,
6828 * or no accumulate at all?
6829 */
6830 switch (opcode) {
6831 case 5:
6832 case 8:
6833 case 9:
6834 accop = 1;
6835 break;
6836 case 10:
6837 case 11:
6838 accop = -1;
6839 break;
6840 default:
6841 accop = 0;
6842 break;
6843 }
6844
6845 if (accop != 0) {
6846 read_vec_element(s, tcg_res[0], rd, 0, MO_64);
6847 read_vec_element(s, tcg_res[1], rd, 1, MO_64);
6848 }
6849
6850 /* size == 2 means two 32x32->64 operations; this is worth special
6851 * casing because we can generally handle it inline.
6852 */
6853 if (size == 2) {
6854 for (pass = 0; pass < 2; pass++) {
6855 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6856 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6857 TCGv_i64 tcg_passres;
6858 TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
6859
6860 int elt = pass + is_q * 2;
6861
6862 read_vec_element(s, tcg_op1, rn, elt, memop);
6863 read_vec_element(s, tcg_op2, rm, elt, memop);
6864
6865 if (accop == 0) {
6866 tcg_passres = tcg_res[pass];
6867 } else {
6868 tcg_passres = tcg_temp_new_i64();
6869 }
6870
6871 switch (opcode) {
0ae39320
PM
6872 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
6873 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
6874 {
6875 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
6876 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
6877
6878 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
6879 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
6880 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
6881 tcg_passres,
6882 tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
6883 tcg_temp_free_i64(tcg_tmp1);
6884 tcg_temp_free_i64(tcg_tmp2);
6885 break;
6886 }
a08582f4
PM
6887 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
6888 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
6889 case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
6890 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
6891 break;
6892 default:
6893 g_assert_not_reached();
6894 }
6895
6896 if (accop > 0) {
6897 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
6898 tcg_temp_free_i64(tcg_passres);
6899 } else if (accop < 0) {
6900 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
6901 tcg_temp_free_i64(tcg_passres);
6902 }
6903
6904 tcg_temp_free_i64(tcg_op1);
6905 tcg_temp_free_i64(tcg_op2);
6906 }
6907 } else {
6908 /* size 0 or 1, generally helper functions */
6909 for (pass = 0; pass < 2; pass++) {
6910 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
6911 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
6912 TCGv_i64 tcg_passres;
6913 int elt = pass + is_q * 2;
6914
6915 read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
6916 read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
6917
6918 if (accop == 0) {
6919 tcg_passres = tcg_res[pass];
6920 } else {
6921 tcg_passres = tcg_temp_new_i64();
6922 }
6923
6924 switch (opcode) {
0ae39320
PM
6925 case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
6926 case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
6927 if (size == 0) {
6928 if (is_u) {
6929 gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
6930 } else {
6931 gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
6932 }
6933 } else {
6934 if (is_u) {
6935 gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
6936 } else {
6937 gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
6938 }
6939 }
6940 break;
a08582f4
PM
6941 case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
6942 case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
6943 case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
6944 if (size == 0) {
6945 if (is_u) {
6946 gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
6947 } else {
6948 gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
6949 }
6950 } else {
6951 if (is_u) {
6952 gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
6953 } else {
6954 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
6955 }
6956 }
6957 break;
6958 default:
6959 g_assert_not_reached();
6960 }
6961 tcg_temp_free_i32(tcg_op1);
6962 tcg_temp_free_i32(tcg_op2);
6963
6964 if (accop > 0) {
6965 if (size == 0) {
6966 gen_helper_neon_addl_u16(tcg_res[pass], tcg_res[pass],
6967 tcg_passres);
6968 } else {
6969 gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
6970 tcg_passres);
6971 }
6972 tcg_temp_free_i64(tcg_passres);
6973 } else if (accop < 0) {
6974 if (size == 0) {
6975 gen_helper_neon_subl_u16(tcg_res[pass], tcg_res[pass],
6976 tcg_passres);
6977 } else {
6978 gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
6979 tcg_passres);
6980 }
6981 tcg_temp_free_i64(tcg_passres);
6982 }
6983 }
6984 }
6985
6986 write_vec_element(s, tcg_res[0], rd, 0, MO_64);
6987 write_vec_element(s, tcg_res[1], rd, 1, MO_64);
6988 tcg_temp_free_i64(tcg_res[0]);
6989 tcg_temp_free_i64(tcg_res[1]);
6990}
6991
384b26fb
AB
6992/* C3.6.15 AdvSIMD three different
6993 * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0
6994 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6995 * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 0 0 | Rn | Rd |
6996 * +---+---+---+-----------+------+---+------+--------+-----+------+------+
6997 */
6998static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
6999{
a08582f4
PM
7000 /* Instructions in this group fall into three basic classes
7001 * (in each case with the operation working on each element in
7002 * the input vectors):
7003 * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
7004 * 128 bit input)
7005 * (2) wide 64 x 128 -> 128
7006 * (3) narrowing 128 x 128 -> 64
7007 * Here we do initial decode, catch unallocated cases and
7008 * dispatch to separate functions for each class.
7009 */
7010 int is_q = extract32(insn, 30, 1);
7011 int is_u = extract32(insn, 29, 1);
7012 int size = extract32(insn, 22, 2);
7013 int opcode = extract32(insn, 12, 4);
7014 int rm = extract32(insn, 16, 5);
7015 int rn = extract32(insn, 5, 5);
7016 int rd = extract32(insn, 0, 5);
7017
7018 switch (opcode) {
7019 case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
7020 case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
7021 /* 64 x 128 -> 128 */
7022 unsupported_encoding(s, insn);
7023 break;
7024 case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
7025 case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
7026 /* 128 x 128 -> 64 */
7027 unsupported_encoding(s, insn);
7028 break;
7029 case 9:
7030 case 11:
7031 case 13:
7032 case 14:
7033 if (is_u) {
7034 unallocated_encoding(s);
7035 return;
7036 }
7037 /* fall through */
7038 case 0:
7039 case 2:
a08582f4
PM
7040 unsupported_encoding(s, insn);
7041 break;
0ae39320
PM
7042 case 5:
7043 case 7:
a08582f4
PM
7044 case 8:
7045 case 10:
7046 case 12:
7047 /* 64 x 64 -> 128 */
7048 if (size == 3) {
7049 unallocated_encoding(s);
7050 return;
7051 }
7052 handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
7053 break;
7054 default:
7055 /* opcode 15 not allocated */
7056 unallocated_encoding(s);
7057 break;
7058 }
384b26fb
AB
7059}
7060
e1cea114
PM
7061/* Logic op (opcode == 3) subgroup of C3.6.16. */
7062static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
7063{
956d272e
PM
7064 int rd = extract32(insn, 0, 5);
7065 int rn = extract32(insn, 5, 5);
7066 int rm = extract32(insn, 16, 5);
7067 int size = extract32(insn, 22, 2);
7068 bool is_u = extract32(insn, 29, 1);
7069 bool is_q = extract32(insn, 30, 1);
7070 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7071 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7072 TCGv_i64 tcg_res[2];
7073 int pass;
7074
7075 tcg_res[0] = tcg_temp_new_i64();
7076 tcg_res[1] = tcg_temp_new_i64();
7077
7078 for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
7079 read_vec_element(s, tcg_op1, rn, pass, MO_64);
7080 read_vec_element(s, tcg_op2, rm, pass, MO_64);
7081
7082 if (!is_u) {
7083 switch (size) {
7084 case 0: /* AND */
7085 tcg_gen_and_i64(tcg_res[pass], tcg_op1, tcg_op2);
7086 break;
7087 case 1: /* BIC */
7088 tcg_gen_andc_i64(tcg_res[pass], tcg_op1, tcg_op2);
7089 break;
7090 case 2: /* ORR */
7091 tcg_gen_or_i64(tcg_res[pass], tcg_op1, tcg_op2);
7092 break;
7093 case 3: /* ORN */
7094 tcg_gen_orc_i64(tcg_res[pass], tcg_op1, tcg_op2);
7095 break;
7096 }
7097 } else {
7098 if (size != 0) {
7099 /* B* ops need res loaded to operate on */
7100 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
7101 }
7102
7103 switch (size) {
7104 case 0: /* EOR */
7105 tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
7106 break;
7107 case 1: /* BSL bitwise select */
7108 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_op2);
7109 tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_res[pass]);
7110 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op1);
7111 break;
7112 case 2: /* BIT, bitwise insert if true */
7113 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
7114 tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_op2);
7115 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
7116 break;
7117 case 3: /* BIF, bitwise insert if false */
7118 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
7119 tcg_gen_andc_i64(tcg_op1, tcg_op1, tcg_op2);
7120 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
7121 break;
7122 }
7123 }
7124 }
7125
7126 write_vec_element(s, tcg_res[0], rd, 0, MO_64);
7127 if (!is_q) {
7128 tcg_gen_movi_i64(tcg_res[1], 0);
7129 }
7130 write_vec_element(s, tcg_res[1], rd, 1, MO_64);
7131
7132 tcg_temp_free_i64(tcg_op1);
7133 tcg_temp_free_i64(tcg_op2);
7134 tcg_temp_free_i64(tcg_res[0]);
7135 tcg_temp_free_i64(tcg_res[1]);
e1cea114
PM
7136}
7137
8b12a0cf
PM
7138/* Helper functions for 32 bit comparisons */
7139static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
7140{
7141 tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
7142}
7143
7144static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
7145{
7146 tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
7147}
7148
7149static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
7150{
7151 tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
7152}
7153
7154static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
7155{
7156 tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
7157}
7158
bc242f9b
AB
7159/* Pairwise op subgroup of C3.6.16.
7160 *
7161 * This is called directly or via the handle_3same_float for float pairwise
7162 * operations where the opcode and size are calculated differently.
7163 */
7164static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
7165 int size, int rn, int rm, int rd)
e1cea114 7166{
bc242f9b 7167 TCGv_ptr fpst;
0173a005
PM
7168 int pass;
7169
bc242f9b
AB
7170 /* Floating point operations need fpst */
7171 if (opcode >= 0x58) {
7172 fpst = get_fpstatus_ptr();
7173 } else {
7174 TCGV_UNUSED_PTR(fpst);
0173a005
PM
7175 }
7176
7177 /* These operations work on the concatenated rm:rn, with each pair of
7178 * adjacent elements being operated on to produce an element in the result.
7179 */
7180 if (size == 3) {
7181 TCGv_i64 tcg_res[2];
7182
7183 for (pass = 0; pass < 2; pass++) {
7184 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7185 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7186 int passreg = (pass == 0) ? rn : rm;
7187
7188 read_vec_element(s, tcg_op1, passreg, 0, MO_64);
7189 read_vec_element(s, tcg_op2, passreg, 1, MO_64);
7190 tcg_res[pass] = tcg_temp_new_i64();
7191
bc242f9b
AB
7192 switch (opcode) {
7193 case 0x17: /* ADDP */
7194 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
7195 break;
7196 case 0x58: /* FMAXNMP */
7197 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
7198 break;
7199 case 0x5a: /* FADDP */
7200 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
7201 break;
7202 case 0x5e: /* FMAXP */
7203 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
7204 break;
7205 case 0x78: /* FMINNMP */
7206 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
7207 break;
7208 case 0x7e: /* FMINP */
7209 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
7210 break;
7211 default:
7212 g_assert_not_reached();
7213 }
0173a005
PM
7214
7215 tcg_temp_free_i64(tcg_op1);
7216 tcg_temp_free_i64(tcg_op2);
7217 }
7218
7219 for (pass = 0; pass < 2; pass++) {
7220 write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
7221 tcg_temp_free_i64(tcg_res[pass]);
7222 }
7223 } else {
7224 int maxpass = is_q ? 4 : 2;
7225 TCGv_i32 tcg_res[4];
7226
7227 for (pass = 0; pass < maxpass; pass++) {
7228 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7229 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
bc242f9b 7230 NeonGenTwoOpFn *genfn = NULL;
0173a005
PM
7231 int passreg = pass < (maxpass / 2) ? rn : rm;
7232 int passelt = (is_q && (pass & 1)) ? 2 : 0;
7233
7234 read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
7235 read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
7236 tcg_res[pass] = tcg_temp_new_i32();
7237
7238 switch (opcode) {
7239 case 0x17: /* ADDP */
7240 {
7241 static NeonGenTwoOpFn * const fns[3] = {
7242 gen_helper_neon_padd_u8,
7243 gen_helper_neon_padd_u16,
7244 tcg_gen_add_i32,
7245 };
7246 genfn = fns[size];
7247 break;
7248 }
7249 case 0x14: /* SMAXP, UMAXP */
7250 {
7251 static NeonGenTwoOpFn * const fns[3][2] = {
7252 { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
7253 { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
7254 { gen_max_s32, gen_max_u32 },
7255 };
7256 genfn = fns[size][u];
7257 break;
7258 }
7259 case 0x15: /* SMINP, UMINP */
7260 {
7261 static NeonGenTwoOpFn * const fns[3][2] = {
7262 { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
7263 { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
7264 { gen_min_s32, gen_min_u32 },
7265 };
7266 genfn = fns[size][u];
7267 break;
7268 }
bc242f9b
AB
7269 /* The FP operations are all on single floats (32 bit) */
7270 case 0x58: /* FMAXNMP */
7271 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
7272 break;
7273 case 0x5a: /* FADDP */
7274 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
7275 break;
7276 case 0x5e: /* FMAXP */
7277 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
7278 break;
7279 case 0x78: /* FMINNMP */
7280 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
7281 break;
7282 case 0x7e: /* FMINP */
7283 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
7284 break;
0173a005
PM
7285 default:
7286 g_assert_not_reached();
7287 }
7288
bc242f9b
AB
7289 /* FP ops called directly, otherwise call now */
7290 if (genfn) {
7291 genfn(tcg_res[pass], tcg_op1, tcg_op2);
7292 }
0173a005
PM
7293
7294 tcg_temp_free_i32(tcg_op1);
7295 tcg_temp_free_i32(tcg_op2);
7296 }
7297
7298 for (pass = 0; pass < maxpass; pass++) {
7299 write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
7300 tcg_temp_free_i32(tcg_res[pass]);
7301 }
7302 if (!is_q) {
7303 clear_vec_high(s, rd);
7304 }
7305 }
bc242f9b
AB
7306
7307 if (!TCGV_IS_UNUSED_PTR(fpst)) {
7308 tcg_temp_free_ptr(fpst);
7309 }
e1cea114
PM
7310}
7311
7312/* Floating point op subgroup of C3.6.16. */
7313static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
7314{
845ea09a
PM
7315 /* For floating point ops, the U, size[1] and opcode bits
7316 * together indicate the operation. size[0] indicates single
7317 * or double.
7318 */
7319 int fpopcode = extract32(insn, 11, 5)
7320 | (extract32(insn, 23, 1) << 5)
7321 | (extract32(insn, 29, 1) << 6);
7322 int is_q = extract32(insn, 30, 1);
7323 int size = extract32(insn, 22, 1);
7324 int rm = extract32(insn, 16, 5);
7325 int rn = extract32(insn, 5, 5);
7326 int rd = extract32(insn, 0, 5);
7327
7328 int datasize = is_q ? 128 : 64;
7329 int esize = 32 << size;
7330 int elements = datasize / esize;
7331
7332 if (size == 1 && !is_q) {
7333 unallocated_encoding(s);
7334 return;
7335 }
7336
7337 switch (fpopcode) {
7338 case 0x58: /* FMAXNMP */
7339 case 0x5a: /* FADDP */
7340 case 0x5e: /* FMAXP */
7341 case 0x78: /* FMINNMP */
7342 case 0x7e: /* FMINP */
bc242f9b
AB
7343 if (size && !is_q) {
7344 unallocated_encoding(s);
7345 return;
7346 }
7347 handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
7348 rn, rm, rd);
845ea09a
PM
7349 return;
7350 case 0x1b: /* FMULX */
845ea09a
PM
7351 case 0x1f: /* FRECPS */
7352 case 0x3f: /* FRSQRTS */
845ea09a 7353 case 0x5d: /* FACGE */
845ea09a
PM
7354 case 0x7d: /* FACGT */
7355 case 0x19: /* FMLA */
7356 case 0x39: /* FMLS */
845ea09a
PM
7357 case 0x18: /* FMAXNM */
7358 case 0x1a: /* FADD */
8908f4d1 7359 case 0x1c: /* FCMEQ */
845ea09a
PM
7360 case 0x1e: /* FMAX */
7361 case 0x38: /* FMINNM */
7362 case 0x3a: /* FSUB */
7363 case 0x3e: /* FMIN */
7364 case 0x5b: /* FMUL */
8908f4d1 7365 case 0x5c: /* FCMGE */
845ea09a
PM
7366 case 0x5f: /* FDIV */
7367 case 0x7a: /* FABD */
8908f4d1 7368 case 0x7c: /* FCMGT */
845ea09a
PM
7369 handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
7370 return;
7371 default:
7372 unallocated_encoding(s);
7373 return;
7374 }
e1cea114
PM
7375}
7376
7377/* Integer op subgroup of C3.6.16. */
7378static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
7379{
1f8a73af
PM
7380 int is_q = extract32(insn, 30, 1);
7381 int u = extract32(insn, 29, 1);
7382 int size = extract32(insn, 22, 2);
7383 int opcode = extract32(insn, 11, 5);
7384 int rm = extract32(insn, 16, 5);
7385 int rn = extract32(insn, 5, 5);
7386 int rd = extract32(insn, 0, 5);
7387 int pass;
7388
7389 switch (opcode) {
7390 case 0x13: /* MUL, PMUL */
7391 if (u && size != 0) {
7392 unallocated_encoding(s);
7393 return;
7394 }
7395 /* fall through */
7396 case 0x0: /* SHADD, UHADD */
7397 case 0x2: /* SRHADD, URHADD */
7398 case 0x4: /* SHSUB, UHSUB */
7399 case 0xc: /* SMAX, UMAX */
7400 case 0xd: /* SMIN, UMIN */
7401 case 0xe: /* SABD, UABD */
7402 case 0xf: /* SABA, UABA */
7403 case 0x12: /* MLA, MLS */
7404 if (size == 3) {
7405 unallocated_encoding(s);
7406 return;
7407 }
8b12a0cf 7408 break;
1f8a73af
PM
7409 case 0x16: /* SQDMULH, SQRDMULH */
7410 if (size == 0 || size == 3) {
7411 unallocated_encoding(s);
7412 return;
7413 }
8b12a0cf 7414 break;
1f8a73af
PM
7415 default:
7416 if (size == 3 && !is_q) {
7417 unallocated_encoding(s);
7418 return;
7419 }
7420 break;
7421 }
7422
7423 if (size == 3) {
7424 for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
7425 TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7426 TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7427 TCGv_i64 tcg_res = tcg_temp_new_i64();
7428
7429 read_vec_element(s, tcg_op1, rn, pass, MO_64);
7430 read_vec_element(s, tcg_op2, rm, pass, MO_64);
7431
7432 handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
7433
7434 write_vec_element(s, tcg_res, rd, pass, MO_64);
7435
7436 tcg_temp_free_i64(tcg_res);
7437 tcg_temp_free_i64(tcg_op1);
7438 tcg_temp_free_i64(tcg_op2);
7439 }
7440 } else {
7441 for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
7442 TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7443 TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7444 TCGv_i32 tcg_res = tcg_temp_new_i32();
6d9571f7
PM
7445 NeonGenTwoOpFn *genfn = NULL;
7446 NeonGenTwoOpEnvFn *genenvfn = NULL;
1f8a73af
PM
7447
7448 read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
7449 read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
7450
7451 switch (opcode) {
8b12a0cf
PM
7452 case 0x0: /* SHADD, UHADD */
7453 {
7454 static NeonGenTwoOpFn * const fns[3][2] = {
7455 { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
7456 { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
7457 { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
7458 };
7459 genfn = fns[size][u];
7460 break;
7461 }
6d9571f7
PM
7462 case 0x1: /* SQADD, UQADD */
7463 {
7464 static NeonGenTwoOpEnvFn * const fns[3][2] = {
7465 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
7466 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
7467 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
7468 };
7469 genenvfn = fns[size][u];
7470 break;
7471 }
8b12a0cf
PM
7472 case 0x2: /* SRHADD, URHADD */
7473 {
7474 static NeonGenTwoOpFn * const fns[3][2] = {
7475 { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
7476 { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
7477 { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
7478 };
7479 genfn = fns[size][u];
7480 break;
7481 }
7482 case 0x4: /* SHSUB, UHSUB */
7483 {
7484 static NeonGenTwoOpFn * const fns[3][2] = {
7485 { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
7486 { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
7487 { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
7488 };
7489 genfn = fns[size][u];
7490 break;
7491 }
6d9571f7
PM
7492 case 0x5: /* SQSUB, UQSUB */
7493 {
7494 static NeonGenTwoOpEnvFn * const fns[3][2] = {
7495 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
7496 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
7497 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
7498 };
7499 genenvfn = fns[size][u];
7500 break;
7501 }
1f8a73af
PM
7502 case 0x6: /* CMGT, CMHI */
7503 {
7504 static NeonGenTwoOpFn * const fns[3][2] = {
7505 { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_u8 },
7506 { gen_helper_neon_cgt_s16, gen_helper_neon_cgt_u16 },
7507 { gen_helper_neon_cgt_s32, gen_helper_neon_cgt_u32 },
7508 };
7509 genfn = fns[size][u];
7510 break;
7511 }
7512 case 0x7: /* CMGE, CMHS */
7513 {
7514 static NeonGenTwoOpFn * const fns[3][2] = {
7515 { gen_helper_neon_cge_s8, gen_helper_neon_cge_u8 },
7516 { gen_helper_neon_cge_s16, gen_helper_neon_cge_u16 },
7517 { gen_helper_neon_cge_s32, gen_helper_neon_cge_u32 },
7518 };
7519 genfn = fns[size][u];
7520 break;
7521 }
6d9571f7
PM
7522 case 0x8: /* SSHL, USHL */
7523 {
7524 static NeonGenTwoOpFn * const fns[3][2] = {
7525 { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
7526 { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
7527 { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
7528 };
7529 genfn = fns[size][u];
7530 break;
7531 }
7532 case 0x9: /* SQSHL, UQSHL */
7533 {
7534 static NeonGenTwoOpEnvFn * const fns[3][2] = {
7535 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
7536 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
7537 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
7538 };
7539 genenvfn = fns[size][u];
7540 break;
7541 }
7542 case 0xa: /* SRSHL, URSHL */
7543 {
7544 static NeonGenTwoOpFn * const fns[3][2] = {
7545 { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
7546 { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
7547 { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
7548 };
7549 genfn = fns[size][u];
7550 break;
7551 }
7552 case 0xb: /* SQRSHL, UQRSHL */
7553 {
7554 static NeonGenTwoOpEnvFn * const fns[3][2] = {
7555 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
7556 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
7557 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
7558 };
7559 genenvfn = fns[size][u];
7560 break;
7561 }
8b12a0cf
PM
7562 case 0xc: /* SMAX, UMAX */
7563 {
7564 static NeonGenTwoOpFn * const fns[3][2] = {
7565 { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
7566 { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
7567 { gen_max_s32, gen_max_u32 },
7568 };
7569 genfn = fns[size][u];
7570 break;
7571 }
7572
7573 case 0xd: /* SMIN, UMIN */
7574 {
7575 static NeonGenTwoOpFn * const fns[3][2] = {
7576 { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
7577 { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
7578 { gen_min_s32, gen_min_u32 },
7579 };
7580 genfn = fns[size][u];
7581 break;
7582 }
7583 case 0xe: /* SABD, UABD */
7584 case 0xf: /* SABA, UABA */
7585 {
7586 static NeonGenTwoOpFn * const fns[3][2] = {
7587 { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
7588 { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
7589 { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
7590 };
7591 genfn = fns[size][u];
7592 break;
7593 }
1f8a73af
PM
7594 case 0x10: /* ADD, SUB */
7595 {
7596 static NeonGenTwoOpFn * const fns[3][2] = {
7597 { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
7598 { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
7599 { tcg_gen_add_i32, tcg_gen_sub_i32 },
7600 };
7601 genfn = fns[size][u];
7602 break;
7603 }
7604 case 0x11: /* CMTST, CMEQ */
7605 {
7606 static NeonGenTwoOpFn * const fns[3][2] = {
7607 { gen_helper_neon_tst_u8, gen_helper_neon_ceq_u8 },
7608 { gen_helper_neon_tst_u16, gen_helper_neon_ceq_u16 },
7609 { gen_helper_neon_tst_u32, gen_helper_neon_ceq_u32 },
7610 };
7611 genfn = fns[size][u];
7612 break;
7613 }
8b12a0cf
PM
7614 case 0x13: /* MUL, PMUL */
7615 if (u) {
7616 /* PMUL */
7617 assert(size == 0);
7618 genfn = gen_helper_neon_mul_p8;
7619 break;
7620 }
7621 /* fall through : MUL */
7622 case 0x12: /* MLA, MLS */
7623 {
7624 static NeonGenTwoOpFn * const fns[3] = {
7625 gen_helper_neon_mul_u8,
7626 gen_helper_neon_mul_u16,
7627 tcg_gen_mul_i32,
7628 };
7629 genfn = fns[size];
7630 break;
7631 }
7632 case 0x16: /* SQDMULH, SQRDMULH */
7633 {
7634 static NeonGenTwoOpEnvFn * const fns[2][2] = {
7635 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
7636 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
7637 };
7638 assert(size == 1 || size == 2);
7639 genenvfn = fns[size - 1][u];
7640 break;
7641 }
1f8a73af
PM
7642 default:
7643 g_assert_not_reached();
7644 }
7645
6d9571f7
PM
7646 if (genenvfn) {
7647 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
7648 } else {
7649 genfn(tcg_res, tcg_op1, tcg_op2);
7650 }
1f8a73af 7651
8b12a0cf
PM
7652 if (opcode == 0xf || opcode == 0x12) {
7653 /* SABA, UABA, MLA, MLS: accumulating ops */
7654 static NeonGenTwoOpFn * const fns[3][2] = {
7655 { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
7656 { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
7657 { tcg_gen_add_i32, tcg_gen_sub_i32 },
7658 };
7659 bool is_sub = (opcode == 0x12 && u); /* MLS */
7660
7661 genfn = fns[size][is_sub];
7662 read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
7663 genfn(tcg_res, tcg_res, tcg_op1);
7664 }
7665
1f8a73af
PM
7666 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7667
7668 tcg_temp_free_i32(tcg_res);
7669 tcg_temp_free_i32(tcg_op1);
7670 tcg_temp_free_i32(tcg_op2);
7671 }
7672 }
7673
7674 if (!is_q) {
7675 clear_vec_high(s, rd);
7676 }
e1cea114
PM
7677}
7678
384b26fb
AB
7679/* C3.6.16 AdvSIMD three same
7680 * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0
7681 * +---+---+---+-----------+------+---+------+--------+---+------+------+
7682 * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd |
7683 * +---+---+---+-----------+------+---+------+--------+---+------+------+
7684 */
7685static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
7686{
e1cea114
PM
7687 int opcode = extract32(insn, 11, 5);
7688
7689 switch (opcode) {
7690 case 0x3: /* logic ops */
7691 disas_simd_3same_logic(s, insn);
7692 break;
7693 case 0x17: /* ADDP */
7694 case 0x14: /* SMAXP, UMAXP */
7695 case 0x15: /* SMINP, UMINP */
bc242f9b 7696 {
e1cea114 7697 /* Pairwise operations */
bc242f9b
AB
7698 int is_q = extract32(insn, 30, 1);
7699 int u = extract32(insn, 29, 1);
7700 int size = extract32(insn, 22, 2);
7701 int rm = extract32(insn, 16, 5);
7702 int rn = extract32(insn, 5, 5);
7703 int rd = extract32(insn, 0, 5);
7704 if (opcode == 0x17) {
7705 if (u || (size == 3 && !is_q)) {
7706 unallocated_encoding(s);
7707 return;
7708 }
7709 } else {
7710 if (size == 3) {
7711 unallocated_encoding(s);
7712 return;
7713 }
7714 }
7715 handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
e1cea114 7716 break;
bc242f9b 7717 }
e1cea114
PM
7718 case 0x18 ... 0x31:
7719 /* floating point ops, sz[1] and U are part of opcode */
7720 disas_simd_3same_float(s, insn);
7721 break;
7722 default:
7723 disas_simd_3same_int(s, insn);
7724 break;
7725 }
384b26fb
AB
7726}
7727
d980fd59
PM
/* Emit code for the narrowing AdvSIMD 2-reg-misc ops (XTN, SQXTUN,
 * SQXTN, UQXTN and their "2" variants).
 *
 * @s:      disassembly context
 * @opcode: 2-reg-misc opcode field (0x12 or 0x14 here)
 * @u:      U bit (selects signed/unsigned or the SQXTUN form)
 * @is_q:   Q bit; true for the INSN2 form which writes the high half of Rd
 * @size:   destination element size (source elements are 2*size)
 * @rn:     source vector register
 * @rd:     destination vector register
 */
static void handle_2misc_narrow(DisasContext *s, int opcode, bool u, bool is_q,
                                int size, int rn, int rd)
{
    /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
     * in the source becomes a size element in the destination).
     */
    int pass;
    TCGv_i32 tcg_res[2];
    /* The "2" forms (is_q) write destination elements 2..3 (the high
     * 64 bits); the base forms write elements 0..1.
     */
    int destelt = is_q ? 2 : 0;

    for (pass = 0; pass < 2; pass++) {
        TCGv_i64 tcg_op = tcg_temp_new_i64();
        /* Exactly one of genfn/genenvfn is set per opcode: plain helpers
         * vs helpers that need cpu_env (for saturation flag updates).
         */
        NeonGenNarrowFn *genfn = NULL;
        NeonGenNarrowEnvFn *genenvfn = NULL;

        read_vec_element(s, tcg_op, rn, pass, MO_64);
        tcg_res[pass] = tcg_temp_new_i32();

        switch (opcode) {
        case 0x12: /* XTN, SQXTUN */
        {
            static NeonGenNarrowFn * const xtnfns[3] = {
                gen_helper_neon_narrow_u8,
                gen_helper_neon_narrow_u16,
                tcg_gen_trunc_i64_i32,
            };
            static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
                gen_helper_neon_unarrow_sat8,
                gen_helper_neon_unarrow_sat16,
                gen_helper_neon_unarrow_sat32,
            };
            /* U=1 is SQXTUN (saturating, needs env); U=0 is plain XTN. */
            if (u) {
                genenvfn = sqxtunfns[size];
            } else {
                genfn = xtnfns[size];
            }
            break;
        }
        case 0x14: /* SQXTN, UQXTN */
        {
            static NeonGenNarrowEnvFn * const fns[3][2] = {
                { gen_helper_neon_narrow_sat_s8,
                  gen_helper_neon_narrow_sat_u8 },
                { gen_helper_neon_narrow_sat_s16,
                  gen_helper_neon_narrow_sat_u16 },
                { gen_helper_neon_narrow_sat_s32,
                  gen_helper_neon_narrow_sat_u32 },
            };
            genenvfn = fns[size][u];
            break;
        }
        default:
            /* Caller has already rejected other opcodes. */
            g_assert_not_reached();
        }

        if (genfn) {
            genfn(tcg_res[pass], tcg_op);
        } else {
            genenvfn(tcg_res[pass], cpu_env, tcg_op);
        }

        tcg_temp_free_i64(tcg_op);
    }

    /* Write results only after both source halves have been read, so the
     * narrowing is correct even when rd == rn.
     */
    for (pass = 0; pass < 2; pass++) {
        write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
        tcg_temp_free_i32(tcg_res[pass]);
    }
    if (!is_q) {
        clear_vec_high(s, rd);
    }
}
7800
39d82118
AB
/* Emit code for REV64, REV32 and REV16: reverse the order of the
 * size-sized elements within each group of (8 << grp_size) bits.
 *
 * @s:      disassembly context
 * @opcode: 0 for REV64/REV32, 1 for REV16
 * @u:      U bit (distinguishes REV32 from REV64)
 * @is_q:   Q bit; full 128-bit vector when set
 * @size:   element size (MO_8/16/32)
 * @rn:     source register
 * @rd:     destination register
 */
static void handle_rev(DisasContext *s, int opcode, bool u,
                       bool is_q, int size, int rn, int rd)
{
    /* op encodes which REV this is: 0 = REV64, 1 = REV32, 2 = REV16 */
    int op = (opcode << 1) | u;
    /* opsz = op + size indexes the (rev kind, element size) combination;
     * grp_size is then log2 of the reversal-group size in bytes.
     */
    int opsz = op + size;
    int grp_size = 3 - opsz;
    int dsize = is_q ? 128 : 64;
    int i;

    /* Combinations where the element is not smaller than the reversal
     * group (e.g. REV16 on halfwords) are unallocated.
     */
    if (opsz >= 3) {
        unallocated_encoding(s);
        return;
    }

    if (size == 0) {
        /* Special case bytes, use bswap op on each group of elements */
        int groups = dsize / (8 << grp_size);

        for (i = 0; i < groups; i++) {
            TCGv_i64 tcg_tmp = tcg_temp_new_i64();

            /* Byte-reversing one group is exactly a bswap of that group. */
            read_vec_element(s, tcg_tmp, rn, i, grp_size);
            switch (grp_size) {
            case MO_16:
                tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
                break;
            case MO_32:
                tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
                break;
            case MO_64:
                tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
                break;
            default:
                g_assert_not_reached();
            }
            write_vec_element(s, tcg_tmp, rd, i, grp_size);
            tcg_temp_free_i64(tcg_tmp);
        }
        if (!is_q) {
            clear_vec_high(s, rd);
        }
    } else {
        /* revmask: XORing the element index with this reverses the
         * element order within each group (group sizes are powers of 2).
         */
        int revmask = (1 << grp_size) - 1;
        int esize = 8 << size;
        int elements = dsize / esize;
        TCGv_i64 tcg_rn = tcg_temp_new_i64();
        /* Accumulate the result in temporaries so rd == rn is safe. */
        TCGv_i64 tcg_rd = tcg_const_i64(0);
        TCGv_i64 tcg_rd_hi = tcg_const_i64(0);

        for (i = 0; i < elements; i++) {
            int e_rev = (i & 0xf) ^ revmask;
            int off = e_rev * esize;
            read_vec_element(s, tcg_rn, rn, i, size);
            /* Deposit into the low or high 64-bit half as appropriate. */
            if (off >= 64) {
                tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
                                    tcg_rn, off - 64, esize);
            } else {
                tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
            }
        }
        write_vec_element(s, tcg_rd, rd, 0, MO_64);
        write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);

        tcg_temp_free_i64(tcg_rd_hi);
        tcg_temp_free_i64(tcg_rd);
        tcg_temp_free_i64(tcg_rn);
    }
}
7869
384b26fb
AB
/* C3.6.17 AdvSIMD two reg misc
 *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
 *
 * Decode + emit for the two-register-misc group. The first switch
 * validates the encoding and dispatches ops with dedicated handlers;
 * ops which fall through to the bottom share the generic per-element
 * loops (64-bit elements via handle_2misc_64, smaller via helpers).
 */
static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
{
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 5);
    bool u = extract32(insn, 29, 1);
    bool is_q = extract32(insn, 30, 1);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);

    switch (opcode) {
    case 0x0: /* REV64, REV32 */
    case 0x1: /* REV16 */
        handle_rev(s, opcode, u, is_q, size, rn, rd);
        return;
    case 0x5: /* CNT, NOT, RBIT */
        /* These three share an opcode; U and size pick the operation. */
        if (u && size == 0) {
            /* NOT: adjust size so we can use the 64-bits-at-a-time loop. */
            size = 3;
            break;
        } else if (u && size == 1) {
            /* RBIT */
            break;
        } else if (!u && size == 0) {
            /* CNT */
            break;
        }
        unallocated_encoding(s);
        return;
    case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
    case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        handle_2misc_narrow(s, opcode, u, is_q, size, rn, rd);
        return;
    case 0x2: /* SADDLP, UADDLP */
    case 0x4: /* CLS, CLZ */
    case 0x6: /* SADALP, UADALP */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        /* Valid encoding but not yet implemented in this translator. */
        unsupported_encoding(s, insn);
        return;
    case 0x13: /* SHLL, SHLL2 */
        if (u == 0 || size == 3) {
            unallocated_encoding(s);
            return;
        }
        unsupported_encoding(s, insn);
        return;
    case 0xa: /* CMLT */
        if (u == 1) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x8: /* CMGT, CMGE */
    case 0x9: /* CMEQ, CMLE */
    case 0xb: /* ABS, NEG */
        /* 64-bit elements require the full (Q=1) vector. */
        if (size == 3 && !is_q) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x3: /* SUQADD, USQADD */
    case 0x7: /* SQABS, SQNEG */
        if (size == 3 && !is_q) {
            unallocated_encoding(s);
            return;
        }
        unsupported_encoding(s, insn);
        return;
    case 0xc ... 0xf:
    case 0x16 ... 0x1d:
    case 0x1f:
    {
        /* Floating point: U, size[1] and opcode indicate operation;
         * size[0] indicates single or double precision.
         */
        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
        size = extract32(size, 0, 1) ? 3 : 2;
        switch (opcode) {
        case 0x2f: /* FABS */
        case 0x6f: /* FNEG */
            if (size == 3 && !is_q) {
                unallocated_encoding(s);
                return;
            }
            break;
        case 0x2c: /* FCMGT (zero) */
        case 0x2d: /* FCMEQ (zero) */
        case 0x2e: /* FCMLT (zero) */
        case 0x6c: /* FCMGE (zero) */
        case 0x6d: /* FCMLE (zero) */
            if (size == 3 && !is_q) {
                unallocated_encoding(s);
                return;
            }
            handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
            return;
        case 0x16: /* FCVTN, FCVTN2 */
        case 0x17: /* FCVTL, FCVTL2 */
        case 0x18: /* FRINTN */
        case 0x19: /* FRINTM */
        case 0x1a: /* FCVTNS */
        case 0x1b: /* FCVTMS */
        case 0x1c: /* FCVTAS */
        case 0x1d: /* SCVTF */
        case 0x38: /* FRINTP */
        case 0x39: /* FRINTZ */
        case 0x3a: /* FCVTPS */
        case 0x3b: /* FCVTZS */
        case 0x3c: /* URECPE */
        case 0x3d: /* FRECPE */
        case 0x56: /* FCVTXN, FCVTXN2 */
        case 0x58: /* FRINTA */
        case 0x59: /* FRINTX */
        case 0x5a: /* FCVTNU */
        case 0x5b: /* FCVTMU */
        case 0x5c: /* FCVTAU */
        case 0x5d: /* UCVTF */
        case 0x79: /* FRINTI */
        case 0x7a: /* FCVTPU */
        case 0x7b: /* FCVTZU */
        case 0x7c: /* URSQRTE */
        case 0x7d: /* FRSQRTE */
        case 0x7f: /* FSQRT */
            unsupported_encoding(s, insn);
            return;
        default:
            unallocated_encoding(s);
            return;
        }
        break;
    }
    default:
        unallocated_encoding(s);
        return;
    }

    if (size == 3) {
        /* All 64-bit element operations can be shared with scalar 2misc */
        int pass;

        for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
            TCGv_i64 tcg_op = tcg_temp_new_i64();
            TCGv_i64 tcg_res = tcg_temp_new_i64();

            read_vec_element(s, tcg_op, rn, pass, MO_64);

            handle_2misc_64(s, opcode, u, tcg_res, tcg_op);

            write_vec_element(s, tcg_res, rd, pass, MO_64);

            tcg_temp_free_i64(tcg_res);
            tcg_temp_free_i64(tcg_op);
        }
    } else {
        int pass;

        /* 8/16/32-bit elements: process one 32-bit lane per pass. */
        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
            TCGv_i32 tcg_op = tcg_temp_new_i32();
            TCGv_i32 tcg_res = tcg_temp_new_i32();
            TCGCond cond;

            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);

            if (size == 2) {
                /* Special cases for 32 bit elements */
                switch (opcode) {
                case 0xa: /* CMLT */
                    /* 32 bit integer comparison against zero, result is
                     * test ? (2^32 - 1) : 0. We implement via setcond(test)
                     * and inverting.
                     */
                    cond = TCG_COND_LT;
                do_cmop:
                    tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
                    /* Turn the 0/1 setcond result into 0/all-ones. */
                    tcg_gen_neg_i32(tcg_res, tcg_res);
                    break;
                case 0x8: /* CMGT, CMGE */
                    cond = u ? TCG_COND_GE : TCG_COND_GT;
                    goto do_cmop;
                case 0x9: /* CMEQ, CMLE */
                    cond = u ? TCG_COND_LE : TCG_COND_EQ;
                    goto do_cmop;
                case 0xb: /* ABS, NEG */
                    if (u) {
                        tcg_gen_neg_i32(tcg_res, tcg_op);
                    } else {
                        /* ABS: select between op and -op via movcond. */
                        TCGv_i32 tcg_zero = tcg_const_i32(0);
                        tcg_gen_neg_i32(tcg_res, tcg_op);
                        tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
                                            tcg_zero, tcg_op, tcg_res);
                        tcg_temp_free_i32(tcg_zero);
                    }
                    break;
                case 0x2f: /* FABS */
                    gen_helper_vfp_abss(tcg_res, tcg_op);
                    break;
                case 0x6f: /* FNEG */
                    gen_helper_vfp_negs(tcg_res, tcg_op);
                    break;
                default:
                    g_assert_not_reached();
                }
            } else {
                /* Use helpers for 8 and 16 bit elements */
                switch (opcode) {
                case 0x5: /* CNT, RBIT */
                    /* For these two insns size is part of the opcode specifier
                     * (handled earlier); they always operate on byte elements.
                     */
                    if (u) {
                        gen_helper_neon_rbit_u8(tcg_res, tcg_op);
                    } else {
                        gen_helper_neon_cnt_u8(tcg_res, tcg_op);
                    }
                    break;
                case 0x8: /* CMGT, CMGE */
                case 0x9: /* CMEQ, CMLE */
                case 0xa: /* CMLT */
                {
                    static NeonGenTwoOpFn * const fns[3][2] = {
                        { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
                        { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
                        { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
                    };
                    NeonGenTwoOpFn *genfn;
                    int comp;
                    bool reverse;
                    TCGv_i32 tcg_zero = tcg_const_i32(0);

                    /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
                    comp = (opcode - 0x8) * 2 + u;
                    /* ...but LE, LT are implemented as reverse GE, GT */
                    reverse = (comp > 2);
                    if (reverse) {
                        comp = 4 - comp;
                    }
                    genfn = fns[comp][size];
                    if (reverse) {
                        /* Swap operands: op <cmp> 0 becomes 0 <cmp> op. */
                        genfn(tcg_res, tcg_zero, tcg_op);
                    } else {
                        genfn(tcg_res, tcg_op, tcg_zero);
                    }
                    tcg_temp_free_i32(tcg_zero);
                    break;
                }
                case 0xb: /* ABS, NEG */
                    if (u) {
                        /* NEG implemented as 0 - op. */
                        TCGv_i32 tcg_zero = tcg_const_i32(0);
                        if (size) {
                            gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
                        } else {
                            gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
                        }
                        tcg_temp_free_i32(tcg_zero);
                    } else {
                        if (size) {
                            gen_helper_neon_abs_s16(tcg_res, tcg_op);
                        } else {
                            gen_helper_neon_abs_s8(tcg_res, tcg_op);
                        }
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }

            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);

            tcg_temp_free_i32(tcg_res);
            tcg_temp_free_i32(tcg_op);
        }
    }
    if (!is_q) {
        clear_vec_high(s, rd);
    }
}
8159
9f82e0ff
PM
/* C3.6.13 AdvSIMD scalar x indexed element
 *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
 * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
 * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
 * C3.6.18 AdvSIMD vector x indexed element
 *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
 * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
 */
static void disas_simd_indexed(DisasContext *s, uint32_t insn)
{
    /* This encoding has two kinds of instruction:
     *  normal, where we perform elt x idxelt => elt for each
     *     element in the vector
     *  long, where we perform elt x idxelt and generate a result of
     *     double the width of the input element
     * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
     */
    bool is_scalar = extract32(insn, 28, 1);
    bool is_q = extract32(insn, 30, 1);
    bool u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int l = extract32(insn, 21, 1);
    int m = extract32(insn, 20, 1);
    /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
    int rm = extract32(insn, 16, 4);
    int opcode = extract32(insn, 12, 4);
    int h = extract32(insn, 11, 1);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    bool is_long = false;
    bool is_fp = false;
    int index;
    TCGv_ptr fpst;

    /* First switch: validate the encoding and classify the operation
     * (long/fp); unallocated combinations are rejected here.
     */
    switch (opcode) {
    case 0x0: /* MLA */
    case 0x4: /* MLS */
        if (!u || is_scalar) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
    case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
    case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
        if (is_scalar) {
            unallocated_encoding(s);
            return;
        }
        is_long = true;
        break;
    case 0x3: /* SQDMLAL, SQDMLAL2 */
    case 0x7: /* SQDMLSL, SQDMLSL2 */
    case 0xb: /* SQDMULL, SQDMULL2 */
        is_long = true;
        /* fall through */
    case 0xc: /* SQDMULH */
    case 0xd: /* SQRDMULH */
        if (u) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x8: /* MUL */
        if (u || is_scalar) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x1: /* FMLA */
    case 0x5: /* FMLS */
        if (u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x9: /* FMUL, FMULX */
        if (!extract32(size, 1, 1)) {
            unallocated_encoding(s);
            return;
        }
        is_fp = true;
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    /* Work out the element index within Rm; for 16-bit elements the
     * M bit is part of the index (Rm is restricted to V0-V15), for
     * larger elements it is the top bit of the register number.
     */
    if (is_fp) {
        /* low bit of size indicates single/double */
        size = extract32(size, 0, 1) ? 3 : 2;
        if (size == 2) {
            index = h << 1 | l;
        } else {
            if (l || !is_q) {
                unallocated_encoding(s);
                return;
            }
            index = h;
        }
        rm |= (m << 4);
    } else {
        switch (size) {
        case 1:
            index = h << 2 | l << 1 | m;
            break;
        case 2:
            index = h << 1 | l;
            rm |= (m << 4);
            break;
        default:
            unallocated_encoding(s);
            return;
        }
    }

    if (is_fp) {
        fpst = get_fpstatus_ptr();
    } else {
        TCGV_UNUSED_PTR(fpst);
    }

    if (size == 3) {
        /* 64-bit elements: only the FP ops can get here. */
        TCGv_i64 tcg_idx = tcg_temp_new_i64();
        int pass;

        assert(is_fp && is_q && !is_long);

        read_vec_element(s, tcg_idx, rm, index, MO_64);

        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
            TCGv_i64 tcg_op = tcg_temp_new_i64();
            TCGv_i64 tcg_res = tcg_temp_new_i64();

            read_vec_element(s, tcg_op, rn, pass, MO_64);

            switch (opcode) {
            case 0x5: /* FMLS */
                /* As usual for ARM, separate negation for fused multiply-add */
                gen_helper_vfp_negd(tcg_op, tcg_op);
                /* fall through */
            case 0x1: /* FMLA */
                read_vec_element(s, tcg_res, rd, pass, MO_64);
                gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
                break;
            case 0x9: /* FMUL, FMULX */
                if (u) {
                    gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
                } else {
                    gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
                }
                break;
            default:
                g_assert_not_reached();
            }

            write_vec_element(s, tcg_res, rd, pass, MO_64);
            tcg_temp_free_i64(tcg_op);
            tcg_temp_free_i64(tcg_res);
        }

        if (is_scalar) {
            clear_vec_high(s, rd);
        }

        tcg_temp_free_i64(tcg_idx);
    } else if (!is_long) {
        /* 32 bit floating point, or 16 or 32 bit integer.
         * For the 16 bit scalar case we use the usual Neon helpers and
         * rely on the fact that 0 op 0 == 0 with no side effects.
         */
        TCGv_i32 tcg_idx = tcg_temp_new_i32();
        int pass, maxpasses;

        if (is_scalar) {
            maxpasses = 1;
        } else {
            maxpasses = is_q ? 4 : 2;
        }

        read_vec_element_i32(s, tcg_idx, rm, index, size);

        if (size == 1 && !is_scalar) {
            /* The simplest way to handle the 16x16 indexed ops is to duplicate
             * the index into both halves of the 32 bit tcg_idx and then use
             * the usual Neon helpers.
             */
            tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
        }

        for (pass = 0; pass < maxpasses; pass++) {
            TCGv_i32 tcg_op = tcg_temp_new_i32();
            TCGv_i32 tcg_res = tcg_temp_new_i32();

            read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);

            switch (opcode) {
            case 0x0: /* MLA */
            case 0x4: /* MLS */
            case 0x8: /* MUL */
            {
                static NeonGenTwoOpFn * const fns[2][2] = {
                    { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
                    { tcg_gen_add_i32, tcg_gen_sub_i32 },
                };
                NeonGenTwoOpFn *genfn;
                bool is_sub = opcode == 0x4;

                if (size == 1) {
                    gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
                } else {
                    tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
                }
                if (opcode == 0x8) {
                    break;
                }
                /* MLA/MLS: accumulate into the destination element. */
                read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
                genfn = fns[size - 1][is_sub];
                genfn(tcg_res, tcg_op, tcg_res);
                break;
            }
            case 0x5: /* FMLS */
                /* As usual for ARM, separate negation for fused multiply-add */
                gen_helper_vfp_negs(tcg_op, tcg_op);
                /* fall through */
            case 0x1: /* FMLA */
                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
                gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
                break;
            case 0x9: /* FMUL, FMULX */
                if (u) {
                    gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
                } else {
                    gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
                }
                break;
            case 0xc: /* SQDMULH */
                if (size == 1) {
                    gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
                                               tcg_op, tcg_idx);
                } else {
                    gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
                                               tcg_op, tcg_idx);
                }
                break;
            case 0xd: /* SQRDMULH */
                if (size == 1) {
                    gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
                                                tcg_op, tcg_idx);
                } else {
                    gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
                                                tcg_op, tcg_idx);
                }
                break;
            default:
                g_assert_not_reached();
            }

            if (is_scalar) {
                write_fp_sreg(s, rd, tcg_res);
            } else {
                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
            }

            tcg_temp_free_i32(tcg_op);
            tcg_temp_free_i32(tcg_res);
        }

        tcg_temp_free_i32(tcg_idx);

        if (!is_q) {
            clear_vec_high(s, rd);
        }
    } else {
        /* long ops: 16x16->32 or 32x32->64 */
        TCGv_i64 tcg_res[2];
        int pass;
        bool satop = extract32(opcode, 0, 1);
        TCGMemOp memop = MO_32;

        /* Saturating-doubling and signed variants read signed elements. */
        if (satop || !u) {
            memop |= MO_SIGN;
        }

        if (size == 2) {
            TCGv_i64 tcg_idx = tcg_temp_new_i64();

            read_vec_element(s, tcg_idx, rm, index, memop);

            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
                TCGv_i64 tcg_op = tcg_temp_new_i64();
                TCGv_i64 tcg_passres;
                int passelt;

                /* The "2" forms read from the high half of Rn. */
                if (is_scalar) {
                    passelt = 0;
                } else {
                    passelt = pass + (is_q * 2);
                }

                read_vec_element(s, tcg_op, rn, passelt, memop);

                tcg_res[pass] = tcg_temp_new_i64();

                if (opcode == 0xa || opcode == 0xb) {
                    /* Non-accumulating ops */
                    tcg_passres = tcg_res[pass];
                } else {
                    tcg_passres = tcg_temp_new_i64();
                }

                tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
                tcg_temp_free_i64(tcg_op);

                if (satop) {
                    /* saturating, doubling */
                    gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
                                                      tcg_passres, tcg_passres);
                }

                if (opcode == 0xa || opcode == 0xb) {
                    continue;
                }

                /* Accumulating op: handle accumulate step */
                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);

                switch (opcode) {
                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
                    tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
                    break;
                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
                    tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
                    break;
                case 0x7: /* SQDMLSL, SQDMLSL2 */
                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
                    /* fall through */
                case 0x3: /* SQDMLAL, SQDMLAL2 */
                    gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
                                                      tcg_res[pass],
                                                      tcg_passres);
                    break;
                default:
                    g_assert_not_reached();
                }
                tcg_temp_free_i64(tcg_passres);
            }
            tcg_temp_free_i64(tcg_idx);

            if (is_scalar) {
                clear_vec_high(s, rd);
            }
        } else {
            TCGv_i32 tcg_idx = tcg_temp_new_i32();

            assert(size == 1);
            read_vec_element_i32(s, tcg_idx, rm, index, size);

            if (!is_scalar) {
                /* The simplest way to handle the 16x16 indexed ops is to
                 * duplicate the index into both halves of the 32 bit tcg_idx
                 * and then use the usual Neon helpers.
                 */
                tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
            }

            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
                TCGv_i32 tcg_op = tcg_temp_new_i32();
                TCGv_i64 tcg_passres;

                if (is_scalar) {
                    read_vec_element_i32(s, tcg_op, rn, pass, size);
                } else {
                    read_vec_element_i32(s, tcg_op, rn,
                                         pass + (is_q * 2), MO_32);
                }

                tcg_res[pass] = tcg_temp_new_i64();

                if (opcode == 0xa || opcode == 0xb) {
                    /* Non-accumulating ops */
                    tcg_passres = tcg_res[pass];
                } else {
                    tcg_passres = tcg_temp_new_i64();
                }

                if (memop & MO_SIGN) {
                    gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
                } else {
                    gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
                }
                if (satop) {
                    gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
                                                      tcg_passres, tcg_passres);
                }
                tcg_temp_free_i32(tcg_op);

                if (opcode == 0xa || opcode == 0xb) {
                    continue;
                }

                /* Accumulating op: handle accumulate step */
                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);

                switch (opcode) {
                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
                    gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
                                             tcg_passres);
                    break;
                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
                    gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
                                             tcg_passres);
                    break;
                case 0x7: /* SQDMLSL, SQDMLSL2 */
                    gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
                    /* fall through */
                case 0x3: /* SQDMLAL, SQDMLAL2 */
                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
                                                      tcg_res[pass],
                                                      tcg_passres);
                    break;
                default:
                    g_assert_not_reached();
                }
                tcg_temp_free_i64(tcg_passres);
            }
            tcg_temp_free_i32(tcg_idx);

            if (is_scalar) {
                /* Scalar 16x16->32 result lives in the low 32 bits. */
                tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
            }
        }

        if (is_scalar) {
            tcg_res[1] = tcg_const_i64(0);
        }

        for (pass = 0; pass < 2; pass++) {
            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
            tcg_temp_free_i64(tcg_res[pass]);
        }
    }

    if (!TCGV_IS_UNUSED_PTR(fpst)) {
        tcg_temp_free_ptr(fpst);
    }
}
8610
8611/* C3.6.19 Crypto AES
8612 * 31 24 23 22 21 17 16 12 11 10 9 5 4 0
8613 * +-----------------+------+-----------+--------+-----+------+------+
8614 * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd |
8615 * +-----------------+------+-----------+--------+-----+------+------+
8616 */
8617static void disas_crypto_aes(DisasContext *s, uint32_t insn)
8618{
8619 unsupported_encoding(s, insn);
8620}
8621
8622/* C3.6.20 Crypto three-reg SHA
8623 * 31 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0
8624 * +-----------------+------+---+------+---+--------+-----+------+------+
8625 * | 0 1 0 1 1 1 1 0 | size | 0 | Rm | 0 | opcode | 0 0 | Rn | Rd |
8626 * +-----------------+------+---+------+---+--------+-----+------+------+
8627 */
8628static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
8629{
8630 unsupported_encoding(s, insn);
8631}
8632
8633/* C3.6.21 Crypto two-reg SHA
8634 * 31 24 23 22 21 17 16 12 11 10 9 5 4 0
8635 * +-----------------+------+-----------+--------+-----+------+------+
8636 * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd |
8637 * +-----------------+------+-----------+--------+-----+------+------+
8638 */
8639static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
8640{
8641 unsupported_encoding(s, insn);
8642}
8643
8644/* C3.6 Data processing - SIMD, inc Crypto
8645 *
8646 * As the decode gets a little complex we are using a table based
8647 * approach for this part of the decode.
8648 */
8649static const AArch64DecodeTable data_proc_simd[] = {
8650 /* pattern , mask , fn */
8651 { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
8652 { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
8653 { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
8654 { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
8655 { 0x0e000400, 0x9fe08400, disas_simd_copy },
9f82e0ff 8656 { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
384b26fb
AB
8657 /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
8658 { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
8659 { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
8660 { 0x0e000000, 0xbf208c00, disas_simd_tb },
8661 { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
8662 { 0x2e000000, 0xbf208400, disas_simd_ext },
8663 { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
8664 { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
8665 { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
8666 { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
8667 { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
9f82e0ff 8668 { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
384b26fb
AB
8669 { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
8670 { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
8671 { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
8672 { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
8673 { 0x00000000, 0x00000000, NULL }
8674};
8675
faa0ba46
PM
8676static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
8677{
8678 /* Note that this is called with all non-FP cases from
8679 * table C3-6 so it must UNDEF for entries not specifically
8680 * allocated to instructions in that table.
8681 */
384b26fb
AB
8682 AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
8683 if (fn) {
8684 fn(s, insn);
8685 } else {
8686 unallocated_encoding(s);
8687 }
faa0ba46
PM
8688}
8689
ad7ee8a2
CF
8690/* C3.6 Data processing - SIMD and floating point */
8691static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
8692{
faa0ba46
PM
8693 if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
8694 disas_data_proc_fp(s, insn);
8695 } else {
8696 /* SIMD, including crypto */
8697 disas_data_proc_simd(s, insn);
8698 }
ad7ee8a2
CF
8699}
8700
8701/* C3.1 A64 instruction index by encoding */
40f860cd 8702static void disas_a64_insn(CPUARMState *env, DisasContext *s)
14ade10f
AG
8703{
8704 uint32_t insn;
8705
8706 insn = arm_ldl_code(env, s->pc, s->bswap_code);
8707 s->insn = insn;
8708 s->pc += 4;
8709
ad7ee8a2
CF
8710 switch (extract32(insn, 25, 4)) {
8711 case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
14ade10f
AG
8712 unallocated_encoding(s);
8713 break;
ad7ee8a2
CF
8714 case 0x8: case 0x9: /* Data processing - immediate */
8715 disas_data_proc_imm(s, insn);
8716 break;
8717 case 0xa: case 0xb: /* Branch, exception generation and system insns */
8718 disas_b_exc_sys(s, insn);
8719 break;
8720 case 0x4:
8721 case 0x6:
8722 case 0xc:
8723 case 0xe: /* Loads and stores */
8724 disas_ldst(s, insn);
8725 break;
8726 case 0x5:
8727 case 0xd: /* Data processing - register */
8728 disas_data_proc_reg(s, insn);
8729 break;
8730 case 0x7:
8731 case 0xf: /* Data processing - SIMD and floating point */
8732 disas_data_proc_simd_fp(s, insn);
8733 break;
8734 default:
8735 assert(FALSE); /* all 15 cases should be handled above */
8736 break;
14ade10f 8737 }
11e169de
AG
8738
8739 /* if we allocated any temporaries, free them here */
8740 free_tmp_a64(s);
40f860cd 8741}
14ade10f 8742
40f860cd
PM
8743void gen_intermediate_code_internal_a64(ARMCPU *cpu,
8744 TranslationBlock *tb,
8745 bool search_pc)
8746{
8747 CPUState *cs = CPU(cpu);
8748 CPUARMState *env = &cpu->env;
8749 DisasContext dc1, *dc = &dc1;
8750 CPUBreakpoint *bp;
8751 uint16_t *gen_opc_end;
8752 int j, lj;
8753 target_ulong pc_start;
8754 target_ulong next_page_start;
8755 int num_insns;
8756 int max_insns;
8757
8758 pc_start = tb->pc;
8759
8760 dc->tb = tb;
8761
8762 gen_opc_end = tcg_ctx.gen_opc_buf + OPC_MAX_SIZE;
8763
8764 dc->is_jmp = DISAS_NEXT;
8765 dc->pc = pc_start;
8766 dc->singlestep_enabled = cs->singlestep_enabled;
8767 dc->condjmp = 0;
8768
8769 dc->aarch64 = 1;
8770 dc->thumb = 0;
8771 dc->bswap_code = 0;
8772 dc->condexec_mask = 0;
8773 dc->condexec_cond = 0;
8774#if !defined(CONFIG_USER_ONLY)
8775 dc->user = 0;
8776#endif
8777 dc->vfp_enabled = 0;
8778 dc->vec_len = 0;
8779 dc->vec_stride = 0;
60322b39
PM
8780 dc->cp_regs = cpu->cp_regs;
8781 dc->current_pl = arm_current_pl(env);
40f860cd 8782
11e169de
AG
8783 init_tmp_a64_array(dc);
8784
40f860cd
PM
8785 next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
8786 lj = -1;
8787 num_insns = 0;
8788 max_insns = tb->cflags & CF_COUNT_MASK;
8789 if (max_insns == 0) {
8790 max_insns = CF_COUNT_MASK;
8791 }
8792
8793 gen_tb_start();
8794
8795 tcg_clear_temp_count();
8796
8797 do {
8798 if (unlikely(!QTAILQ_EMPTY(&env->breakpoints))) {
8799 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
8800 if (bp->pc == dc->pc) {
8801 gen_exception_insn(dc, 0, EXCP_DEBUG);
8802 /* Advance PC so that clearing the breakpoint will
8803 invalidate this TB. */
8804 dc->pc += 2;
8805 goto done_generating;
8806 }
8807 }
8808 }
8809
8810 if (search_pc) {
8811 j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
8812 if (lj < j) {
8813 lj++;
8814 while (lj < j) {
8815 tcg_ctx.gen_opc_instr_start[lj++] = 0;
8816 }
8817 }
8818 tcg_ctx.gen_opc_pc[lj] = dc->pc;
8819 tcg_ctx.gen_opc_instr_start[lj] = 1;
8820 tcg_ctx.gen_opc_icount[lj] = num_insns;
8821 }
8822
8823 if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) {
8824 gen_io_start();
8825 }
8826
8827 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
8828 tcg_gen_debug_insn_start(dc->pc);
8829 }
8830
8831 disas_a64_insn(env, dc);
8832
8833 if (tcg_check_temp_count()) {
8834 fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n",
8835 dc->pc);
8836 }
8837
8838 /* Translation stops when a conditional branch is encountered.
8839 * Otherwise the subsequent code could get translated several times.
8840 * Also stop translation when a page boundary is reached. This
8841 * ensures prefetch aborts occur at the right place.
8842 */
8843 num_insns++;
8844 } while (!dc->is_jmp && tcg_ctx.gen_opc_ptr < gen_opc_end &&
8845 !cs->singlestep_enabled &&
8846 !singlestep &&
8847 dc->pc < next_page_start &&
8848 num_insns < max_insns);
8849
8850 if (tb->cflags & CF_LAST_IO) {
8851 gen_io_end();
8852 }
8853
8854 if (unlikely(cs->singlestep_enabled) && dc->is_jmp != DISAS_EXC) {
8855 /* Note that this means single stepping WFI doesn't halt the CPU.
8856 * For conditional branch insns this is harmless unreachable code as
8857 * gen_goto_tb() has already handled emitting the debug exception
8858 * (and thus a tb-jump is not possible when singlestepping).
8859 */
8860 assert(dc->is_jmp != DISAS_TB_JUMP);
8861 if (dc->is_jmp != DISAS_JUMP) {
8862 gen_a64_set_pc_im(dc->pc);
8863 }
8864 gen_exception(EXCP_DEBUG);
8865 } else {
8866 switch (dc->is_jmp) {
8867 case DISAS_NEXT:
8868 gen_goto_tb(dc, 1, dc->pc);
8869 break;
8870 default:
40f860cd 8871 case DISAS_UPDATE:
fea50522
PM
8872 gen_a64_set_pc_im(dc->pc);
8873 /* fall through */
8874 case DISAS_JUMP:
40f860cd
PM
8875 /* indicate that the hash table must be used to find the next TB */
8876 tcg_gen_exit_tb(0);
8877 break;
8878 case DISAS_TB_JUMP:
8879 case DISAS_EXC:
8880 case DISAS_SWI:
8881 break;
8882 case DISAS_WFI:
8883 /* This is a special case because we don't want to just halt the CPU
8884 * if trying to debug across a WFI.
8885 */
8886 gen_helper_wfi(cpu_env);
8887 break;
8888 }
8889 }
8890
8891done_generating:
8892 gen_tb_end(tb, num_insns);
8893 *tcg_ctx.gen_opc_ptr = INDEX_op_end;
8894
8895#ifdef DEBUG_DISAS
8896 if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
8897 qemu_log("----------------\n");
8898 qemu_log("IN: %s\n", lookup_symbol(pc_start));
8899 log_target_disas(env, pc_start, dc->pc - pc_start,
999b53ec 8900 4 | (dc->bswap_code << 1));
40f860cd
PM
8901 qemu_log("\n");
8902 }
8903#endif
8904 if (search_pc) {
8905 j = tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
8906 lj++;
8907 while (lj <= j) {
8908 tcg_ctx.gen_opc_instr_start[lj++] = 0;
8909 }
8910 } else {
8911 tb->size = dc->pc - pc_start;
8912 tb->icount = num_insns;
14ade10f
AG
8913 }
8914}
This page took 1.120602 seconds and 4 git commands to generate.