]> Git Repo - qemu.git/blame - target/arm/translate-sve.c
target/arm: Implement SVE floating-point exponential accelerator
[qemu.git] / target / arm / translate-sve.c
CommitLineData
38388f7e
RH
1/*
2 * AArch64 SVE translation
3 *
4 * Copyright (c) 2018 Linaro, Ltd
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include "qemu/osdep.h"
21#include "cpu.h"
22#include "exec/exec-all.h"
23#include "tcg-op.h"
24#include "tcg-op-gvec.h"
028e2a7b 25#include "tcg-gvec-desc.h"
38388f7e
RH
26#include "qemu/log.h"
27#include "arm_ldst.h"
28#include "translate.h"
29#include "internals.h"
30#include "exec/helper-proto.h"
31#include "exec/helper-gen.h"
32#include "exec/log.h"
33#include "trace-tcg.h"
34#include "translate-a64.h"
35
ccd841c3
RH
/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(int x)
{
    x >>= 3; /* discard imm3 */
    /* Position of the most significant set bit of tsz gives the
     * element size; clz32(0) == 32 yields -1 for tsz == 0. */
    return 31 - clz32(x);
}

/* See e.g. LSR (immediate, predicated).
 * The tsz:imm3 field encodes (16 << esz) - shift for right shifts. */
static int tszimm_shr(int x)
{
    return (16 << tszimm_esz(x)) - x;
}

/* See e.g. LSL (immediate, predicated).
 * The tsz:imm3 field encodes (8 << esz) + shift for left shifts. */
static int tszimm_shl(int x)
{
    return x - (8 << tszimm_esz(x));
}
59
38388f7e
RH
60/*
61 * Include the generated decoder.
62 */
63
64#include "decode-sve.inc.c"
65
66/*
67 * Implement all of the translator functions referenced by the decoder.
68 */
69
d1822297
RH
/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64. */
static inline int pred_full_reg_size(DisasContext *s)
{
    /* sve_len is the vector length in bytes; one predicate bit per byte. */
    return s->sve_len >> 3;
}
83
516e246a
RH
/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure. Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    /* Anything up to 8 bytes uses the minimum 8-byte operation size;
     * larger sizes are rounded up to a multiple of 16 bytes.
     * (size + 15) & ~15 performs the same rounding as QEMU_ALIGN_UP(size, 16).
     */
    return size <= 8 ? 8 : (size + 15) & ~15;
}
100
/* Return the predicate register size, rounded up to a size usable
 * by the tcg gvec infrastructure (see size_for_gvec above). */
static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}
105
39eea561
RH
/* Invoke a vector expander on two Zregs. */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    /* True even on access failure: the insn decoded, and the
     * access check has already raised any required exception. */
    return true;
}

/* Invoke a vector expander on three Zregs. */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}
130
39eea561
RH
/* Invoke a vector move on two Zregs. */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    /* esz 0 is fine: a move is element-size agnostic. */
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Initialize a Zreg with replications of a 64-bit immediate. */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}
143
516e246a
RH
/* Invoke a vector expander on two Pregs. */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        /* Use the rounded-up size; bits above pred_full_reg_size
         * are zero and stay zero (see size_for_gvec). */
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs. */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector operation on four Pregs. */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs. */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}
189
9e18d7a6
RH
/* Set the cpu flags as per a return from an SVE helper.
 * The helper packs NZCV into t: N in the sign position,
 * Z in bit 1, C in bit 0; V is always clear. */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}

/* Subroutines computing the ARM PredTest pseudofunction. */

/* PredTest over a single 64-bit predicate word. */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* PredTest over 'words' 64-bit predicate words at env offsets
 * dofs (data) and gofs (governing predicate). */
static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    /* Note t is both the word-count input and the flags output. */
    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}
226
028e2a7b
RH
/* For each element size, the bits within a predicate word that are active.
 * Indexed by esz: every bit for bytes, every 2nd for halfwords,
 * every 4th for words, every 8th for doublewords. */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
232
39eea561
RH
/*
 *** SVE Logical - Unpredicated Group
 */

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (a->rn == a->rm) { /* MOV */
        /* ORR Zd, Zn, Zn is the MOV (vector) alias. */
        return do_mov_z(s, a->rd, a->rn);
    } else {
        return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
    }
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}
d1822297 260
fea98f9c
RH
/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

/* Signed saturating add. */
static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

/* Signed saturating subtract. */
static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

/* Unsigned saturating add. */
static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

/* Unsigned saturating subtract. */
static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}
294
f97cfd59
RH
295/*
296 *** SVE Integer Arithmetic - Binary Predicated Group
297 */
298
299static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
300{
301 unsigned vsz = vec_full_reg_size(s);
302 if (fn == NULL) {
303 return false;
304 }
305 if (sve_access_check(s)) {
306 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
307 vec_full_reg_offset(s, a->rn),
308 vec_full_reg_offset(s, a->rm),
309 pred_full_reg_offset(s, a->pg),
310 vsz, vsz, 0, fn);
311 }
312 return true;
313}
314
/* Expand a predicated three-operand trans function that dispatches
 * to per-element-size out-of-line helpers. */
#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)

/* SDIV/UDIV exist only for word and doubleword element sizes;
 * byte and halfword encodings are unallocated (NULL -> false). */
static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

#undef DO_ZPZZ
366
afac6d04
RH
367/*
368 *** SVE Integer Arithmetic - Unary Predicated Group
369 */
370
/* Invoke an out-of-line helper on a Zreg source and governing predicate.
 * A NULL fn marks an unallocated element-size encoding. */
static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

/* Expand a predicated unary trans function dispatching on element size. */
#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                   \
    static gen_helper_gvec_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
    };                                                              \
    return do_zpz_ool(s, a, fns[a->esz]);                           \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)
403
/* FABS/FNEG have no byte-sized encoding (esz 0 unallocated). */
static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* Sign/zero extension: the destination element size must be wider
 * than the source width, so narrower esz entries are NULL. */
static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* Word extension is only valid to doubleword elements. */
static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ
479
047cec97
RH
480/*
481 *** SVE Integer Reduction Group
482 */
483
484typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
485static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
486 gen_helper_gvec_reduc *fn)
487{
488 unsigned vsz = vec_full_reg_size(s);
489 TCGv_ptr t_zn, t_pg;
490 TCGv_i32 desc;
491 TCGv_i64 temp;
492
493 if (fn == NULL) {
494 return false;
495 }
496 if (!sve_access_check(s)) {
497 return true;
498 }
499
500 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
501 temp = tcg_temp_new_i64();
502 t_zn = tcg_temp_new_ptr();
503 t_pg = tcg_temp_new_ptr();
504
505 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
506 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
507 fn(temp, t_zn, t_pg, desc);
508 tcg_temp_free_ptr(t_zn);
509 tcg_temp_free_ptr(t_pg);
510 tcg_temp_free_i32(desc);
511
512 write_fp_dreg(s, a->rd, temp);
513 tcg_temp_free_i64(temp);
514 return true;
515}
516
/* Expand a reduction trans function dispatching on element size. */
#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                        \
    static gen_helper_gvec_reduc * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    return do_vpz_ool(s, a, fns[a->esz]);                                \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

/* SADDV has no doubleword form (the result register is 64-bit). */
static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ
547
ccd841c3
RH
548/*
549 *** SVE Shift by Immediate - Predicated Group
550 */
551
/* Store zero into every active element of Zd. We will use this for two
 * and three-operand predicated instructions for which logic dictates a
 * zero result.
 */
static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
    };
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, 0, fns[esz]);
    }
    return true;
}

/* Invoke an out-of-line helper for a predicated shift by immediate;
 * the immediate is passed via the simd data field. */
static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid. For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid. For arithmetic
       right shift for division, it is a zeroing operation. */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}
653
fe7f8dfb
RH
654/*
655 *** SVE Bitwise Shift - Predicated Group
656 */
657
/* Expand a predicated shift-by-wide-elements trans function.
 * Only byte/halfword/word element sizes exist (the shift amount
 * comes from doubleword elements of Zm), hence the esz >= 3 reject. */
#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW
677
d9d78dcc
RH
678/*
679 *** SVE Bitwise Shift - Unpredicated Group
680 */
681
/* Expand an unpredicated shift by immediate via an inline gvec expander.
 * 'asr' selects the arithmetic-right-shift clamping behavior. */
static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid. For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation. */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}
723
/* Invoke an out-of-line helper for an unpredicated shift by wide elements.
 * NULL fn marks an unallocated element-size encoding. */
static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fn);
    }
    return true;
}

/* No doubleword form: the shift amount already comes from
 * doubleword elements, so the _d slot is NULL. */
#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a,           \
                               uint32_t insn)                             \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return do_zzw_ool(s, a, fns[a->esz]);                                 \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW
755
96a36e4a
RH
756/*
757 *** SVE Integer Multiply-Add Group
758 */
759
/* Invoke an out-of-line helper on four Zregs and a governing predicate
 * (multiply-add style: rd, ra, rn, rm, pg). */
static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

/* Expand a predicated multiply-add trans function per element size. */
#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
{                                                                          \
    static gen_helper_gvec_5 * const fns[4] = {                            \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,              \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,              \
    };                                                                     \
    return do_zpzzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ
789
9a56c9c3
RH
790/*
791 *** SVE Index Generation Group
792 */
793
/* Fill Zd with start, start+incr, start+2*incr, ...
 * For sub-doubleword element sizes the inputs are truncated to 32 bits
 * before calling the 32-bit helpers. */
static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}

/* INDEX (immediate, immediate). */
static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

/* INDEX (immediate start, register increment).
 * cpu_reg values are borrowed, not freed. */
static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

/* INDEX (register start, immediate increment). */
static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

/* INDEX (register, register). */
static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}
868
96f922cc
RH
869/*
870 *** SVE Stack Allocation Group
871 */
872
/* ADDVL: Xd = Xn + imm * VL (vector length in bytes). */
static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    return true;
}

/* ADDPL: Xd = Xn + imm * PL (predicate length in bytes, VL/8). */
static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    return true;
}

/* RDVL: Xd = imm * VL. */
static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
{
    TCGv_i64 reg = cpu_reg(s, a->rd);
    tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    return true;
}
895
4b242d9c
RH
896/*
897 *** SVE Compute Vector Address Group
898 */
899
/* Expand an ADR (vector address generation) via an out-of-line helper;
 * a->imm (the index shift) is passed in the simd data field. */
static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}
931
0762cd42
RH
932/*
933 *** SVE Integer Misc - Unpredicated Group
934 */
935
/* FEXPA: floating-point exponential accelerator.
 * No byte-sized form (esz 0 unallocated). */
static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4] = {
        NULL,
        gen_helper_sve_fexpa_h,
        gen_helper_sve_fexpa_s,
        gen_helper_sve_fexpa_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}
955
516e246a
RH
956/*
957 *** SVE Predicate Logical Operations Group
958 */
959
/* Perform a flag-setting predicate logical operation: apply gvec_op
 * to Pd = op(Pn, Pm, Pg) and then set NZCV per PredTest(Pd, Pg). */
static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps. */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large. The computation
         * of the flags depends on the original contents of the guarding
         * predicate. If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
1010
/* Pd = Pn & Pm & Pg, on one 64-bit predicate word. */
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Pd = Pn & Pm & Pg, vector form. */
static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        /* With rn == rm the AND collapses to a two-operand form. */
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        /* Governing predicate equals one operand: one AND suffices. */
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
1046
/* Pd = (Pn & ~Pm) & Pg, on one 64-bit predicate word. */
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Pd = (Pn & ~Pm) & Pg, vector form. */
static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        /* Pg == Pn: the final AND with Pg is redundant. */
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
1076
/* Pd = (Pn ^ Pm) & Pg, on one 64-bit predicate word. */
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

/* Pd = (Pn ^ Pm) & Pg, vector form. */
static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
1104
/* Pd = (Pn & Pg) | (Pm & ~Pg), on one 64-bit predicate word.
 * Note this clobbers the pn and pm temporaries. */
static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

/* Vector form; also clobbers pn and pm. */
static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    /* SEL has no flag-setting form; the S bit is an invalid encoding. */
    if (a->s) {
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}
1134
/* Emit pd = (pn | pm) & pg for one 64-bit chunk of a predicate.  */
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1140
/* Vector expansion of gen_orr_pg_i64: pd = (pn | pm) & pg.  */
static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1147
1148static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1149{
1150 static const GVecGen4 op = {
1151 .fni8 = gen_orr_pg_i64,
1152 .fniv = gen_orr_pg_vec,
1153 .fno = gen_helper_sve_orr_pppp,
1154 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1155 };
1156 if (a->s) {
1157 return do_pppp_flags(s, a, &op);
1158 } else if (a->pg == a->rn && a->rn == a->rm) {
1159 return do_mov_p(s, a->rd, a->rn);
1160 } else {
1161 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1162 }
1163}
1164
/* Emit pd = (pn | ~pm) & pg for one 64-bit chunk of a predicate.  */
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1170
/* Vector expansion of gen_orn_pg_i64: pd = (pn | ~pm) & pg.  */
static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1177
1178static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1179{
1180 static const GVecGen4 op = {
1181 .fni8 = gen_orn_pg_i64,
1182 .fniv = gen_orn_pg_vec,
1183 .fno = gen_helper_sve_orn_pppp,
1184 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1185 };
1186 if (a->s) {
1187 return do_pppp_flags(s, a, &op);
1188 } else {
1189 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1190 }
1191}
1192
/* Emit pd = pg & ~(pn | pm) for one 64-bit chunk of a predicate.  */
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}
1198
/* Vector expansion of gen_nor_pg_i64: pd = pg & ~(pn | pm).  */
static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}
1205
1206static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1207{
1208 static const GVecGen4 op = {
1209 .fni8 = gen_nor_pg_i64,
1210 .fniv = gen_nor_pg_vec,
1211 .fno = gen_helper_sve_nor_pppp,
1212 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1213 };
1214 if (a->s) {
1215 return do_pppp_flags(s, a, &op);
1216 } else {
1217 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1218 }
1219}
1220
/* Emit pd = pg & ~(pn & pm) for one 64-bit chunk of a predicate.  */
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}
1226
/* Vector expansion of gen_nand_pg_i64: pd = pg & ~(pn & pm).  */
static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}
1233
1234static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1235{
1236 static const GVecGen4 op = {
1237 .fni8 = gen_nand_pg_i64,
1238 .fniv = gen_nand_pg_vec,
1239 .fno = gen_helper_sve_nand_pppp,
1240 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1241 };
1242 if (a->s) {
1243 return do_pppp_flags(s, a, &op);
1244 } else {
1245 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1246 }
1247}
1248
1249/*
1250 *** SVE Predicate Misc Group
1251 */
1252
/* PTEST: set NZCV from the AND of predicate Pn with governing predicate Pg.
 * When the predicate fits in a single 64-bit word, load both operands and
 * compute the flags inline; otherwise fall back to the multi-word helper.
 */
static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        /* Number of 64-bit words in the predicate register.  */
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}
1276
/* See the ARM pseudocode DecodePredCount: translate a 5-bit predicate
 * pattern into the number of active elements, given the vector size in
 * bytes (fullsz) and the element size (esz).  Returns 0 when the pattern
 * names more elements than the vector holds, or is an unassigned #uimm5.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned limit;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        /* VL1-VL8 encode the element count directly.  */
        limit = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        /* VL16-VL256 are successive powers of two starting at 16.  */
        limit = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default: /* #uimm5 */
        return 0;
    }
    return elements < limit ? 0 : limit;
}
1314
/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.
     * WORD is the repeating 64-bit pattern for the interior of the
     * predicate; LASTWORD is the (possibly partial) final word.
     */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            /* Clear the pattern bits above the last active element.  */
            lastword &= ~(-1ull << (setsz % 64));
        }
    }

    t = tcg_temp_new_i64();
    /* Predicate no larger than one word: a single store suffices.  */
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        /* The active region is gvec-aligned: fill it with dup64i,
         * which also zeroes the remainder up to maxsz.
         */
        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
        /* One word short of alignment: dup then zero the extra word.  */
        if (oprsz * 8 == setsz + 8) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            tcg_gen_movi_i64(t, 0);
            tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
            goto done;
        }
    }

    /* General case: store WORD words, then LASTWORD, then zeros.
     * Convert sizes from bits of predicate to bytes of storage.
     */
    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < setsz; i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES: flags depend only on whether any element was set.
     * WORD != 0 implies the first element is active (N set, Z clear);
     * WORD == 0 implies an all-false result (C set).  V is cleared.
     */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
1400
/* PTRUE, PTRUES: initialize a predicate per the decoded pattern.  */
static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}
1405
/* SETFFR: set every element of the FFR.  */
static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
{
    /* Note pat == 31 is #all, to set all elements.  */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}
1411
/* PFALSE: clear every element of the destination predicate.  */
static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
{
    /* Note pat == 32 is #unimp, to set no elements.  */
    return do_predset(s, 0, a->rd, 32, false);
}
1417
1418static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
1419{
1420 /* The path through do_pppp_flags is complicated enough to want to avoid
1421 * duplication. Frob the arguments into the form of a predicated AND.
1422 */
1423 arg_rprr_s alt_a = {
1424 .rd = a->rd, .pg = a->pg, .s = a->s,
1425 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1426 };
1427 return trans_AND_pppp(s, &alt_a, insn);
1428}
1429
/* RDFFR (unpredicated): copy the FFR to Pd.  */
static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}
1434
/* WRFFR: copy Pn to the FFR.  */
static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}
1439
/* Common expansion for PFIRST and PNEXT: invoke the out-of-line helper
 * GEN_FN with pointers to Pd and Pn/Pg, then set NZCV from its return
 * value via do_pred_flags.
 */
static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    /* Pack the predicate size in words plus the element size into the
     * simd descriptor passed to the helper.
     */
    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    /* T carries the descriptor in and the helper's flags result out.  */
    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}
1468
/* PFIRST: set the first active element of Pd, updating NZCV.  */
static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}
1473
/* PNEXT: advance Pd to the next active element, updating NZCV.  */
static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}
1478
1479/*
1480 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
1481 */
1482
/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 */

static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
                   int rn, int imm)
{
    /* Split LEN into whole 64-bit units plus a remainder (which for
     * predicate registers can be any multiple of 2 bytes).
     */
    uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
    uint32_t len_remain = len % 8;
    uint32_t nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0, t1;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian load for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        /* Few enough parts: unroll into straight-line loads.  */
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
        }
    } else {
        /* Otherwise emit a TCG-level loop with byte counter I.  */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        tp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tp, i, imm);
        tcg_gen_extu_ptr_i64(addr, tp);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));

        tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);

        /* Store into cpu_env at vofs + i, then advance the counter.  */
        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* Power-of-two tail: a single sized load.  */
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* 6-byte tail: a 4-byte load plus a 2-byte load merged.  */
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
1571
1572static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
1573{
1574 if (sve_access_check(s)) {
1575 int size = vec_full_reg_size(s);
1576 int off = vec_full_reg_offset(s, a->rd);
1577 do_ldr(s, off, size, a->rn, a->imm * size);
1578 }
1579 return true;
1580}
1581
1582static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
1583{
1584 if (sve_access_check(s)) {
1585 int size = pred_full_reg_size(s);
1586 int off = pred_full_reg_offset(s, a->rd);
1587 do_ldr(s, off, size, a->rn, a->imm * size);
1588 }
1589 return true;
1590}