2 * AArch64 SVE translation
4 * Copyright (c) 2018 Linaro, Ltd
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
22 #include "exec/exec-all.h"
24 #include "tcg-op-gvec.h"
25 #include "tcg-gvec-desc.h"
28 #include "translate.h"
29 #include "internals.h"
30 #include "exec/helper-proto.h"
31 #include "exec/helper-gen.h"
33 #include "trace-tcg.h"
34 #include "translate-a64.h"
37 typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
38 TCGv_i64, uint32_t, uint32_t);
40 typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i32);
42 typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
43 TCGv_ptr, TCGv_ptr, TCGv_i32);
45 typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
46 typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
47 TCGv_ptr, TCGv_i64, TCGv_i32);
50 * Helpers for extracting complex instruction fields.
53 /* See e.g. ASR (immediate, predicated).
54 * Returns -1 for unallocated encoding; diagnose later.
56 static int tszimm_esz(int x)
58 x >>= 3; /* discard imm3 */
62 static int tszimm_shr(int x)
64 return (16 << tszimm_esz(x)) - x;
67 /* See e.g. LSL (immediate, predicated). */
68 static int tszimm_shl(int x)
70 return x - (8 << tszimm_esz(x));
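/* A worked example of the tsz:imm3 decode, with an illustrative value:
 * x = 0b0001011 has tsz = 0001, so tszimm_esz(x) returns 0 (byte elements);
 * the right-shift amount is then 16 - 11 = 5 and the left-shift amount
 * is 11 - 8 = 3.  A tsz of 0000 makes tszimm_esz return -1.
 */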
73 static inline int plus1(int x)
78 /* The SH bit is in bit 8. Extract the low 8 and shift. */
79 static inline int expand_imm_sh8s(int x)
81 return (int8_t)x << (x & 0x100 ? 8 : 0);
84 static inline int expand_imm_sh8u(int x)
86 return (uint8_t)x << (x & 0x100 ? 8 : 0);
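/* For example, with the illustrative value x = 0x180 (SH set, imm8 = 0x80),
 * expand_imm_sh8s returns (int8_t)0x80 << 8 = -32768 while expand_imm_sh8u
 * returns 0x80 << 8 = 0x8000.
 */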
89 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
90 * with unsigned data. C.f. SVE Memory Contiguous Load Group.
92 static inline int msz_dtype(int msz)
94 static const uint8_t dtype[4] = { 0, 5, 10, 15 };
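/* This table is assumed to be the msz == esz diagonal of the dtype
 * encoding, i.e. dtype = 5 * msz: LD1B -> 0, LD1H -> 5, LD1W -> 10,
 * LD1D -> 15, all with unsigned (non-extending) data.
 */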
99 * Include the generated decoder.
102 #include "decode-sve.inc.c"
105 * Implement all of the translator functions referenced by the decoder.
108 /* Return the offset into CPUARMState of the predicate vector register Pn.
109 * Note for this purpose, FFR is P16.
111 static inline int pred_full_reg_offset(DisasContext *s, int regno)
113 return offsetof(CPUARMState, vfp.pregs[regno]);
116 /* Return the byte size of the whole predicate register, VL / 64. */
117 static inline int pred_full_reg_size(DisasContext *s)
119 return s->sve_len >> 3;
122 /* Round up the size of a register to a size allowed by
123 * the tcg vector infrastructure. Any operation which uses this
124 * size may assume that the bits above pred_full_reg_size are zero,
125 * and must leave them the same way.
127 * Note that this is not needed for the vector registers as they
128 * are always properly sized for tcg vectors.
130 static int size_for_gvec(int size)
135 return QEMU_ALIGN_UP(size, 16);
139 static int pred_gvec_reg_size(DisasContext *s)
141 return size_for_gvec(pred_full_reg_size(s));
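/* Example sizing, with an illustrative vector length of 384 bits:
 * vec_full_reg_size is 48 bytes (already a multiple of 16, as the tcg
 * vector infrastructure requires), pred_full_reg_size is 6 bytes, and
 * pred_gvec_reg_size rounds that up to 16.
 */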
144 /* Invoke a vector expander on two Zregs. */
145 static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
146 int esz, int rd, int rn)
148 if (sve_access_check(s)) {
149 unsigned vsz = vec_full_reg_size(s);
150 gvec_fn(esz, vec_full_reg_offset(s, rd),
151 vec_full_reg_offset(s, rn), vsz, vsz);
156 /* Invoke a vector expander on three Zregs. */
157 static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
158 int esz, int rd, int rn, int rm)
160 if (sve_access_check(s)) {
161 unsigned vsz = vec_full_reg_size(s);
162 gvec_fn(esz, vec_full_reg_offset(s, rd),
163 vec_full_reg_offset(s, rn),
164 vec_full_reg_offset(s, rm), vsz, vsz);
169 /* Invoke a vector move on two Zregs. */
170 static bool do_mov_z(DisasContext *s, int rd, int rn)
172 return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
175 /* Initialize a Zreg with replications of a 64-bit immediate. */
176 static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
178 unsigned vsz = vec_full_reg_size(s);
179 tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
182 /* Invoke a vector expander on two Pregs. */
183 static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
184 int esz, int rd, int rn)
186 if (sve_access_check(s)) {
187 unsigned psz = pred_gvec_reg_size(s);
188 gvec_fn(esz, pred_full_reg_offset(s, rd),
189 pred_full_reg_offset(s, rn), psz, psz);
194 /* Invoke a vector expander on three Pregs. */
195 static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
196 int esz, int rd, int rn, int rm)
198 if (sve_access_check(s)) {
199 unsigned psz = pred_gvec_reg_size(s);
200 gvec_fn(esz, pred_full_reg_offset(s, rd),
201 pred_full_reg_offset(s, rn),
202 pred_full_reg_offset(s, rm), psz, psz);
207 /* Invoke a vector operation on four Pregs. */
208 static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
209 int rd, int rn, int rm, int rg)
211 if (sve_access_check(s)) {
212 unsigned psz = pred_gvec_reg_size(s);
213 tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
214 pred_full_reg_offset(s, rn),
215 pred_full_reg_offset(s, rm),
216 pred_full_reg_offset(s, rg),
222 /* Invoke a vector move on two Pregs. */
223 static bool do_mov_p(DisasContext *s, int rd, int rn)
225 return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
228 /* Set the cpu flags as per a return from an SVE helper. */
229 static void do_pred_flags(TCGv_i32 t)
231 tcg_gen_mov_i32(cpu_NF, t);
232 tcg_gen_andi_i32(cpu_ZF, t, 2);
233 tcg_gen_andi_i32(cpu_CF, t, 1);
234 tcg_gen_movi_i32(cpu_VF, 0);
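/* The layout assumed here for the helper return value: bit 31 is N (the
 * first active element was true), bit 1 is !Z (some active element was
 * true), bit 0 is C (the last active element was false); all other bits
 * are ignored, and V is always cleared.
 */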
237 /* Subroutines computing the ARM PredTest pseudofunction. */
238 static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
240 TCGv_i32 t = tcg_temp_new_i32();
242 gen_helper_sve_predtest1(t, d, g);
244 tcg_temp_free_i32(t);
247 static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
249 TCGv_ptr dptr = tcg_temp_new_ptr();
250 TCGv_ptr gptr = tcg_temp_new_ptr();
253 tcg_gen_addi_ptr(dptr, cpu_env, dofs);
254 tcg_gen_addi_ptr(gptr, cpu_env, gofs);
255 t = tcg_const_i32(words);
257 gen_helper_sve_predtest(t, dptr, gptr, t);
258 tcg_temp_free_ptr(dptr);
259 tcg_temp_free_ptr(gptr);
262 tcg_temp_free_i32(t);
265 /* For each element size, the bits within a predicate word that are active. */
266 const uint64_t pred_esz_masks[4] = {
267 0xffffffffffffffffull, 0x5555555555555555ull,
268 0x1111111111111111ull, 0x0101010101010101ull
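/* For example, with MO_16 only every other predicate bit is significant,
 * since a predicate allocates one bit per byte of the vector.
 */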
272 *** SVE Logical - Unpredicated Group
275 static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
277 return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
280 static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
282 if (a->rn == a->rm) { /* MOV */
283 return do_mov_z(s, a->rd, a->rn);
285 return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
289 static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
291 return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
294 static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
296 return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
300 *** SVE Integer Arithmetic - Unpredicated Group
303 static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
305 return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
308 static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
310 return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
313 static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
315 return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
318 static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
320 return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
323 static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
325 return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
328 static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
330 return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
334 *** SVE Integer Arithmetic - Binary Predicated Group
337 static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
339 unsigned vsz = vec_full_reg_size(s);
343 if (sve_access_check(s)) {
344 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
345 vec_full_reg_offset(s, a->rn),
346 vec_full_reg_offset(s, a->rm),
347 pred_full_reg_offset(s, a->pg),
353 #define DO_ZPZZ(NAME, name) \
354 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a, \
357 static gen_helper_gvec_4 * const fns[4] = { \
358 gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
359 gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
361 return do_zpzz_ool(s, a, fns[a->esz]); \
380 DO_ZPZZ(SMULH, smulh)
381 DO_ZPZZ(UMULH, umulh)
387 static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
389 static gen_helper_gvec_4 * const fns[4] = {
390 NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
392 return do_zpzz_ool(s, a, fns[a->esz]);
395 static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
397 static gen_helper_gvec_4 * const fns[4] = {
398 NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
400 return do_zpzz_ool(s, a, fns[a->esz]);
408 *** SVE Integer Arithmetic - Unary Predicated Group
411 static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
416 if (sve_access_check(s)) {
417 unsigned vsz = vec_full_reg_size(s);
418 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
419 vec_full_reg_offset(s, a->rn),
420 pred_full_reg_offset(s, a->pg),
426 #define DO_ZPZ(NAME, name) \
427 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
429 static gen_helper_gvec_3 * const fns[4] = { \
430 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
431 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
433 return do_zpz_ool(s, a, fns[a->esz]); \
438 DO_ZPZ(CNT_zpz, cnt_zpz)
440 DO_ZPZ(NOT_zpz, not_zpz)
444 static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
446 static gen_helper_gvec_3 * const fns[4] = {
448 gen_helper_sve_fabs_h,
449 gen_helper_sve_fabs_s,
450 gen_helper_sve_fabs_d
452 return do_zpz_ool(s, a, fns[a->esz]);
455 static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
457 static gen_helper_gvec_3 * const fns[4] = {
459 gen_helper_sve_fneg_h,
460 gen_helper_sve_fneg_s,
461 gen_helper_sve_fneg_d
463 return do_zpz_ool(s, a, fns[a->esz]);
466 static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
468 static gen_helper_gvec_3 * const fns[4] = {
470 gen_helper_sve_sxtb_h,
471 gen_helper_sve_sxtb_s,
472 gen_helper_sve_sxtb_d
474 return do_zpz_ool(s, a, fns[a->esz]);
477 static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
479 static gen_helper_gvec_3 * const fns[4] = {
481 gen_helper_sve_uxtb_h,
482 gen_helper_sve_uxtb_s,
483 gen_helper_sve_uxtb_d
485 return do_zpz_ool(s, a, fns[a->esz]);
488 static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
490 static gen_helper_gvec_3 * const fns[4] = {
492 gen_helper_sve_sxth_s,
493 gen_helper_sve_sxth_d
495 return do_zpz_ool(s, a, fns[a->esz]);
498 static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
500 static gen_helper_gvec_3 * const fns[4] = {
502 gen_helper_sve_uxth_s,
503 gen_helper_sve_uxth_d
505 return do_zpz_ool(s, a, fns[a->esz]);
508 static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
510 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
513 static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
515 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
521 *** SVE Integer Reduction Group
524 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
525 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
526 gen_helper_gvec_reduc *fn)
528 unsigned vsz = vec_full_reg_size(s);
536 if (!sve_access_check(s)) {
540 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
541 temp = tcg_temp_new_i64();
542 t_zn = tcg_temp_new_ptr();
543 t_pg = tcg_temp_new_ptr();
545 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
546 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
547 fn(temp, t_zn, t_pg, desc);
548 tcg_temp_free_ptr(t_zn);
549 tcg_temp_free_ptr(t_pg);
550 tcg_temp_free_i32(desc);
552 write_fp_dreg(s, a->rd, temp);
553 tcg_temp_free_i64(temp);
557 #define DO_VPZ(NAME, name) \
558 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
560 static gen_helper_gvec_reduc * const fns[4] = { \
561 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
562 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
564 return do_vpz_ool(s, a, fns[a->esz]); \
577 static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
579 static gen_helper_gvec_reduc * const fns[4] = {
580 gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
581 gen_helper_sve_saddv_s, NULL
583 return do_vpz_ool(s, a, fns[a->esz]);
589 *** SVE Shift by Immediate - Predicated Group
592 /* Store zero into every active element of Zd. We will use this for two
593 * and three-operand predicated instructions for which logic dictates a constant result of 0.
596 static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
598 static gen_helper_gvec_2 * const fns[4] = {
599 gen_helper_sve_clr_b, gen_helper_sve_clr_h,
600 gen_helper_sve_clr_s, gen_helper_sve_clr_d,
602 if (sve_access_check(s)) {
603 unsigned vsz = vec_full_reg_size(s);
604 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
605 pred_full_reg_offset(s, pg),
606 vsz, vsz, 0, fns[esz]);
611 /* Copy Zn into Zd, storing zeros into inactive elements. */
612 static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
614 static gen_helper_gvec_3 * const fns[4] = {
615 gen_helper_sve_movz_b, gen_helper_sve_movz_h,
616 gen_helper_sve_movz_s, gen_helper_sve_movz_d,
618 unsigned vsz = vec_full_reg_size(s);
619 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
620 vec_full_reg_offset(s, rn),
621 pred_full_reg_offset(s, pg),
622 vsz, vsz, 0, fns[esz]);
625 static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
626 gen_helper_gvec_3 *fn)
628 if (sve_access_check(s)) {
629 unsigned vsz = vec_full_reg_size(s);
630 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
631 vec_full_reg_offset(s, a->rn),
632 pred_full_reg_offset(s, a->pg),
633 vsz, vsz, a->imm, fn);
638 static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
640 static gen_helper_gvec_3 * const fns[4] = {
641 gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
642 gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
645 /* Invalid tsz encoding -- see tszimm_esz. */
648 /* Shift by element size is architecturally valid. For
649 arithmetic right-shift, it's the same as by one less. */
650 a->imm = MIN(a->imm, (8 << a->esz) - 1);
651 return do_zpzi_ool(s, a, fns[a->esz]);
654 static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
656 static gen_helper_gvec_3 * const fns[4] = {
657 gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
658 gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
663 /* Shift by element size is architecturally valid.
664 For logical shifts, it is a zeroing operation. */
665 if (a->imm >= (8 << a->esz)) {
666 return do_clr_zp(s, a->rd, a->pg, a->esz);
668 return do_zpzi_ool(s, a, fns[a->esz]);
672 static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
674 static gen_helper_gvec_3 * const fns[4] = {
675 gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
676 gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
681 /* Shift by element size is architecturally valid.
682 For logical shifts, it is a zeroing operation. */
683 if (a->imm >= (8 << a->esz)) {
684 return do_clr_zp(s, a->rd, a->pg, a->esz);
686 return do_zpzi_ool(s, a, fns[a->esz]);
690 static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
692 static gen_helper_gvec_3 * const fns[4] = {
693 gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
694 gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
699 /* Shift by element size is architecturally valid. For arithmetic
700 right shift for division, it is a zeroing operation. */
701 if (a->imm >= (8 << a->esz)) {
702 return do_clr_zp(s, a->rd, a->pg, a->esz);
704 return do_zpzi_ool(s, a, fns[a->esz]);
709 *** SVE Bitwise Shift - Predicated Group
712 #define DO_ZPZW(NAME, name) \
713 static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a, \
716 static gen_helper_gvec_4 * const fns[3] = { \
717 gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
718 gen_helper_sve_##name##_zpzw_s, \
720 if (a->esz < 0 || a->esz >= 3) { \
723 return do_zpzz_ool(s, a, fns[a->esz]); \
733 *** SVE Bitwise Shift - Unpredicated Group
736 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
737 void (*gvec_fn)(unsigned, uint32_t, uint32_t,
738 int64_t, uint32_t, uint32_t))
741 /* Invalid tsz encoding -- see tszimm_esz. */
744 if (sve_access_check(s)) {
745 unsigned vsz = vec_full_reg_size(s);
746 /* Shift by element size is architecturally valid. For
747 arithmetic right-shift, it's the same as by one less.
748 Otherwise it is a zeroing operation. */
749 if (a->imm >= 8 << a->esz) {
751 a->imm = (8 << a->esz) - 1;
753 do_dupi_z(s, a->rd, 0);
757 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
758 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
763 static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
765 return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
768 static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
770 return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
773 static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
775 return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
778 static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
783 if (sve_access_check(s)) {
784 unsigned vsz = vec_full_reg_size(s);
785 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
786 vec_full_reg_offset(s, a->rn),
787 vec_full_reg_offset(s, a->rm),
793 #define DO_ZZW(NAME, name) \
794 static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a, \
797 static gen_helper_gvec_3 * const fns[4] = { \
798 gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
799 gen_helper_sve_##name##_zzw_s, NULL \
801 return do_zzw_ool(s, a, fns[a->esz]); \
811 *** SVE Integer Multiply-Add Group
814 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
815 gen_helper_gvec_5 *fn)
817 if (sve_access_check(s)) {
818 unsigned vsz = vec_full_reg_size(s);
819 tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
820 vec_full_reg_offset(s, a->ra),
821 vec_full_reg_offset(s, a->rn),
822 vec_full_reg_offset(s, a->rm),
823 pred_full_reg_offset(s, a->pg),
829 #define DO_ZPZZZ(NAME, name) \
830 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
832 static gen_helper_gvec_5 * const fns[4] = { \
833 gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
834 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
836 return do_zpzzz_ool(s, a, fns[a->esz]); \
845 *** SVE Index Generation Group
848 static void do_index(DisasContext *s, int esz, int rd,
849 TCGv_i64 start, TCGv_i64 incr)
851 unsigned vsz = vec_full_reg_size(s);
852 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
853 TCGv_ptr t_zd = tcg_temp_new_ptr();
855 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
857 gen_helper_sve_index_d(t_zd, start, incr, desc);
859 typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
860 static index_fn * const fns[3] = {
861 gen_helper_sve_index_b,
862 gen_helper_sve_index_h,
863 gen_helper_sve_index_s,
865 TCGv_i32 s32 = tcg_temp_new_i32();
866 TCGv_i32 i32 = tcg_temp_new_i32();
868 tcg_gen_extrl_i64_i32(s32, start);
869 tcg_gen_extrl_i64_i32(i32, incr);
870 fns[esz](t_zd, s32, i32, desc);
872 tcg_temp_free_i32(s32);
873 tcg_temp_free_i32(i32);
875 tcg_temp_free_ptr(t_zd);
876 tcg_temp_free_i32(desc);
879 static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
881 if (sve_access_check(s)) {
882 TCGv_i64 start = tcg_const_i64(a->imm1);
883 TCGv_i64 incr = tcg_const_i64(a->imm2);
884 do_index(s, a->esz, a->rd, start, incr);
885 tcg_temp_free_i64(start);
886 tcg_temp_free_i64(incr);
891 static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
893 if (sve_access_check(s)) {
894 TCGv_i64 start = tcg_const_i64(a->imm);
895 TCGv_i64 incr = cpu_reg(s, a->rm);
896 do_index(s, a->esz, a->rd, start, incr);
897 tcg_temp_free_i64(start);
902 static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
904 if (sve_access_check(s)) {
905 TCGv_i64 start = cpu_reg(s, a->rn);
906 TCGv_i64 incr = tcg_const_i64(a->imm);
907 do_index(s, a->esz, a->rd, start, incr);
908 tcg_temp_free_i64(incr);
913 static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
915 if (sve_access_check(s)) {
916 TCGv_i64 start = cpu_reg(s, a->rn);
917 TCGv_i64 incr = cpu_reg(s, a->rm);
918 do_index(s, a->esz, a->rd, start, incr);
924 *** SVE Stack Allocation Group
927 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
929 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
930 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
931 tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
935 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
937 TCGv_i64 rd = cpu_reg_sp(s, a->rd);
938 TCGv_i64 rn = cpu_reg_sp(s, a->rn);
939 tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
943 static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
945 TCGv_i64 reg = cpu_reg(s, a->rd);
946 tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
951 *** SVE Compute Vector Address Group
954 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
956 if (sve_access_check(s)) {
957 unsigned vsz = vec_full_reg_size(s);
958 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
959 vec_full_reg_offset(s, a->rn),
960 vec_full_reg_offset(s, a->rm),
961 vsz, vsz, a->imm, fn);
966 static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
968 return do_adr(s, a, gen_helper_sve_adr_p32);
971 static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
973 return do_adr(s, a, gen_helper_sve_adr_p64);
976 static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
978 return do_adr(s, a, gen_helper_sve_adr_s32);
981 static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
983 return do_adr(s, a, gen_helper_sve_adr_u32);
987 *** SVE Integer Misc - Unpredicated Group
990 static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
992 static gen_helper_gvec_2 * const fns[4] = {
994 gen_helper_sve_fexpa_h,
995 gen_helper_sve_fexpa_s,
996 gen_helper_sve_fexpa_d,
1001 if (sve_access_check(s)) {
1002 unsigned vsz = vec_full_reg_size(s);
1003 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
1004 vec_full_reg_offset(s, a->rn),
1005 vsz, vsz, 0, fns[a->esz]);
1010 static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
1012 static gen_helper_gvec_3 * const fns[4] = {
1014 gen_helper_sve_ftssel_h,
1015 gen_helper_sve_ftssel_s,
1016 gen_helper_sve_ftssel_d,
1021 if (sve_access_check(s)) {
1022 unsigned vsz = vec_full_reg_size(s);
1023 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
1024 vec_full_reg_offset(s, a->rn),
1025 vec_full_reg_offset(s, a->rm),
1026 vsz, vsz, 0, fns[a->esz]);
1032 *** SVE Predicate Logical Operations Group
1035 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1036 const GVecGen4 *gvec_op)
1038 if (!sve_access_check(s)) {
1042 unsigned psz = pred_gvec_reg_size(s);
1043 int dofs = pred_full_reg_offset(s, a->rd);
1044 int nofs = pred_full_reg_offset(s, a->rn);
1045 int mofs = pred_full_reg_offset(s, a->rm);
1046 int gofs = pred_full_reg_offset(s, a->pg);
1049 /* Do the operation and the flags generation in temps. */
1050 TCGv_i64 pd = tcg_temp_new_i64();
1051 TCGv_i64 pn = tcg_temp_new_i64();
1052 TCGv_i64 pm = tcg_temp_new_i64();
1053 TCGv_i64 pg = tcg_temp_new_i64();
1055 tcg_gen_ld_i64(pn, cpu_env, nofs);
1056 tcg_gen_ld_i64(pm, cpu_env, mofs);
1057 tcg_gen_ld_i64(pg, cpu_env, gofs);
1059 gvec_op->fni8(pd, pn, pm, pg);
1060 tcg_gen_st_i64(pd, cpu_env, dofs);
1062 do_predtest1(pd, pg);
1064 tcg_temp_free_i64(pd);
1065 tcg_temp_free_i64(pn);
1066 tcg_temp_free_i64(pm);
1067 tcg_temp_free_i64(pg);
1069 /* The operation and flags generation is large. The computation
1070 * of the flags depends on the original contents of the guarding
1071 * predicate. If the destination overwrites the guarding predicate,
1072 * then the easiest way to get this right is to save a copy.
1075 if (a->rd == a->pg) {
1076 tofs = offsetof(CPUARMState, vfp.preg_tmp);
1077 tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1080 tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1081 do_predtest(s, dofs, tofs, psz / 8);
1086 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1088 tcg_gen_and_i64(pd, pn, pm);
1089 tcg_gen_and_i64(pd, pd, pg);
1092 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1093 TCGv_vec pm, TCGv_vec pg)
1095 tcg_gen_and_vec(vece, pd, pn, pm);
1096 tcg_gen_and_vec(vece, pd, pd, pg);
1099 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1101 static const GVecGen4 op = {
1102 .fni8 = gen_and_pg_i64,
1103 .fniv = gen_and_pg_vec,
1104 .fno = gen_helper_sve_and_pppp,
1105 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1108 return do_pppp_flags(s, a, &op);
1109 } else if (a->rn == a->rm) {
1110 if (a->pg == a->rn) {
1111 return do_mov_p(s, a->rd, a->rn);
1113 return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
1115 } else if (a->pg == a->rn || a->pg == a->rm) {
1116 return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
1118 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1122 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1124 tcg_gen_andc_i64(pd, pn, pm);
1125 tcg_gen_and_i64(pd, pd, pg);
1128 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1129 TCGv_vec pm, TCGv_vec pg)
1131 tcg_gen_andc_vec(vece, pd, pn, pm);
1132 tcg_gen_and_vec(vece, pd, pd, pg);
1135 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1137 static const GVecGen4 op = {
1138 .fni8 = gen_bic_pg_i64,
1139 .fniv = gen_bic_pg_vec,
1140 .fno = gen_helper_sve_bic_pppp,
1141 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1144 return do_pppp_flags(s, a, &op);
1145 } else if (a->pg == a->rn) {
1146 return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
1148 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1152 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1154 tcg_gen_xor_i64(pd, pn, pm);
1155 tcg_gen_and_i64(pd, pd, pg);
1158 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1159 TCGv_vec pm, TCGv_vec pg)
1161 tcg_gen_xor_vec(vece, pd, pn, pm);
1162 tcg_gen_and_vec(vece, pd, pd, pg);
1165 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1167 static const GVecGen4 op = {
1168 .fni8 = gen_eor_pg_i64,
1169 .fniv = gen_eor_pg_vec,
1170 .fno = gen_helper_sve_eor_pppp,
1171 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1174 return do_pppp_flags(s, a, &op);
1176 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1180 static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1182 tcg_gen_and_i64(pn, pn, pg);
1183 tcg_gen_andc_i64(pm, pm, pg);
1184 tcg_gen_or_i64(pd, pn, pm);
1187 static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1188 TCGv_vec pm, TCGv_vec pg)
1190 tcg_gen_and_vec(vece, pn, pn, pg);
1191 tcg_gen_andc_vec(vece, pm, pm, pg);
1192 tcg_gen_or_vec(vece, pd, pn, pm);
1195 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1197 static const GVecGen4 op = {
1198 .fni8 = gen_sel_pg_i64,
1199 .fniv = gen_sel_pg_vec,
1200 .fno = gen_helper_sve_sel_pppp,
1201 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1206 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1210 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1212 tcg_gen_or_i64(pd, pn, pm);
1213 tcg_gen_and_i64(pd, pd, pg);
1216 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1217 TCGv_vec pm, TCGv_vec pg)
1219 tcg_gen_or_vec(vece, pd, pn, pm);
1220 tcg_gen_and_vec(vece, pd, pd, pg);
1223 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1225 static const GVecGen4 op = {
1226 .fni8 = gen_orr_pg_i64,
1227 .fniv = gen_orr_pg_vec,
1228 .fno = gen_helper_sve_orr_pppp,
1229 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1232 return do_pppp_flags(s, a, &op);
1233 } else if (a->pg == a->rn && a->rn == a->rm) {
1234 return do_mov_p(s, a->rd, a->rn);
1236 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1240 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1242 tcg_gen_orc_i64(pd, pn, pm);
1243 tcg_gen_and_i64(pd, pd, pg);
1246 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1247 TCGv_vec pm, TCGv_vec pg)
1249 tcg_gen_orc_vec(vece, pd, pn, pm);
1250 tcg_gen_and_vec(vece, pd, pd, pg);
1253 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1255 static const GVecGen4 op = {
1256 .fni8 = gen_orn_pg_i64,
1257 .fniv = gen_orn_pg_vec,
1258 .fno = gen_helper_sve_orn_pppp,
1259 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1262 return do_pppp_flags(s, a, &op);
1264 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1268 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1270 tcg_gen_or_i64(pd, pn, pm);
1271 tcg_gen_andc_i64(pd, pg, pd);
1274 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1275 TCGv_vec pm, TCGv_vec pg)
1277 tcg_gen_or_vec(vece, pd, pn, pm);
1278 tcg_gen_andc_vec(vece, pd, pg, pd);
1281 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1283 static const GVecGen4 op = {
1284 .fni8 = gen_nor_pg_i64,
1285 .fniv = gen_nor_pg_vec,
1286 .fno = gen_helper_sve_nor_pppp,
1287 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1290 return do_pppp_flags(s, a, &op);
1292 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1296 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1298 tcg_gen_and_i64(pd, pn, pm);
1299 tcg_gen_andc_i64(pd, pg, pd);
1302 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1303 TCGv_vec pm, TCGv_vec pg)
1305 tcg_gen_and_vec(vece, pd, pn, pm);
1306 tcg_gen_andc_vec(vece, pd, pg, pd);
1309 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1311 static const GVecGen4 op = {
1312 .fni8 = gen_nand_pg_i64,
1313 .fniv = gen_nand_pg_vec,
1314 .fno = gen_helper_sve_nand_pppp,
1315 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1318 return do_pppp_flags(s, a, &op);
1320 return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1325 *** SVE Predicate Misc Group
1328 static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
1330 if (sve_access_check(s)) {
1331 int nofs = pred_full_reg_offset(s, a->rn);
1332 int gofs = pred_full_reg_offset(s, a->pg);
1333 int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1336 TCGv_i64 pn = tcg_temp_new_i64();
1337 TCGv_i64 pg = tcg_temp_new_i64();
1339 tcg_gen_ld_i64(pn, cpu_env, nofs);
1340 tcg_gen_ld_i64(pg, cpu_env, gofs);
1341 do_predtest1(pn, pg);
1343 tcg_temp_free_i64(pn);
1344 tcg_temp_free_i64(pg);
1346 do_predtest(s, nofs, gofs, words);
1352 /* See the ARM pseudocode DecodePredCount. */
1353 static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
1355 unsigned elements = fullsz >> esz;
1359 case 0x0: /* POW2 */
1360 return pow2floor(elements);
1371 case 0x9: /* VL16 */
1372 case 0xa: /* VL32 */
1373 case 0xb: /* VL64 */
1374 case 0xc: /* VL128 */
1375 case 0xd: /* VL256 */
1376 bound = 16 << (pattern - 9);
1378 case 0x1d: /* MUL4 */
1379 return elements - elements % 4;
1380 case 0x1e: /* MUL3 */
1381 return elements - elements % 3;
1382 case 0x1f: /* ALL */
1384 default: /* #uimm5 */
1387 return elements >= bound ? bound : 0;
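/* A worked example with a 256-bit vector and byte elements: fullsz = 32
 * and elements = 32, so POW2 yields 32, VL16 yields 16, VL64 yields 0
 * (the bound exceeds the element count), MUL3 yields 30, and ALL yields
 * all 32 elements.
 */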
1390 /* This handles all of the predicate initialization instructions,
1391 * PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
1392 * so that decode_pred_count returns 0. For SETFFR, we will have
1393 * set RD == 16 == FFR.
1395 static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1397 if (!sve_access_check(s)) {
1401 unsigned fullsz = vec_full_reg_size(s);
1402 unsigned ofs = pred_full_reg_offset(s, rd);
1403 unsigned numelem, setsz, i;
1404 uint64_t word, lastword;
1407 numelem = decode_pred_count(fullsz, pat, esz);
1409 /* Determine what we must store into each bit, and how many. */
1411 lastword = word = 0;
1414 setsz = numelem << esz;
1415 lastword = word = pred_esz_masks[esz];
1417 lastword &= ~(-1ull << (setsz % 64));
1421 t = tcg_temp_new_i64();
1423 tcg_gen_movi_i64(t, lastword);
1424 tcg_gen_st_i64(t, cpu_env, ofs);
1428 if (word == lastword) {
1429 unsigned maxsz = size_for_gvec(fullsz / 8);
1430 unsigned oprsz = size_for_gvec(setsz / 8);
1432 if (oprsz * 8 == setsz) {
1433 tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1436 if (oprsz * 8 == setsz + 8) {
1437 tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1438 tcg_gen_movi_i64(t, 0);
1439 tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
1447 tcg_gen_movi_i64(t, word);
1448 for (i = 0; i < setsz; i += 8) {
1449 tcg_gen_st_i64(t, cpu_env, ofs + i);
1451 if (lastword != word) {
1452 tcg_gen_movi_i64(t, lastword);
1453 tcg_gen_st_i64(t, cpu_env, ofs + i);
1457 tcg_gen_movi_i64(t, 0);
1458 for (; i < fullsz; i += 8) {
1459 tcg_gen_st_i64(t, cpu_env, ofs + i);
1464 tcg_temp_free_i64(t);
1468 tcg_gen_movi_i32(cpu_NF, -(word != 0));
1469 tcg_gen_movi_i32(cpu_CF, word == 0);
1470 tcg_gen_movi_i32(cpu_VF, 0);
1471 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1476 static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
1478 return do_predset(s, a->esz, a->rd, a->pat, a->s);
1481 static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
1483 /* Note pat == 31 is #all, to set all elements. */
1484 return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1487 static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
1489 /* Note pat == 32 is #unimp, to set no elements. */
1490 return do_predset(s, 0, a->rd, 32, false);
1493 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
1495 /* The path through do_pppp_flags is complicated enough to want to avoid
1496 * duplication. Frob the arguments into the form of a predicated AND.
1498 arg_rprr_s alt_a = {
1499 .rd = a->rd, .pg = a->pg, .s = a->s,
1500 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1502 return trans_AND_pppp(s, &alt_a, insn);
1505 static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
1507 return do_mov_p(s, a->rd, FFR_PRED_NUM);
1510 static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
1512 return do_mov_p(s, FFR_PRED_NUM, a->rn);
1515 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1516 void (*gen_fn)(TCGv_i32, TCGv_ptr,
1517 TCGv_ptr, TCGv_i32))
1519 if (!sve_access_check(s)) {
1523 TCGv_ptr t_pd = tcg_temp_new_ptr();
1524 TCGv_ptr t_pg = tcg_temp_new_ptr();
1528 desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1529 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
1531 tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1532 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1533 t = tcg_const_i32(desc);
1535 gen_fn(t, t_pd, t_pg, t);
1536 tcg_temp_free_ptr(t_pd);
1537 tcg_temp_free_ptr(t_pg);
1540 tcg_temp_free_i32(t);
1544 static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1546 return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1549 static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1551 return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1555 *** SVE Element Count Group
1558 /* Perform an inline saturating addition of a 32-bit value within
1559 * a 64-bit register. The second operand is known to be positive,
1560 * which halves the comparisons we must perform to bound the result.
1562 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1568 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1570 tcg_gen_ext32u_i64(reg, reg);
1572 tcg_gen_ext32s_i64(reg, reg);
1575 tcg_gen_sub_i64(reg, reg, val);
1576 ibound = (u ? 0 : INT32_MIN);
1579 tcg_gen_add_i64(reg, reg, val);
1580 ibound = (u ? UINT32_MAX : INT32_MAX);
1583 bound = tcg_const_i64(ibound);
1584 tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1585 tcg_temp_free_i64(bound);
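/* Two illustrative cases: an unsigned decrement of 5 by 9 computes -4 in
 * the 64-bit domain and the movcond clamps it to the bound 0; a signed
 * increment of 0x7ffffff0 by 0x20 exceeds INT32_MAX and is clamped to
 * 0x7fffffff.
 */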
1588 /* Similarly with 64-bit values. */
1589 static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1591 TCGv_i64 t0 = tcg_temp_new_i64();
1592 TCGv_i64 t1 = tcg_temp_new_i64();
1597 tcg_gen_sub_i64(t0, reg, val);
1598 tcg_gen_movi_i64(t1, 0);
1599 tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1601 tcg_gen_add_i64(t0, reg, val);
1602 tcg_gen_movi_i64(t1, -1);
1603 tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1607 /* Detect signed overflow for subtraction. */
1608 tcg_gen_xor_i64(t0, reg, val);
1609 tcg_gen_sub_i64(t1, reg, val);
1610 tcg_gen_xor_i64(reg, reg, t1);
1611 tcg_gen_and_i64(t0, t0, reg);
1613 /* Bound the result. */
1614 tcg_gen_movi_i64(reg, INT64_MIN);
1615 t2 = tcg_const_i64(0);
1616 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1618 /* Detect signed overflow for addition. */
1619 tcg_gen_xor_i64(t0, reg, val);
1620 tcg_gen_add_i64(reg, reg, val);
1621 tcg_gen_xor_i64(t1, reg, val);
1622 tcg_gen_andc_i64(t0, t1, t0);
1624 /* Bound the result. */
1625 tcg_gen_movi_i64(t1, INT64_MAX);
1626 t2 = tcg_const_i64(0);
1627 tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1629 tcg_temp_free_i64(t2);
1631 tcg_temp_free_i64(t0);
1632 tcg_temp_free_i64(t1);
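/* A sketch of the signed cases above: because val is known to be positive,
 * subtraction can only overflow downward and addition upward.  Bit 63 of
 * t0 ends up set exactly when overflow occurred (for subtraction, the
 * operands' signs differ and the difference's sign differs from the
 * minuend; for addition, the operands share a sign and the sum's sign
 * differs), and the movcond then substitutes INT64_MIN or INT64_MAX.
 */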
1635 /* Similarly with a vector and a scalar operand. */
1636 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1637 TCGv_i64 val, bool u, bool d)
1639 unsigned vsz = vec_full_reg_size(s);
1640 TCGv_ptr dptr, nptr;
1644 dptr = tcg_temp_new_ptr();
1645 nptr = tcg_temp_new_ptr();
1646 tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1647 tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1648 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1652 t32 = tcg_temp_new_i32();
1653 tcg_gen_extrl_i64_i32(t32, val);
1655 tcg_gen_neg_i32(t32, t32);
1658 gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1660 gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1662 tcg_temp_free_i32(t32);
1666 t32 = tcg_temp_new_i32();
1667 tcg_gen_extrl_i64_i32(t32, val);
1669 tcg_gen_neg_i32(t32, t32);
1672 gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1674 gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1676 tcg_temp_free_i32(t32);
1680 t64 = tcg_temp_new_i64();
1682 tcg_gen_neg_i64(t64, val);
1684 tcg_gen_mov_i64(t64, val);
1687 gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1689 gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1691 tcg_temp_free_i64(t64);
1697 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1699 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1702 t64 = tcg_temp_new_i64();
1703 tcg_gen_neg_i64(t64, val);
1704 gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1705 tcg_temp_free_i64(t64);
1707 gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1712 g_assert_not_reached();
1715 tcg_temp_free_ptr(dptr);
1716 tcg_temp_free_ptr(nptr);
1717 tcg_temp_free_i32(desc);
1720 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
1722 if (sve_access_check(s)) {
1723 unsigned fullsz = vec_full_reg_size(s);
1724 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1725 tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1730 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
1732 if (sve_access_check(s)) {
1733 unsigned fullsz = vec_full_reg_size(s);
1734 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1735 int inc = numelem * a->imm * (a->d ? -1 : 1);
1736 TCGv_i64 reg = cpu_reg(s, a->rd);
1738 tcg_gen_addi_i64(reg, reg, inc);
1743 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
1746 if (!sve_access_check(s)) {
1750 unsigned fullsz = vec_full_reg_size(s);
1751 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1752 int inc = numelem * a->imm;
1753 TCGv_i64 reg = cpu_reg(s, a->rd);
1755 /* Use normal 64-bit arithmetic to detect 32-bit overflow. */
1758 tcg_gen_ext32u_i64(reg, reg);
1760 tcg_gen_ext32s_i64(reg, reg);
1763 TCGv_i64 t = tcg_const_i64(inc);
1764 do_sat_addsub_32(reg, t, a->u, a->d);
1765 tcg_temp_free_i64(t);
1770 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
1773 if (!sve_access_check(s)) {
1777 unsigned fullsz = vec_full_reg_size(s);
1778 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1779 int inc = numelem * a->imm;
1780 TCGv_i64 reg = cpu_reg(s, a->rd);
1783 TCGv_i64 t = tcg_const_i64(inc);
1784 do_sat_addsub_64(reg, t, a->u, a->d);
1785 tcg_temp_free_i64(t);
1790 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
1796 unsigned fullsz = vec_full_reg_size(s);
1797 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1798 int inc = numelem * a->imm;
1801 if (sve_access_check(s)) {
1802 TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1803 tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1804 vec_full_reg_offset(s, a->rn),
1806 tcg_temp_free_i64(t);
1809 do_mov_z(s, a->rd, a->rn);
1814 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
1821 unsigned fullsz = vec_full_reg_size(s);
1822 unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1823 int inc = numelem * a->imm;
1826 if (sve_access_check(s)) {
1827 TCGv_i64 t = tcg_const_i64(inc);
1828 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1829 tcg_temp_free_i64(t);
1832 do_mov_z(s, a->rd, a->rn);
1838 *** SVE Bitwise Immediate Group
1841 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1844 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1845 extract32(a->dbm, 0, 6),
1846 extract32(a->dbm, 6, 6))) {
1849 if (sve_access_check(s)) {
1850 unsigned vsz = vec_full_reg_size(s);
1851 gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1852 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1857 static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1859 return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1862 static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1864 return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1867 static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1869 return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1872 static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
1875 if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1876 extract32(a->dbm, 0, 6),
1877 extract32(a->dbm, 6, 6))) {
1880 if (sve_access_check(s)) {
1881 do_dupi_z(s, a->rd, imm);
1887 *** SVE Integer Wide Immediate - Predicated Group
1890 /* Implement all merging copies. This is used for CPY (immediate),
1891 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1893 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1896 typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1897 static gen_cpy * const fns[4] = {
1898 gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1899 gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1901 unsigned vsz = vec_full_reg_size(s);
1902 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1903 TCGv_ptr t_zd = tcg_temp_new_ptr();
1904 TCGv_ptr t_zn = tcg_temp_new_ptr();
1905 TCGv_ptr t_pg = tcg_temp_new_ptr();
1907 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1908 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1909 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1911 fns[esz](t_zd, t_zn, t_pg, val, desc);
1913 tcg_temp_free_ptr(t_zd);
1914 tcg_temp_free_ptr(t_zn);
1915 tcg_temp_free_ptr(t_pg);
1916 tcg_temp_free_i32(desc);
1919 static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
1924 if (sve_access_check(s)) {
1925 /* Decode the VFP immediate. */
1926 uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1927 TCGv_i64 t_imm = tcg_const_i64(imm);
1928 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1929 tcg_temp_free_i64(t_imm);
1934 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
1936 if (a->esz == 0 && extract32(insn, 13, 1)) {
1939 if (sve_access_check(s)) {
1940 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1941 do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1942 tcg_temp_free_i64(t_imm);
1947 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
1949 static gen_helper_gvec_2i * const fns[4] = {
1950 gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1951 gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1954 if (a->esz == 0 && extract32(insn, 13, 1)) {
1957 if (sve_access_check(s)) {
1958 unsigned vsz = vec_full_reg_size(s);
1959 TCGv_i64 t_imm = tcg_const_i64(a->imm);
1960 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1961 pred_full_reg_offset(s, a->pg),
1962 t_imm, vsz, vsz, 0, fns[a->esz]);
1963 tcg_temp_free_i64(t_imm);
1969 *** SVE Permute Extract Group
1972 static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
1974 if (!sve_access_check(s)) {
1978 unsigned vsz = vec_full_reg_size(s);
1979 unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
1980 unsigned n_siz = vsz - n_ofs;
1981 unsigned d = vec_full_reg_offset(s, a->rd);
1982 unsigned n = vec_full_reg_offset(s, a->rn);
1983 unsigned m = vec_full_reg_offset(s, a->rm);
1985 /* Use host vector move insns if we have appropriate sizes
1986 * and no unfortunate overlap.
1989 && n_ofs == size_for_gvec(n_ofs)
1990 && n_siz == size_for_gvec(n_siz)
1991 && (d != n || n_siz <= n_ofs)) {
1992 tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
1994 tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
1997 tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
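/* For example, with a 32-byte vector and imm = 5 the result is bytes
 * 5..31 of Zn followed by bytes 0..4 of Zm, whether produced by the two
 * inline gvec moves or by the out-of-line helper.
 */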
2003 *** SVE Permute - Unpredicated Group
2006 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
2008 if (sve_access_check(s)) {
2009 unsigned vsz = vec_full_reg_size(s);
2010 tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2011 vsz, vsz, cpu_reg_sp(s, a->rn));
2016 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
2018 if ((a->imm & 0x1f) == 0) {
2021 if (sve_access_check(s)) {
2022 unsigned vsz = vec_full_reg_size(s);
2023 unsigned dofs = vec_full_reg_offset(s, a->rd);
2024 unsigned esz, index;
2026 esz = ctz32(a->imm);
2027 index = a->imm >> (esz + 1);
2029 if ((index << esz) < vsz) {
2030 unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2031 tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2033 tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
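/* An illustrative decode: imm = 0b10110 gives esz = ctz32(imm) = 1
 * (halfword elements) and index = imm >> 2 = 5, so halfword element 5 of
 * Zn is broadcast; the branch above instead zeroes Zd when the index is
 * out of range for the current vector length.
 */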
2039 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2041 typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2042 static gen_insr * const fns[4] = {
2043 gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2044 gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2046 unsigned vsz = vec_full_reg_size(s);
2047 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2048 TCGv_ptr t_zd = tcg_temp_new_ptr();
2049 TCGv_ptr t_zn = tcg_temp_new_ptr();
2051 tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2052 tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2054 fns[a->esz](t_zd, t_zn, val, desc);
2056 tcg_temp_free_ptr(t_zd);
2057 tcg_temp_free_ptr(t_zn);
2058 tcg_temp_free_i32(desc);
2061 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2063 if (sve_access_check(s)) {
2064 TCGv_i64 t = tcg_temp_new_i64();
2065 tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2066 do_insr_i64(s, a, t);
2067 tcg_temp_free_i64(t);
2072 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2074 if (sve_access_check(s)) {
2075 do_insr_i64(s, a, cpu_reg(s, a->rm));
2080 static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2082 static gen_helper_gvec_2 * const fns[4] = {
2083 gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2084 gen_helper_sve_rev_s, gen_helper_sve_rev_d
2087 if (sve_access_check(s)) {
2088 unsigned vsz = vec_full_reg_size(s);
2089 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2090 vec_full_reg_offset(s, a->rn),
2091 vsz, vsz, 0, fns[a->esz]);
2096 static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2098 static gen_helper_gvec_3 * const fns[4] = {
2099 gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2100 gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2103 if (sve_access_check(s)) {
2104 unsigned vsz = vec_full_reg_size(s);
2105 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2106 vec_full_reg_offset(s, a->rn),
2107 vec_full_reg_offset(s, a->rm),
2108 vsz, vsz, 0, fns[a->esz]);
2113 static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
2115 static gen_helper_gvec_2 * const fns[4][2] = {
2117 { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2118 { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2119 { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2125 if (sve_access_check(s)) {
2126 unsigned vsz = vec_full_reg_size(s);
2127 tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2128 vec_full_reg_offset(s, a->rn)
2129 + (a->h ? vsz / 2 : 0),
2130 vsz, vsz, 0, fns[a->esz][a->u]);
2136 *** SVE Permute - Predicates Group
2139 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2140 gen_helper_gvec_3 *fn)
2142 if (!sve_access_check(s)) {
2146 unsigned vsz = pred_full_reg_size(s);
2148 /* Predicate sizes may be smaller and cannot use simd_desc.
2149 We cannot round up, as we do elsewhere, because we need
2150 the exact size for ZIP2 and REV. We retain the style for
2151 the other helpers for consistency. */
2152 TCGv_ptr t_d = tcg_temp_new_ptr();
2153 TCGv_ptr t_n = tcg_temp_new_ptr();
2154 TCGv_ptr t_m = tcg_temp_new_ptr();
2159 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2160 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2162 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2163 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2164 tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2165 t_desc = tcg_const_i32(desc);
2167 fn(t_d, t_n, t_m, t_desc);
2169 tcg_temp_free_ptr(t_d);
2170 tcg_temp_free_ptr(t_n);
2171 tcg_temp_free_ptr(t_m);
2172 tcg_temp_free_i32(t_desc);
2176 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2177 gen_helper_gvec_2 *fn)
2179 if (!sve_access_check(s)) {
2183 unsigned vsz = pred_full_reg_size(s);
2184 TCGv_ptr t_d = tcg_temp_new_ptr();
2185 TCGv_ptr t_n = tcg_temp_new_ptr();
2189 tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2190 tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2192 /* Predicate sizes may be smaller and cannot use simd_desc.
2193 We cannot round up, as we do elsewhere, because we need
2194 the exact size for ZIP2 and REV. We retain the style for
2195 the other helpers for consistency. */
2198 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2199 desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2200 t_desc = tcg_const_i32(desc);
2202 fn(t_d, t_n, t_desc);
2204 tcg_temp_free_i32(t_desc);
2205 tcg_temp_free_ptr(t_d);
2206 tcg_temp_free_ptr(t_n);
2210 static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2212 return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2215 static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2217 return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2220 static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2222 return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2225 static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2227 return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2230 static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2232 return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2235 static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2237 return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2240 static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2242 return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2245 static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
2247 return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2250 static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
2252 return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2256 *** SVE Permute - Interleaving Group
2259 static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2261 static gen_helper_gvec_3 * const fns[4] = {
2262 gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2263 gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2266 if (sve_access_check(s)) {
2267 unsigned vsz = vec_full_reg_size(s);
2268 unsigned high_ofs = high ? vsz / 2 : 0;
2269 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2270 vec_full_reg_offset(s, a->rn) + high_ofs,
2271 vec_full_reg_offset(s, a->rm) + high_ofs,
2272 vsz, vsz, 0, fns[a->esz]);
2277 static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2278 gen_helper_gvec_3 *fn)
2280 if (sve_access_check(s)) {
2281 unsigned vsz = vec_full_reg_size(s);
2282 tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2283 vec_full_reg_offset(s, a->rn),
2284 vec_full_reg_offset(s, a->rm),
2285 vsz, vsz, data, fn);
2290 static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2292 return do_zip(s, a, false);
2295 static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2297 return do_zip(s, a, true);
2300 static gen_helper_gvec_3 * const uzp_fns[4] = {
2301 gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2302 gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2305 static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2307 return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2310 static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2312 return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2315 static gen_helper_gvec_3 * const trn_fns[4] = {
2316 gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2317 gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2320 static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2322 return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2325 static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2327 return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2331 *** SVE Permute Vector - Predicated Group
2334 static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2336 static gen_helper_gvec_3 * const fns[4] = {
2337 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2339 return do_zpz_ool(s, a, fns[a->esz]);
2342 /* Call the helper that computes the ARM LastActiveElement pseudocode
2343 * function, scaled by the element size. This includes the not found
2344 * indication; e.g. not found for esz=3 is -8.
2346 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2348 /* Predicate sizes may be smaller and cannot use simd_desc. We cannot
2349 * round up, as we do elsewhere, because we need the exact size.
2351 TCGv_ptr t_p = tcg_temp_new_ptr();
2353 unsigned vsz = pred_full_reg_size(s);
2357 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2359 tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2360 t_desc = tcg_const_i32(desc);
2362 gen_helper_sve_last_active_element(ret, t_p, t_desc);
2364 tcg_temp_free_i32(t_desc);
2365 tcg_temp_free_ptr(t_p);
2368 /* Increment LAST to the offset of the next element in the vector,
2369 * wrapping around to 0.
2371 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2373 unsigned vsz = vec_full_reg_size(s);
2375 tcg_gen_addi_i32(last, last, 1 << esz);
2376 if (is_power_of_2(vsz)) {
2377 tcg_gen_andi_i32(last, last, vsz - 1);
2379 TCGv_i32 max = tcg_const_i32(vsz);
2380 TCGv_i32 zero = tcg_const_i32(0);
2381 tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2382 tcg_temp_free_i32(max);
2383 tcg_temp_free_i32(zero);
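/* For example, with an illustrative 48-byte vector and word elements, an
 * offset of 44 increments to 48 and the movcond wraps it back to 0;
 * power-of-2 vector sizes take the cheaper andi path instead.
 */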
2387 /* If LAST < 0, set LAST to the offset of the last element in the vector. */
2388 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2390 unsigned vsz = vec_full_reg_size(s);
2392 if (is_power_of_2(vsz)) {
2393 tcg_gen_andi_i32(last, last, vsz - 1);
2395 TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2396 TCGv_i32 zero = tcg_const_i32(0);
2397 tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2398 tcg_temp_free_i32(max);
2399 tcg_temp_free_i32(zero);
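/*
 * A minimal host-side sketch of the wrap-around logic used by
 * incr_last_active and wrap_last_active above: a power-of-2 vector size
 * can wrap with a simple mask, anything else needs the compare that the
 * movcond implements.
 */
static inline uint32_t example_incr_wrap(uint32_t last, unsigned esz,
                                         unsigned vsz)
{
    last += 1u << esz;
    if (is_power_of_2(vsz)) {
        return last & (vsz - 1);
    }
    return last >= vsz ? 0 : last;
}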
2403 /* Load an unsigned element of ESZ from BASE+OFS. */
2404 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2406 TCGv_i64 r = tcg_temp_new_i64();
2410 tcg_gen_ld8u_i64(r, base, ofs);
2413 tcg_gen_ld16u_i64(r, base, ofs);
2416 tcg_gen_ld32u_i64(r, base, ofs);
2419 tcg_gen_ld_i64(r, base, ofs);
2422 g_assert_not_reached();
2427 /* Load an unsigned element of ESZ from RM[LAST]. */
2428 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2431 TCGv_ptr p = tcg_temp_new_ptr();
2434 /* Convert the offset within the vector into an offset within ENV.
2435 * The final adjustment for the vector register base
2436 * is added via constant offset to the load.
2438 #ifdef HOST_WORDS_BIGENDIAN
2439 /* Adjust for element ordering. See vec_reg_offset. */
2441 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2444 tcg_gen_ext_i32_ptr(p, last);
2445 tcg_gen_add_ptr(p, p, cpu_env);
2447 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2448 tcg_temp_free_ptr(p);
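/*
 * Worked example of the big-endian fixup above: within each aligned 64-bit
 * doubleword of the register file, a big-endian host stores elements in the
 * mirrored order, and for offsets below 8 that mirror is exactly an XOR
 * with 8 - (1 << esz) (e.g. byte 3 -> 4, halfword 0 -> 6).  Sketch only;
 * the translator applies this to a TCG value.
 */
static inline unsigned example_be_elt_offset(unsigned le_ofs, unsigned esz)
{
    return le_ofs ^ (8 - (1u << esz));
}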
2453 /* Compute CLAST for a Zreg. */
2454 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2459 unsigned vsz, esz = a->esz;
2461 if (!sve_access_check(s)) {
2465 last = tcg_temp_local_new_i32();
2466 over = gen_new_label();
2468 find_last_active(s, last, esz, a->pg);
2470 /* There is of course no movcond for a 2048-bit vector,
2471 * so we must branch over the actual store.
2473 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2476 incr_last_active(s, last, esz);
2479 ele = load_last_active(s, last, a->rm, esz);
2480 tcg_temp_free_i32(last);
2482 vsz = vec_full_reg_size(s);
2483 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2484 tcg_temp_free_i64(ele);
2486 /* If this insn used MOVPRFX, we may need a second move. */
2487 if (a->rd != a->rn) {
2488 TCGLabel *done = gen_new_label();
2491 gen_set_label(over);
2492 do_mov_z(s, a->rd, a->rn);
2494 gen_set_label(done);
2496 gen_set_label(over);
2501 static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2503 return do_clast_vector(s, a, false);
2506 static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2508 return do_clast_vector(s, a, true);
2511 /* Compute CLAST for a scalar. */
2512 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2513 bool before, TCGv_i64 reg_val)
2515 TCGv_i32 last = tcg_temp_new_i32();
2516 TCGv_i64 ele, cmp, zero;
2518 find_last_active(s, last, esz, pg);
2520 /* Extend the original value of last prior to incrementing. */
2521 cmp = tcg_temp_new_i64();
2522 tcg_gen_ext_i32_i64(cmp, last);
2525 incr_last_active(s, last, esz);
2528 /* The conceit here is that while last < 0 indicates not found, after
2529 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2530 * from which we can load garbage. We then discard the garbage with
2531 * a conditional move.
2533 ele = load_last_active(s, last, rm, esz);
2534 tcg_temp_free_i32(last);
2536 zero = tcg_const_i64(0);
2537 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2539 tcg_temp_free_i64(zero);
2540 tcg_temp_free_i64(cmp);
2541 tcg_temp_free_i64(ele);
2544 /* Compute CLAST for a Vreg. */
2545 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2547 if (sve_access_check(s)) {
2549 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2550 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2552 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2553 write_fp_dreg(s, a->rd, reg);
2554 tcg_temp_free_i64(reg);
2559 static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2561 return do_clast_fp(s, a, false);
2564 static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2566 return do_clast_fp(s, a, true);
2569 /* Compute CLAST for a Xreg. */
2570 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2574 if (!sve_access_check(s)) {
2578 reg = cpu_reg(s, a->rd);
2581 tcg_gen_ext8u_i64(reg, reg);
2584 tcg_gen_ext16u_i64(reg, reg);
2587 tcg_gen_ext32u_i64(reg, reg);
2592 g_assert_not_reached();
2595 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2599 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2601 return do_clast_general(s, a, false);
2604 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2606 return do_clast_general(s, a, true);
2609 /* Compute LAST for a scalar. */
2610 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2611 int pg, int rm, bool before)
2613 TCGv_i32 last = tcg_temp_new_i32();
2616 find_last_active(s, last, esz, pg);
2618 wrap_last_active(s, last, esz);
2620 incr_last_active(s, last, esz);
2623 ret = load_last_active(s, last, rm, esz);
2624 tcg_temp_free_i32(last);
2628 /* Compute LAST for a Vreg. */
2629 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2631 if (sve_access_check(s)) {
2632 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2633 write_fp_dreg(s, a->rd, val);
2634 tcg_temp_free_i64(val);
2639 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2641 return do_last_fp(s, a, false);
2644 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2646 return do_last_fp(s, a, true);
2649 /* Compute LAST for a Xreg. */
2650 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2652 if (sve_access_check(s)) {
2653 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2654 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2655 tcg_temp_free_i64(val);
2660 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2662 return do_last_general(s, a, false);
2665 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2667 return do_last_general(s, a, true);
2670 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2672 if (sve_access_check(s)) {
2673 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2678 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2680 if (sve_access_check(s)) {
2681 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2682 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2683 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2684 tcg_temp_free_i64(t);
2689 static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2691 static gen_helper_gvec_3 * const fns[4] = {
2693 gen_helper_sve_revb_h,
2694 gen_helper_sve_revb_s,
2695 gen_helper_sve_revb_d,
2697 return do_zpz_ool(s, a, fns[a->esz]);
2700 static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2702 static gen_helper_gvec_3 * const fns[4] = {
2705 gen_helper_sve_revh_s,
2706 gen_helper_sve_revh_d,
2708 return do_zpz_ool(s, a, fns[a->esz]);
2711 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2713 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2716 static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2718 static gen_helper_gvec_3 * const fns[4] = {
2719 gen_helper_sve_rbit_b,
2720 gen_helper_sve_rbit_h,
2721 gen_helper_sve_rbit_s,
2722 gen_helper_sve_rbit_d,
2724 return do_zpz_ool(s, a, fns[a->esz]);
2727 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2729 if (sve_access_check(s)) {
2730 unsigned vsz = vec_full_reg_size(s);
2731 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2732 vec_full_reg_offset(s, a->rn),
2733 vec_full_reg_offset(s, a->rm),
2734 pred_full_reg_offset(s, a->pg),
2735 vsz, vsz, a->esz, gen_helper_sve_splice);
2741 *** SVE Integer Compare - Vectors Group
2744 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2745 gen_helper_gvec_flags_4 *gen_fn)
2747 TCGv_ptr pd, zn, zm, pg;
2751 if (gen_fn == NULL) {
2754 if (!sve_access_check(s)) {
2758 vsz = vec_full_reg_size(s);
2759 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2760 pd = tcg_temp_new_ptr();
2761 zn = tcg_temp_new_ptr();
2762 zm = tcg_temp_new_ptr();
2763 pg = tcg_temp_new_ptr();
2765 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2766 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2767 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2768 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2770 gen_fn(t, pd, zn, zm, pg, t);
2772 tcg_temp_free_ptr(pd);
2773 tcg_temp_free_ptr(zn);
2774 tcg_temp_free_ptr(zm);
2775 tcg_temp_free_ptr(pg);
2779 tcg_temp_free_i32(t);
2783 #define DO_PPZZ(NAME, name) \
2784 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \
2787 static gen_helper_gvec_flags_4 * const fns[4] = { \
2788 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2789 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2791 return do_ppzz_flags(s, a, fns[a->esz]); \
2794 DO_PPZZ(CMPEQ, cmpeq)
2795 DO_PPZZ(CMPNE, cmpne)
2796 DO_PPZZ(CMPGT, cmpgt)
2797 DO_PPZZ(CMPGE, cmpge)
2798 DO_PPZZ(CMPHI, cmphi)
2799 DO_PPZZ(CMPHS, cmphs)
2803 #define DO_PPZW(NAME, name) \
2804 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a, \
2807 static gen_helper_gvec_flags_4 * const fns[4] = { \
2808 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2809 gen_helper_sve_##name##_ppzw_s, NULL \
2811 return do_ppzz_flags(s, a, fns[a->esz]); \
2814 DO_PPZW(CMPEQ, cmpeq)
2815 DO_PPZW(CMPNE, cmpne)
2816 DO_PPZW(CMPGT, cmpgt)
2817 DO_PPZW(CMPGE, cmpge)
2818 DO_PPZW(CMPHI, cmphi)
2819 DO_PPZW(CMPHS, cmphs)
2820 DO_PPZW(CMPLT, cmplt)
2821 DO_PPZW(CMPLE, cmple)
2822 DO_PPZW(CMPLO, cmplo)
2823 DO_PPZW(CMPLS, cmpls)
2828 *** SVE Integer Compare - Immediate Groups
2831 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2832 gen_helper_gvec_flags_3 *gen_fn)
2834 TCGv_ptr pd, zn, pg;
2838 if (gen_fn == NULL) {
2841 if (!sve_access_check(s)) {
2845 vsz = vec_full_reg_size(s);
2846 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2847 pd = tcg_temp_new_ptr();
2848 zn = tcg_temp_new_ptr();
2849 pg = tcg_temp_new_ptr();
2851 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2852 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2853 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2855 gen_fn(t, pd, zn, pg, t);
2857 tcg_temp_free_ptr(pd);
2858 tcg_temp_free_ptr(zn);
2859 tcg_temp_free_ptr(pg);
2863 tcg_temp_free_i32(t);
2867 #define DO_PPZI(NAME, name) \
2868 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a, \
2871 static gen_helper_gvec_flags_3 * const fns[4] = { \
2872 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2873 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2875 return do_ppzi_flags(s, a, fns[a->esz]); \
2878 DO_PPZI(CMPEQ, cmpeq)
2879 DO_PPZI(CMPNE, cmpne)
2880 DO_PPZI(CMPGT, cmpgt)
2881 DO_PPZI(CMPGE, cmpge)
2882 DO_PPZI(CMPHI, cmphi)
2883 DO_PPZI(CMPHS, cmphs)
2884 DO_PPZI(CMPLT, cmplt)
2885 DO_PPZI(CMPLE, cmple)
2886 DO_PPZI(CMPLO, cmplo)
2887 DO_PPZI(CMPLS, cmpls)
2892 *** SVE Partition Break Group
2895 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2896 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2898 if (!sve_access_check(s)) {
2902 unsigned vsz = pred_full_reg_size(s);
2904 /* Predicate sizes may be smaller and cannot use simd_desc. */
2905 TCGv_ptr d = tcg_temp_new_ptr();
2906 TCGv_ptr n = tcg_temp_new_ptr();
2907 TCGv_ptr m = tcg_temp_new_ptr();
2908 TCGv_ptr g = tcg_temp_new_ptr();
2909 TCGv_i32 t = tcg_const_i32(vsz - 2);
2911 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2912 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2913 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2914 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2917 fn_s(t, d, n, m, g, t);
2922 tcg_temp_free_ptr(d);
2923 tcg_temp_free_ptr(n);
2924 tcg_temp_free_ptr(m);
2925 tcg_temp_free_ptr(g);
2926 tcg_temp_free_i32(t);
2930 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2931 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2933 if (!sve_access_check(s)) {
2937 unsigned vsz = pred_full_reg_size(s);
2939 /* Predicate sizes may be smaller and cannot use simd_desc. */
2940 TCGv_ptr d = tcg_temp_new_ptr();
2941 TCGv_ptr n = tcg_temp_new_ptr();
2942 TCGv_ptr g = tcg_temp_new_ptr();
2943 TCGv_i32 t = tcg_const_i32(vsz - 2);
2945 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2946 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2947 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2950 fn_s(t, d, n, g, t);
2955 tcg_temp_free_ptr(d);
2956 tcg_temp_free_ptr(n);
2957 tcg_temp_free_ptr(g);
2958 tcg_temp_free_i32(t);
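/*
 * Sketch of the ad-hoc descriptor used for predicate operations such as the
 * BRK helpers above (and find_last_active): predicate sizes can be any
 * multiple of 2, which simd_desc() cannot represent, so the byte size minus
 * 2 goes in the low bits, with any extra data deposited at SIMD_DATA_SHIFT.
 * The decode shown here is an assumption about what the out-of-line
 * helpers expect.
 */
static inline uint32_t example_pred_desc(unsigned psz, unsigned data)
{
    return deposit32(psz - 2, SIMD_DATA_SHIFT, 2, data);
}

static inline unsigned example_pred_desc_size(uint32_t desc)
{
    return extract32(desc, 0, SIMD_DATA_SHIFT) + 2;  /* assumed inverse */
}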
2962 static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2964 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2967 static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2969 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2972 static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2974 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2977 static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2979 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2982 static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2984 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2987 static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2989 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
2992 static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2994 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
2998 *** SVE Predicate Count Group
3001 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3003 unsigned psz = pred_full_reg_size(s);
3008 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3010 TCGv_i64 g = tcg_temp_new_i64();
3011 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3012 tcg_gen_and_i64(val, val, g);
3013 tcg_temp_free_i64(g);
3016 /* Reduce the pred_esz_masks value simply to reduce the
3017 * size of the code generated here.
3019 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3020 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3022 tcg_gen_ctpop_i64(val, val);
3024 TCGv_ptr t_pn = tcg_temp_new_ptr();
3025 TCGv_ptr t_pg = tcg_temp_new_ptr();
3030 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3032 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3033 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3034 t_desc = tcg_const_i32(desc);
3036 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3037 tcg_temp_free_ptr(t_pn);
3038 tcg_temp_free_ptr(t_pg);
3039 tcg_temp_free_i32(t_desc);
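/*
 * Host-side sketch of the single-word fast path above, assuming the whole
 * predicate fits in one uint64_t (i.e. VL <= 512 bits) and the usual
 * pred_esz_masks encoding of one governing bit per element: AND the two
 * predicates, mask down to one bit per element, then popcount.
 */
static inline uint64_t example_cntp_1word(uint64_t pn, uint64_t pg,
                                          unsigned psz, unsigned esz)
{
    uint64_t val = pn & pg;

    val &= pred_esz_masks[esz] & MAKE_64BIT_MASK(0, psz * 8);
    return ctpop64(val);
}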
3043 static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
3045 if (sve_access_check(s)) {
3046 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3051 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
3054 if (sve_access_check(s)) {
3055 TCGv_i64 reg = cpu_reg(s, a->rd);
3056 TCGv_i64 val = tcg_temp_new_i64();
3058 do_cntp(s, val, a->esz, a->pg, a->pg);
3060 tcg_gen_sub_i64(reg, reg, val);
3062 tcg_gen_add_i64(reg, reg, val);
3064 tcg_temp_free_i64(val);
3069 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3075 if (sve_access_check(s)) {
3076 unsigned vsz = vec_full_reg_size(s);
3077 TCGv_i64 val = tcg_temp_new_i64();
3078 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3080 do_cntp(s, val, a->esz, a->pg, a->pg);
3081 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3082 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3087 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
3090 if (sve_access_check(s)) {
3091 TCGv_i64 reg = cpu_reg(s, a->rd);
3092 TCGv_i64 val = tcg_temp_new_i64();
3094 do_cntp(s, val, a->esz, a->pg, a->pg);
3095 do_sat_addsub_32(reg, val, a->u, a->d);
3100 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
3103 if (sve_access_check(s)) {
3104 TCGv_i64 reg = cpu_reg(s, a->rd);
3105 TCGv_i64 val = tcg_temp_new_i64();
3107 do_cntp(s, val, a->esz, a->pg, a->pg);
3108 do_sat_addsub_64(reg, val, a->u, a->d);
3113 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3119 if (sve_access_check(s)) {
3120 TCGv_i64 val = tcg_temp_new_i64();
3121 do_cntp(s, val, a->esz, a->pg, a->pg);
3122 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3128 *** SVE Integer Compare Scalars Group
3131 static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
3133 if (!sve_access_check(s)) {
3137 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3138 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3139 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3140 TCGv_i64 cmp = tcg_temp_new_i64();
3142 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3143 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3144 tcg_temp_free_i64(cmp);
3146 /* VF = !NF & !CF. */
3147 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3148 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3150 /* Both NF and VF actually look at bit 31. */
3151 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3152 tcg_gen_neg_i32(cpu_VF, cpu_VF);
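/*
 * Host-side sketch of the flag computation above, assuming the usual A64
 * TCG convention that cpu_NF and cpu_VF are tested via bit 31 and cpu_CF
 * holds 0 or 1: N becomes the termination condition, V becomes !N & !C,
 * and Z and C are left untouched.
 */
static inline void example_cterm_flags(bool term, uint32_t cf,
                                       int32_t *nf, int32_t *vf)
{
    *nf = -(int32_t)term;              /* bit 31 = termination condition */
    *vf = -(int32_t)(!term && !cf);    /* bit 31 = !N & !C */
}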
3156 static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
3158 if (!sve_access_check(s)) {
3162 TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
3163 TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
3164 TCGv_i64 t0 = tcg_temp_new_i64();
3165 TCGv_i64 t1 = tcg_temp_new_i64();
3168 unsigned desc, vsz = vec_full_reg_size(s);
3173 tcg_gen_ext32u_i64(op0, op0);
3174 tcg_gen_ext32u_i64(op1, op1);
3176 tcg_gen_ext32s_i64(op0, op0);
3177 tcg_gen_ext32s_i64(op1, op1);
3181 /* For the helper, compress the different conditions into a computation
3182 * of the number of iterations for which the condition is true.
3184 * This is slightly complicated by 0 <= UINT64_MAX, which is nominally
3185 * 2**64 iterations, overflowing to 0. Of course, predicate registers
3186 * aren't that large, so any value >= predicate size is sufficient.
3188 tcg_gen_sub_i64(t0, op1, op0);
3190 /* t0 = MIN(op1 - op0, vsz). */
3191 tcg_gen_movi_i64(t1, vsz);
3192 tcg_gen_umin_i64(t0, t0, t1);
3194 /* Equality means one more iteration. */
3195 tcg_gen_addi_i64(t0, t0, 1);
3198 /* t0 = (condition true ? t0 : 0). */
3200 ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3201 : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3202 tcg_gen_movi_i64(t1, 0);
3203 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3205 t2 = tcg_temp_new_i32();
3206 tcg_gen_extrl_i64_i32(t2, t0);
3207 tcg_temp_free_i64(t0);
3208 tcg_temp_free_i64(t1);
3210 desc = (vsz / 8) - 2;
3211 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3212 t3 = tcg_const_i32(desc);
3214 ptr = tcg_temp_new_ptr();
3215 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3217 gen_helper_sve_while(t2, ptr, t2, t3);
3220 tcg_temp_free_ptr(ptr);
3221 tcg_temp_free_i32(t2);
3222 tcg_temp_free_i32(t3);
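/*
 * Host-side sketch of the iteration count computed above, for the unsigned
 * comparison case (a->u); the signed case is analogous.  The clamp to the
 * vector size makes the nominal 2**64 iterations of 0 <= UINT64_MAX
 * harmless, and the "equal allowed" forms get one extra iteration.
 */
static inline uint64_t example_while_count(uint64_t op0, uint64_t op1,
                                           bool eq, unsigned vsz)
{
    uint64_t n = op1 - op0;          /* may wrap; clamped just below */

    n = MIN(n, vsz);
    n += eq;
    return (eq ? op0 <= op1 : op0 < op1) ? n : 0;
}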
3227 *** SVE Integer Wide Immediate - Unpredicated Group
3230 static bool trans_FDUP(DisasContext *s, arg_FDUP *a, uint32_t insn)
3235 if (sve_access_check(s)) {
3236 unsigned vsz = vec_full_reg_size(s);
3237 int dofs = vec_full_reg_offset(s, a->rd);
3240 /* Decode the VFP immediate. */
3241 imm = vfp_expand_imm(a->esz, a->imm);
3242 imm = dup_const(a->esz, imm);
3244 tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
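/*
 * Worked example of the immediate expansion above: for a half-precision
 * FDUP of 1.0, vfp_expand_imm yields 0x3c00 and dup_const(MO_16, ...)
 * replicates it to 0x3c003c003c003c00 before the 64-bit splat.  The
 * halfword replication is equivalent to this multiply:
 */
static inline uint64_t example_dup16(uint16_t v)
{
    return v * 0x0001000100010001ull;
}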
3249 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a, uint32_t insn)
3251 if (a->esz == 0 && extract32(insn, 13, 1)) {
3254 if (sve_access_check(s)) {
3255 unsigned vsz = vec_full_reg_size(s);
3256 int dofs = vec_full_reg_offset(s, a->rd);
3258 tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
3263 static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3265 if (a->esz == 0 && extract32(insn, 13, 1)) {
3268 if (sve_access_check(s)) {
3269 unsigned vsz = vec_full_reg_size(s);
3270 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3271 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3276 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3279 return trans_ADD_zzi(s, a, insn);
3282 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3284 static const GVecGen2s op[4] = {
3285 { .fni8 = tcg_gen_vec_sub8_i64,
3286 .fniv = tcg_gen_sub_vec,
3287 .fno = gen_helper_sve_subri_b,
3288 .opc = INDEX_op_sub_vec,
3290 .scalar_first = true },
3291 { .fni8 = tcg_gen_vec_sub16_i64,
3292 .fniv = tcg_gen_sub_vec,
3293 .fno = gen_helper_sve_subri_h,
3294 .opc = INDEX_op_sub_vec,
3296 .scalar_first = true },
3297 { .fni4 = tcg_gen_sub_i32,
3298 .fniv = tcg_gen_sub_vec,
3299 .fno = gen_helper_sve_subri_s,
3300 .opc = INDEX_op_sub_vec,
3302 .scalar_first = true },
3303 { .fni8 = tcg_gen_sub_i64,
3304 .fniv = tcg_gen_sub_vec,
3305 .fno = gen_helper_sve_subri_d,
3306 .opc = INDEX_op_sub_vec,
3307 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3309 .scalar_first = true }
3312 if (a->esz == 0 && extract32(insn, 13, 1)) {
3315 if (sve_access_check(s)) {
3316 unsigned vsz = vec_full_reg_size(s);
3317 TCGv_i64 c = tcg_const_i64(a->imm);
3318 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3319 vec_full_reg_offset(s, a->rn),
3320 vsz, vsz, c, &op[a->esz]);
3321 tcg_temp_free_i64(c);
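/*
 * The .scalar_first flag in the table above makes the immediate the first
 * operand, so SUBR computes imm - Zn[i] rather than Zn[i] - imm.
 * Per-element sketch for the byte case:
 */
static inline uint8_t example_subr_b(uint8_t imm, uint8_t n)
{
    return imm - n;
}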
3326 static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3328 if (sve_access_check(s)) {
3329 unsigned vsz = vec_full_reg_size(s);
3330 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3331 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3336 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, uint32_t insn,
3339 if (a->esz == 0 && extract32(insn, 13, 1)) {
3342 if (sve_access_check(s)) {
3343 TCGv_i64 val = tcg_const_i64(a->imm);
3344 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3345 tcg_temp_free_i64(val);
3350 static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3352 return do_zzi_sat(s, a, insn, false, false);
3355 static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3357 return do_zzi_sat(s, a, insn, true, false);
3360 static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3362 return do_zzi_sat(s, a, insn, false, true);
3365 static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3367 return do_zzi_sat(s, a, insn, true, true);
3370 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3372 if (sve_access_check(s)) {
3373 unsigned vsz = vec_full_reg_size(s);
3374 TCGv_i64 c = tcg_const_i64(a->imm);
3376 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3377 vec_full_reg_offset(s, a->rn),
3378 c, vsz, vsz, 0, fn);
3379 tcg_temp_free_i64(c);
3384 #define DO_ZZI(NAME, name) \
3385 static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a, \
3388 static gen_helper_gvec_2i * const fns[4] = { \
3389 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3390 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3392 return do_zzi_ool(s, a, fns[a->esz]); \
3403 *** SVE Floating Point Accumulating Reduction Group
3406 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
3408 typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3409 TCGv_ptr, TCGv_ptr, TCGv_i32);
3410 static fadda_fn * const fns[3] = {
3411 gen_helper_sve_fadda_h,
3412 gen_helper_sve_fadda_s,
3413 gen_helper_sve_fadda_d,
3415 unsigned vsz = vec_full_reg_size(s);
3416 TCGv_ptr t_rm, t_pg, t_fpst;
3423 if (!sve_access_check(s)) {
3427 t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3428 t_rm = tcg_temp_new_ptr();
3429 t_pg = tcg_temp_new_ptr();
3430 tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3431 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3432 t_fpst = get_fpstatus_ptr(a->esz == MO_16);
3433 t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3435 fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3437 tcg_temp_free_i32(t_desc);
3438 tcg_temp_free_ptr(t_fpst);
3439 tcg_temp_free_ptr(t_pg);
3440 tcg_temp_free_ptr(t_rm);
3442 write_fp_dreg(s, a->rd, t_val);
3443 tcg_temp_free_i64(t_val);
3448 *** SVE Floating Point Arithmetic - Unpredicated Group
3451 static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3452 gen_helper_gvec_3_ptr *fn)
3457 if (sve_access_check(s)) {
3458 unsigned vsz = vec_full_reg_size(s);
3459 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3460 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3461 vec_full_reg_offset(s, a->rn),
3462 vec_full_reg_offset(s, a->rm),
3463 status, vsz, vsz, 0, fn);
3464 tcg_temp_free_ptr(status);
3470 #define DO_FP3(NAME, name) \
3471 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a, uint32_t insn) \
3473 static gen_helper_gvec_3_ptr * const fns[4] = { \
3474 NULL, gen_helper_gvec_##name##_h, \
3475 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3477 return do_zzz_fp(s, a, fns[a->esz]); \
3480 DO_FP3(FADD_zzz, fadd)
3481 DO_FP3(FSUB_zzz, fsub)
3482 DO_FP3(FMUL_zzz, fmul)
3483 DO_FP3(FTSMUL, ftsmul)
3484 DO_FP3(FRECPS, recps)
3485 DO_FP3(FRSQRTS, rsqrts)
3490 *** SVE Floating Point Arithmetic - Predicated Group
3493 static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3494 gen_helper_gvec_4_ptr *fn)
3499 if (sve_access_check(s)) {
3500 unsigned vsz = vec_full_reg_size(s);
3501 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3502 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3503 vec_full_reg_offset(s, a->rn),
3504 vec_full_reg_offset(s, a->rm),
3505 pred_full_reg_offset(s, a->pg),
3506 status, vsz, vsz, 0, fn);
3507 tcg_temp_free_ptr(status);
3512 #define DO_FP3(NAME, name) \
3513 static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a, uint32_t insn) \
3515 static gen_helper_gvec_4_ptr * const fns[4] = { \
3516 NULL, gen_helper_sve_##name##_h, \
3517 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3519 return do_zpzz_fp(s, a, fns[a->esz]); \
3522 DO_FP3(FADD_zpzz, fadd)
3523 DO_FP3(FSUB_zpzz, fsub)
3524 DO_FP3(FMUL_zpzz, fmul)
3525 DO_FP3(FMIN_zpzz, fmin)
3526 DO_FP3(FMAX_zpzz, fmax)
3527 DO_FP3(FMINNM_zpzz, fminnum)
3528 DO_FP3(FMAXNM_zpzz, fmaxnum)
3530 DO_FP3(FSCALE, fscalbn)
3532 DO_FP3(FMULX, fmulx)
3536 typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);
3538 static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)
3543 if (!sve_access_check(s)) {
3547 unsigned vsz = vec_full_reg_size(s);
3550 TCGv_ptr pg = tcg_temp_new_ptr();
3552 /* We would need 7 operands to pass these arguments "properly".
3553 * So we encode all the register numbers into the descriptor.
3555 desc = deposit32(a->rd, 5, 5, a->rn);
3556 desc = deposit32(desc, 10, 5, a->rm);
3557 desc = deposit32(desc, 15, 5, a->ra);
3558 desc = simd_desc(vsz, vsz, desc);
3560 t_desc = tcg_const_i32(desc);
3561 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3562 fn(cpu_env, pg, t_desc);
3563 tcg_temp_free_i32(t_desc);
3564 tcg_temp_free_ptr(pg);
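/*
 * Sketch of how a helper could unpack the register numbers packed into the
 * descriptor above; the fields mirror the deposit32 calls (5 bits each for
 * rd, rn, rm, ra, starting at bit 0 of the data value carried by
 * simd_desc()).  Illustrative only.
 */
static inline void example_fmla_regs(uint32_t desc, unsigned *rd,
                                     unsigned *rn, unsigned *rm, unsigned *ra)
{
    uint32_t data = simd_data(desc);

    *rd = extract32(data, 0, 5);
    *rn = extract32(data, 5, 5);
    *rm = extract32(data, 10, 5);
    *ra = extract32(data, 15, 5);
}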
3568 #define DO_FMLA(NAME, name) \
3569 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
3571 static gen_helper_sve_fmla * const fns[4] = { \
3572 NULL, gen_helper_sve_##name##_h, \
3573 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3575 return do_fmla(s, a, fns[a->esz]); \
3578 DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
3579 DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
3580 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
3581 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
3586 *** SVE Floating Point Unary Operations Predicated Group
3589 static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
3590 bool is_fp16, gen_helper_gvec_3_ptr *fn)
3592 if (sve_access_check(s)) {
3593 unsigned vsz = vec_full_reg_size(s);
3594 TCGv_ptr status = get_fpstatus_ptr(is_fp16);
3595 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
3596 vec_full_reg_offset(s, rn),
3597 pred_full_reg_offset(s, pg),
3598 status, vsz, vsz, 0, fn);
3599 tcg_temp_free_ptr(status);
3604 static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3606 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
3609 static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3611 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
3614 static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3616 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
3619 static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3621 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
3624 static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3626 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
3629 static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3631 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
3634 static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3636 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
3639 static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3641 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
3644 static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3646 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
3649 static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3651 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
3654 static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3656 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
3659 static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3661 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
3664 static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3666 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
3669 static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3671 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
3675 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
3678 /* Subroutine loading a vector register at VOFS of LEN bytes.
3679 * The load should begin at the address Rn + IMM.
3682 static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
3685 uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
3686 uint32_t len_remain = len % 8;
3687 uint32_t nparts = len / 8 + ctpop8(len_remain);
3688 int midx = get_mem_index(s);
3689 TCGv_i64 addr, t0, t1;
3691 addr = tcg_temp_new_i64();
3692 t0 = tcg_temp_new_i64();
3694 /* Note that unpredicated load/store of vector/predicate registers
3695 * are defined as a stream of bytes, which equates to little-endian
3696 * operations on larger quantities. There is no nice way to force
3697 * a little-endian load for aarch64_be-linux-user out of line.
3699 * Attempt to keep code expansion to a minimum by limiting the
3700 * amount of unrolling done.
3705 for (i = 0; i < len_align; i += 8) {
3706 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
3707 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
3708 tcg_gen_st_i64(t0, cpu_env, vofs + i);
3711 TCGLabel *loop = gen_new_label();
3712 TCGv_ptr tp, i = tcg_const_local_ptr(0);
3714 gen_set_label(loop);
3716 /* Minimize the number of local temps that must be re-read from
3717 * the stack each iteration. Instead, re-compute values other
3718 * than the loop counter.
3720 tp = tcg_temp_new_ptr();
3721 tcg_gen_addi_ptr(tp, i, imm);
3722 tcg_gen_extu_ptr_i64(addr, tp);
3723 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
3725 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
3727 tcg_gen_add_ptr(tp, cpu_env, i);
3728 tcg_gen_addi_ptr(i, i, 8);
3729 tcg_gen_st_i64(t0, tp, vofs);
3730 tcg_temp_free_ptr(tp);
3732 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
3733 tcg_temp_free_ptr(i);
3736 /* Predicate register loads can be any multiple of 2.
3737 * Note that we still store the entire 64-bit unit into cpu_env.
3740 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
3742 switch (len_remain) {
3746 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
3750 t1 = tcg_temp_new_i64();
3751 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
3752 tcg_gen_addi_i64(addr, addr, 4);
3753 tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
3754 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
3755 tcg_temp_free_i64(t1);
3759 g_assert_not_reached();
3761 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
3763 tcg_temp_free_i64(addr);
3764 tcg_temp_free_i64(t0);
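/*
 * Worked example of the tail handling above: for the 2, 4 and 8 byte
 * remainders handled by the first case, ctz32 converts the byte count
 * directly into a memory-op size, e.g. MO_LE | ctz32(2) == MO_LEUW and
 * MO_LE | ctz32(4) == MO_LEUL; only the 6-byte remainder needs the
 * two-part 4+2 load.
 */
static inline TCGMemOp example_tail_memop(unsigned len_remain)
{
    return MO_LE | ctz32(len_remain);
}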
3767 /* Similarly for stores. */
3768 static void do_str(DisasContext *s, uint32_t vofs, uint32_t len,
3771 uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
3772 uint32_t len_remain = len % 8;
3773 uint32_t nparts = len / 8 + ctpop8(len_remain);
3774 int midx = get_mem_index(s);
3777 addr = tcg_temp_new_i64();
3778 t0 = tcg_temp_new_i64();
3780 /* Note that unpredicated load/store of vector/predicate registers
3781 * are defined as a stream of bytes, which equates to little-endian
3782 * operations on larger quantities. There is no nice way to force
3783 * a little-endian store for aarch64_be-linux-user out of line.
3785 * Attempt to keep code expansion to a minimum by limiting the
3786 * amount of unrolling done.
3791 for (i = 0; i < len_align; i += 8) {
3792 tcg_gen_ld_i64(t0, cpu_env, vofs + i);
3793 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
3794 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
3797 TCGLabel *loop = gen_new_label();
3798 TCGv_ptr t2, i = tcg_const_local_ptr(0);
3800 gen_set_label(loop);
3802 t2 = tcg_temp_new_ptr();
3803 tcg_gen_add_ptr(t2, cpu_env, i);
3804 tcg_gen_ld_i64(t0, t2, vofs);
3806 /* Minimize the number of local temps that must be re-read from
3807 * the stack each iteration. Instead, re-compute values other
3808 * than the loop counter.
3810 tcg_gen_addi_ptr(t2, i, imm);
3811 tcg_gen_extu_ptr_i64(addr, t2);
3812 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
3813 tcg_temp_free_ptr(t2);
3815 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
3817 tcg_gen_addi_ptr(i, i, 8);
3819 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
3820 tcg_temp_free_ptr(i);
3823 /* Predicate register stores can be any multiple of 2. */
3825 tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
3826 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
3828 switch (len_remain) {
3832 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
3836 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
3837 tcg_gen_addi_i64(addr, addr, 4);
3838 tcg_gen_shri_i64(t0, t0, 32);
3839 tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
3843 g_assert_not_reached();
3846 tcg_temp_free_i64(addr);
3847 tcg_temp_free_i64(t0);
3850 static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
3852 if (sve_access_check(s)) {
3853 int size = vec_full_reg_size(s);
3854 int off = vec_full_reg_offset(s, a->rd);
3855 do_ldr(s, off, size, a->rn, a->imm * size);
3860 static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
3862 if (sve_access_check(s)) {
3863 int size = pred_full_reg_size(s);
3864 int off = pred_full_reg_offset(s, a->rd);
3865 do_ldr(s, off, size, a->rn, a->imm * size);
3870 static bool trans_STR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
3872 if (sve_access_check(s)) {
3873 int size = vec_full_reg_size(s);
3874 int off = vec_full_reg_offset(s, a->rd);
3875 do_str(s, off, size, a->rn, a->imm * size);
3880 static bool trans_STR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
3882 if (sve_access_check(s)) {
3883 int size = pred_full_reg_size(s);
3884 int off = pred_full_reg_offset(s, a->rd);
3885 do_str(s, off, size, a->rn, a->imm * size);
3891 *** SVE Memory - Contiguous Load Group
3894 /* The memory mode of the dtype. */
3895 static const TCGMemOp dtype_mop[16] = {
3896 MO_UB, MO_UB, MO_UB, MO_UB,
3897 MO_SL, MO_UW, MO_UW, MO_UW,
3898 MO_SW, MO_SW, MO_UL, MO_UL,
3899 MO_SB, MO_SB, MO_SB, MO_Q
3902 #define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
3904 /* The vector element size of dtype. */
3905 static const uint8_t dtype_esz[16] = {
3912 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
3913 gen_helper_gvec_mem *fn)
3915 unsigned vsz = vec_full_reg_size(s);
3919 /* For e.g. LD4, there are not enough arguments to pass all 4
3920 * registers as pointers, so encode the regno into the data field.
3921 * For consistency, do this even for LD1.
3923 desc = tcg_const_i32(simd_desc(vsz, vsz, zt));
3924 t_pg = tcg_temp_new_ptr();
3926 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3927 fn(cpu_env, t_pg, addr, desc);
3929 tcg_temp_free_ptr(t_pg);
3930 tcg_temp_free_i32(desc);
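/*
 * Sketch of the helper-side view of the descriptor built above: the
 * register number rides in the data field of simd_desc(), so a load helper
 * can recover it with simd_data() and locate zregs[zt] (and zt+1.. for
 * LD2..LD4) by itself.  Illustrative only.
 */
static inline unsigned example_mem_desc_zt(uint32_t desc)
{
    return simd_data(desc);
}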
3933 static void do_ld_zpa(DisasContext *s, int zt, int pg,
3934 TCGv_i64 addr, int dtype, int nreg)
3936 static gen_helper_gvec_mem * const fns[16][4] = {
3937 { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
3938 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
3939 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
3940 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
3941 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
3943 { gen_helper_sve_ld1sds_r, NULL, NULL, NULL },
3944 { gen_helper_sve_ld1hh_r, gen_helper_sve_ld2hh_r,
3945 gen_helper_sve_ld3hh_r, gen_helper_sve_ld4hh_r },
3946 { gen_helper_sve_ld1hsu_r, NULL, NULL, NULL },
3947 { gen_helper_sve_ld1hdu_r, NULL, NULL, NULL },
3949 { gen_helper_sve_ld1hds_r, NULL, NULL, NULL },
3950 { gen_helper_sve_ld1hss_r, NULL, NULL, NULL },
3951 { gen_helper_sve_ld1ss_r, gen_helper_sve_ld2ss_r,
3952 gen_helper_sve_ld3ss_r, gen_helper_sve_ld4ss_r },
3953 { gen_helper_sve_ld1sdu_r, NULL, NULL, NULL },
3955 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
3956 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
3957 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
3958 { gen_helper_sve_ld1dd_r, gen_helper_sve_ld2dd_r,
3959 gen_helper_sve_ld3dd_r, gen_helper_sve_ld4dd_r },
3961 gen_helper_gvec_mem *fn = fns[dtype][nreg];
3963 /* While there are holes in the table, they are not
3964 * accessible via the instruction encoding.
3967 do_mem_zpa(s, zt, pg, addr, fn);
3970 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
3975 if (sve_access_check(s)) {
3976 TCGv_i64 addr = new_tmp_a64(s);
3977 tcg_gen_muli_i64(addr, cpu_reg(s, a->rm),
3978 (a->nreg + 1) << dtype_msz(a->dtype));
3979 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
3980 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
3985 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
3987 if (sve_access_check(s)) {
3988 int vsz = vec_full_reg_size(s);
3989 int elements = vsz >> dtype_esz[a->dtype];
3990 TCGv_i64 addr = new_tmp_a64(s);
3992 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
3993 (a->imm * elements * (a->nreg + 1))
3994 << dtype_msz(a->dtype));
3995 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4000 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4002 static gen_helper_gvec_mem * const fns[16] = {
4003 gen_helper_sve_ldff1bb_r,
4004 gen_helper_sve_ldff1bhu_r,
4005 gen_helper_sve_ldff1bsu_r,
4006 gen_helper_sve_ldff1bdu_r,
4008 gen_helper_sve_ldff1sds_r,
4009 gen_helper_sve_ldff1hh_r,
4010 gen_helper_sve_ldff1hsu_r,
4011 gen_helper_sve_ldff1hdu_r,
4013 gen_helper_sve_ldff1hds_r,
4014 gen_helper_sve_ldff1hss_r,
4015 gen_helper_sve_ldff1ss_r,
4016 gen_helper_sve_ldff1sdu_r,
4018 gen_helper_sve_ldff1bds_r,
4019 gen_helper_sve_ldff1bss_r,
4020 gen_helper_sve_ldff1bhs_r,
4021 gen_helper_sve_ldff1dd_r,
4024 if (sve_access_check(s)) {
4025 TCGv_i64 addr = new_tmp_a64(s);
4026 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4027 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4028 do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
4033 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4035 static gen_helper_gvec_mem * const fns[16] = {
4036 gen_helper_sve_ldnf1bb_r,
4037 gen_helper_sve_ldnf1bhu_r,
4038 gen_helper_sve_ldnf1bsu_r,
4039 gen_helper_sve_ldnf1bdu_r,
4041 gen_helper_sve_ldnf1sds_r,
4042 gen_helper_sve_ldnf1hh_r,
4043 gen_helper_sve_ldnf1hsu_r,
4044 gen_helper_sve_ldnf1hdu_r,
4046 gen_helper_sve_ldnf1hds_r,
4047 gen_helper_sve_ldnf1hss_r,
4048 gen_helper_sve_ldnf1ss_r,
4049 gen_helper_sve_ldnf1sdu_r,
4051 gen_helper_sve_ldnf1bds_r,
4052 gen_helper_sve_ldnf1bss_r,
4053 gen_helper_sve_ldnf1bhs_r,
4054 gen_helper_sve_ldnf1dd_r,
4057 if (sve_access_check(s)) {
4058 int vsz = vec_full_reg_size(s);
4059 int elements = vsz >> dtype_esz[a->dtype];
4060 int off = (a->imm * elements) << dtype_msz(a->dtype);
4061 TCGv_i64 addr = new_tmp_a64(s);
4063 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
4064 do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
4069 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
4071 static gen_helper_gvec_mem * const fns[4] = {
4072 gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_r,
4073 gen_helper_sve_ld1ss_r, gen_helper_sve_ld1dd_r,
4075 unsigned vsz = vec_full_reg_size(s);
4079 /* Load the first quadword using the normal predicated load helpers. */
4080 desc = tcg_const_i32(simd_desc(16, 16, zt));
4081 t_pg = tcg_temp_new_ptr();
4083 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4084 fns[msz](cpu_env, t_pg, addr, desc);
4086 tcg_temp_free_ptr(t_pg);
4087 tcg_temp_free_i32(desc);
4089 /* Replicate that first quadword. */
4091 unsigned dofs = vec_full_reg_offset(s, zt);
4092 tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
4096 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4101 if (sve_access_check(s)) {
4102 int msz = dtype_msz(a->dtype);
4103 TCGv_i64 addr = new_tmp_a64(s);
4104 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4105 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4106 do_ldrq(s, a->rd, a->pg, addr, msz);
4111 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4113 if (sve_access_check(s)) {
4114 TCGv_i64 addr = new_tmp_a64(s);
4115 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4116 do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4121 /* Load and broadcast element. */
4122 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4124 if (!sve_access_check(s)) {
4128 unsigned vsz = vec_full_reg_size(s);
4129 unsigned psz = pred_full_reg_size(s);
4130 unsigned esz = dtype_esz[a->dtype];
4131 TCGLabel *over = gen_new_label();
4134 /* If the guarding predicate has no bits set, no load occurs. */
4136 /* Reduce the pred_esz_masks value simply to reduce the
4137 * size of the code generated here.
4139 uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
4140 temp = tcg_temp_new_i64();
4141 tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
4142 tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
4143 tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
4144 tcg_temp_free_i64(temp);
4146 TCGv_i32 t32 = tcg_temp_new_i32();
4147 find_last_active(s, t32, esz, a->pg);
4148 tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
4149 tcg_temp_free_i32(t32);
4152 /* Load the data. */
4153 temp = tcg_temp_new_i64();
4154 tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << esz);
4155 tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
4156 s->be_data | dtype_mop[a->dtype]);
4158 /* Broadcast to *all* elements. */
4159 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
4161 tcg_temp_free_i64(temp);
4163 /* Zero the inactive elements. */
4164 gen_set_label(over);
4165 do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
4169 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4170 int msz, int esz, int nreg)
4172 static gen_helper_gvec_mem * const fn_single[4][4] = {
4173 { gen_helper_sve_st1bb_r, gen_helper_sve_st1bh_r,
4174 gen_helper_sve_st1bs_r, gen_helper_sve_st1bd_r },
4175 { NULL, gen_helper_sve_st1hh_r,
4176 gen_helper_sve_st1hs_r, gen_helper_sve_st1hd_r },
4178 gen_helper_sve_st1ss_r, gen_helper_sve_st1sd_r },
4179 { NULL, NULL, NULL, gen_helper_sve_st1dd_r },
4181 static gen_helper_gvec_mem * const fn_multiple[3][4] = {
4182 { gen_helper_sve_st2bb_r, gen_helper_sve_st2hh_r,
4183 gen_helper_sve_st2ss_r, gen_helper_sve_st2dd_r },
4184 { gen_helper_sve_st3bb_r, gen_helper_sve_st3hh_r,
4185 gen_helper_sve_st3ss_r, gen_helper_sve_st3dd_r },
4186 { gen_helper_sve_st4bb_r, gen_helper_sve_st4hh_r,
4187 gen_helper_sve_st4ss_r, gen_helper_sve_st4dd_r },
4189 gen_helper_gvec_mem *fn;
4193 fn = fn_single[msz][esz];
4195 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
4197 fn = fn_multiple[nreg - 1][msz];
4200 do_mem_zpa(s, zt, pg, addr, fn);
4203 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a, uint32_t insn)
4205 if (a->rm == 31 || a->msz > a->esz) {
4208 if (sve_access_check(s)) {
4209 TCGv_i64 addr = new_tmp_a64(s);
4210 tcg_gen_muli_i64(addr, cpu_reg(s, a->rm), (a->nreg + 1) << a->msz);
4211 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4212 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
4217 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a, uint32_t insn)
4219 if (a->msz > a->esz) {
4222 if (sve_access_check(s)) {
4223 int vsz = vec_full_reg_size(s);
4224 int elements = vsz >> a->esz;
4225 TCGv_i64 addr = new_tmp_a64(s);
4227 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4228 (a->imm * elements * (a->nreg + 1)) << a->msz);
4229 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
4235 *** SVE gather loads / scatter stores
4238 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, int scale,
4239 TCGv_i64 scalar, gen_helper_gvec_mem_scatter *fn)
4241 unsigned vsz = vec_full_reg_size(s);
4242 TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, scale));
4243 TCGv_ptr t_zm = tcg_temp_new_ptr();
4244 TCGv_ptr t_pg = tcg_temp_new_ptr();
4245 TCGv_ptr t_zt = tcg_temp_new_ptr();
4247 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4248 tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
4249 tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
4250 fn(cpu_env, t_zt, t_pg, t_zm, scalar, desc);
4252 tcg_temp_free_ptr(t_zt);
4253 tcg_temp_free_ptr(t_zm);
4254 tcg_temp_free_ptr(t_pg);
4255 tcg_temp_free_i32(desc);
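/*
 * Per-element sketch of the addressing implied above, assuming the helpers
 * apply the descriptor's scale as a left shift of each (already extended)
 * offset element: addr[i] = base + (zm[i] << scale).  The real helpers
 * also handle the zero/sign extension of 32-bit offsets selected by xs/u.
 */
static inline uint64_t example_gather_addr(uint64_t base, uint64_t zm_elt,
                                           unsigned scale)
{
    return base + (zm_elt << scale);
}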
4258 /* Indexed by [ff][xs][u][msz]. */
4259 static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][3] = {
4260 { { { gen_helper_sve_ldbss_zsu,
4261 gen_helper_sve_ldhss_zsu,
4263 { gen_helper_sve_ldbsu_zsu,
4264 gen_helper_sve_ldhsu_zsu,
4265 gen_helper_sve_ldssu_zsu, } },
4266 { { gen_helper_sve_ldbss_zss,
4267 gen_helper_sve_ldhss_zss,
4269 { gen_helper_sve_ldbsu_zss,
4270 gen_helper_sve_ldhsu_zss,
4271 gen_helper_sve_ldssu_zss, } } },
4272 /* TODO fill in first-fault handlers */
4275 /* Note that we overload xs=2 to indicate 64-bit offset. */
4276 static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][3][2][4] = {
4277 { { { gen_helper_sve_ldbds_zsu,
4278 gen_helper_sve_ldhds_zsu,
4279 gen_helper_sve_ldsds_zsu,
4281 { gen_helper_sve_ldbdu_zsu,
4282 gen_helper_sve_ldhdu_zsu,
4283 gen_helper_sve_ldsdu_zsu,
4284 gen_helper_sve_ldddu_zsu, } },
4285 { { gen_helper_sve_ldbds_zss,
4286 gen_helper_sve_ldhds_zss,
4287 gen_helper_sve_ldsds_zss,
4289 { gen_helper_sve_ldbdu_zss,
4290 gen_helper_sve_ldhdu_zss,
4291 gen_helper_sve_ldsdu_zss,
4292 gen_helper_sve_ldddu_zss, } },
4293 { { gen_helper_sve_ldbds_zd,
4294 gen_helper_sve_ldhds_zd,
4295 gen_helper_sve_ldsds_zd,
4297 { gen_helper_sve_ldbdu_zd,
4298 gen_helper_sve_ldhdu_zd,
4299 gen_helper_sve_ldsdu_zd,
4300 gen_helper_sve_ldddu_zd, } } },
4301 /* TODO fill in first-fault handlers */
4304 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn)
4306 gen_helper_gvec_mem_scatter *fn = NULL;
4308 if (!sve_access_check(s)) {
4314 fn = gather_load_fn32[a->ff][a->xs][a->u][a->msz];
4317 fn = gather_load_fn64[a->ff][a->xs][a->u][a->msz];
4322 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
4323 cpu_reg_sp(s, a->rn), fn);
4327 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn)
4329 gen_helper_gvec_mem_scatter *fn = NULL;
4332 if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
4335 if (!sve_access_check(s)) {
4341 fn = gather_load_fn32[a->ff][0][a->u][a->msz];
4344 fn = gather_load_fn64[a->ff][2][a->u][a->msz];
4349 /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
4350 * by loading the immediate into the scalar parameter.
4352 imm = tcg_const_i64(a->imm << a->msz);
4353 do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
4354 tcg_temp_free_i64(imm);
4358 static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a, uint32_t insn)
4360 /* Indexed by [xs][msz]. */
4361 static gen_helper_gvec_mem_scatter * const fn32[2][3] = {
4362 { gen_helper_sve_stbs_zsu,
4363 gen_helper_sve_sths_zsu,
4364 gen_helper_sve_stss_zsu, },
4365 { gen_helper_sve_stbs_zss,
4366 gen_helper_sve_sths_zss,
4367 gen_helper_sve_stss_zss, },
4369 /* Note that we overload xs=2 to indicate 64-bit offset. */
4370 static gen_helper_gvec_mem_scatter * const fn64[3][4] = {
4371 { gen_helper_sve_stbd_zsu,
4372 gen_helper_sve_sthd_zsu,
4373 gen_helper_sve_stsd_zsu,
4374 gen_helper_sve_stdd_zsu, },
4375 { gen_helper_sve_stbd_zss,
4376 gen_helper_sve_sthd_zss,
4377 gen_helper_sve_stsd_zss,
4378 gen_helper_sve_stdd_zss, },
4379 { gen_helper_sve_stbd_zd,
4380 gen_helper_sve_sthd_zd,
4381 gen_helper_sve_stsd_zd,
4382 gen_helper_sve_stdd_zd, },
4384 gen_helper_gvec_mem_scatter *fn;
4386 if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
4389 if (!sve_access_check(s)) {
4394 fn = fn32[a->xs][a->msz];
4397 fn = fn64[a->xs][a->msz];
4400 g_assert_not_reached();
4402 do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
4403 cpu_reg_sp(s, a->rn), fn);
4411 static bool trans_PRF(DisasContext *s, arg_PRF *a, uint32_t insn)
4413 /* Prefetch is a nop within QEMU. */
4414 sve_access_check(s);
4418 static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a, uint32_t insn)
4423 /* Prefetch is a nop within QEMU. */
4424 sve_access_check(s);