/*
 * AArch64 SVE translation
 *
 * Copyright (c) 2018 Linaro, Ltd
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg-op.h"
#include "tcg-op-gvec.h"
#include "tcg-gvec-desc.h"
#include "qemu/log.h"
#include "arm_ldst.h"
#include "translate.h"
#include "internals.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/log.h"
#include "trace-tcg.h"
#include "translate-a64.h"
#include "fpu/softfloat.h"

typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);

/*
 * Helpers for extracting complex instruction fields.
 */

/* See e.g. ASR (immediate, predicated).
 * Returns -1 for unallocated encoding; diagnose later.
 */
static int tszimm_esz(int x)
{
    x >>= 3;  /* discard imm3 */
    return 31 - clz32(x);
}

static int tszimm_shr(int x)
{
    return (16 << tszimm_esz(x)) - x;
}

/* See e.g. LSL (immediate, predicated).  */
static int tszimm_shl(int x)
{
    return x - (8 << tszimm_esz(x));
}
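
/* Worked example of the tsz:imm3 encoding handled above (illustrative):
 * for ASR of byte elements by 3, the field is x = 0b01101
 * (tsz = 0b01, imm3 = 0b101).  tszimm_esz discards imm3, leaving 0b01,
 * so esz = 31 - clz32(1) = 0 (MO_8); the shift count is then
 * tszimm_shr(x) = (16 << 0) - 13 = 3.
 */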

static inline int plus1(int x)
{
    return x + 1;
}

/* The SH bit is in bit 8.  Extract the low 8 and shift.  */
static inline int expand_imm_sh8s(int x)
{
    return (int8_t)x << (x & 0x100 ? 8 : 0);
}

static inline int expand_imm_sh8u(int x)
{
    return (uint8_t)x << (x & 0x100 ? 8 : 0);
}
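
/* Illustrative values for the expanders above: expand_imm_sh8s(0x0ff)
 * yields (int8_t)0xff << 0 = -1, while expand_imm_sh8s(0x1ff) yields
 * (int8_t)0xff << 8 = -256.  The unsigned form expand_imm_sh8u(0x180)
 * yields 0x80 << 8 = 0x8000.
 */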

/* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
 * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
 */
static inline int msz_dtype(int msz)
{
    static const uint8_t dtype[4] = { 0, 5, 10, 15 };
    return dtype[msz];
}
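
/* The resulting dtype values select the unsigned same-size contiguous
 * loads: msz 0 -> dtype 0 (LD1B), msz 1 -> dtype 5 (LD1H),
 * msz 2 -> dtype 10 (LD1W), msz 3 -> dtype 15 (LD1D).
 */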

/*
 * Include the generated decoder.
 */

#include "decode-sve.inc.c"

/*
 * Implement all of the translator functions referenced by the decoder.
 */

/* Return the offset into CPUARMState of the predicate vector register Pn.
 * Note for this purpose, FFR is P16.
 */
static inline int pred_full_reg_offset(DisasContext *s, int regno)
{
    return offsetof(CPUARMState, vfp.pregs[regno]);
}

/* Return the byte size of the whole predicate register, VL / 64.  */
static inline int pred_full_reg_size(DisasContext *s)
{
    return s->sve_len >> 3;
}

/* Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}

static int pred_gvec_reg_size(DisasContext *s)
{
    return size_for_gvec(pred_full_reg_size(s));
}

/* Invoke a vector expander on two Zregs.  */
static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn), vsz, vsz);
    }
    return true;
}

/* Invoke a vector expander on three Zregs.  */
static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(esz, vec_full_reg_offset(s, rd),
                vec_full_reg_offset(s, rn),
                vec_full_reg_offset(s, rm), vsz, vsz);
    }
    return true;
}

/* Invoke a vector move on two Zregs.  */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
    return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Initialize a Zreg with replications of a 64-bit immediate.  */
static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
{
    unsigned vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
}

/* Invoke a vector expander on two Pregs.  */
static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
                         int esz, int rd, int rn)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn), psz, psz);
    }
    return true;
}

/* Invoke a vector expander on three Pregs.  */
static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
                         int esz, int rd, int rn, int rm)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        gvec_fn(esz, pred_full_reg_offset(s, rd),
                pred_full_reg_offset(s, rn),
                pred_full_reg_offset(s, rm), psz, psz);
    }
    return true;
}

/* Invoke a vector operation on four Pregs.  */
static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
                        int rd, int rn, int rm, int rg)
{
    if (sve_access_check(s)) {
        unsigned psz = pred_gvec_reg_size(s);
        tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
                       pred_full_reg_offset(s, rn),
                       pred_full_reg_offset(s, rm),
                       pred_full_reg_offset(s, rg),
                       psz, psz, gvec_op);
    }
    return true;
}

/* Invoke a vector move on two Pregs.  */
static bool do_mov_p(DisasContext *s, int rd, int rn)
{
    return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
}

/* Set the cpu flags as per a return from an SVE helper.  */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}
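
/* As consumed above, the helper's result word packs the PredTest flags
 * as: bit 31 = N (the first active element was true), bit 1 = !Z (some
 * active element was true), bit 0 = C (the last active element was
 * false).  Recall that in this translator cpu_ZF holds Z inverted --
 * Z is set when cpu_ZF == 0 -- which is why ZF takes t & 2 directly.
 */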

/* Subroutines computing the ARM PredTest pseudofunction.  */
static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
{
    TCGv_i32 t = tcg_temp_new_i32();

    gen_helper_sve_predtest1(t, d, g);
    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
{
    TCGv_ptr dptr = tcg_temp_new_ptr();
    TCGv_ptr gptr = tcg_temp_new_ptr();
    TCGv_i32 t;

    tcg_gen_addi_ptr(dptr, cpu_env, dofs);
    tcg_gen_addi_ptr(gptr, cpu_env, gofs);
    t = tcg_const_i32(words);

    gen_helper_sve_predtest(t, dptr, gptr, t);
    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(gptr);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
}

/* For each element size, the bits within a predicate word that are active.  */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
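
/* For example, with esz = 1 (halfword elements) only every second
 * predicate bit is significant, hence the 0x5555... mask; with esz = 3
 * (doubleword elements) only every eighth bit is, hence 0x0101... .
 */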

/*
 *** SVE Logical - Unpredicated Group
 */

static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
}

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (a->rn == a->rm) { /* MOV */
        return do_mov_z(s, a->rd, a->rn);
    } else {
        return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
    }
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
}

static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Unpredicated Group
 */

static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
}

static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
}

/*
 *** SVE Integer Arithmetic - Binary Predicated Group
 */

static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZ(NAME, name) \
static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[4] = {                           \
        gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
        gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
    };                                                                    \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}
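
/* For example, DO_ZPZZ(AND, and) expands to trans_AND_zpzz(), which
 * dispatches on a->esz to gen_helper_sve_and_zpzz_{b,h,s,d}; the
 * predicate in Pg selects which elements of Zn op Zm reach Zd.
 */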

DO_ZPZZ(AND, and)
DO_ZPZZ(EOR, eor)
DO_ZPZZ(ORR, orr)
DO_ZPZZ(BIC, bic)

DO_ZPZZ(ADD, add)
DO_ZPZZ(SUB, sub)

DO_ZPZZ(SMAX, smax)
DO_ZPZZ(UMAX, umax)
DO_ZPZZ(SMIN, smin)
DO_ZPZZ(UMIN, umin)
DO_ZPZZ(SABD, sabd)
DO_ZPZZ(UABD, uabd)

DO_ZPZZ(MUL, mul)
DO_ZPZZ(SMULH, smulh)
DO_ZPZZ(UMULH, umulh)

DO_ZPZZ(ASR, asr)
DO_ZPZZ(LSR, lsr)
DO_ZPZZ(LSL, lsl)

static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_4 * const fns[4] = {
        NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
    };
    return do_zpzz_ool(s, a, fns[a->esz]);
}

DO_ZPZZ(SEL, sel)

#undef DO_ZPZZ

/*
 *** SVE Integer Arithmetic - Unary Predicated Group
 */

static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                              \
    static gen_helper_gvec_3 * const fns[4] = {                \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,  \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,  \
    };                                                         \
    return do_zpz_ool(s, a, fns[a->esz]);                      \
}

DO_ZPZ(CLS, cls)
DO_ZPZ(CLZ, clz)
DO_ZPZ(CNT_zpz, cnt_zpz)
DO_ZPZ(CNOT, cnot)
DO_ZPZ(NOT_zpz, not_zpz)
DO_ZPZ(ABS, abs)
DO_ZPZ(NEG, neg)

static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fabs_h,
        gen_helper_sve_fabs_s,
        gen_helper_sve_fabs_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_fneg_h,
        gen_helper_sve_fneg_s,
        gen_helper_sve_fneg_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_sxtb_h,
        gen_helper_sve_sxtb_s,
        gen_helper_sve_sxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_uxtb_h,
        gen_helper_sve_uxtb_s,
        gen_helper_sve_uxtb_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_sxth_s,
        gen_helper_sve_sxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL,
        gen_helper_sve_uxth_s,
        gen_helper_sve_uxth_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
}

static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
}

#undef DO_ZPZ

/*
 *** SVE Integer Reduction Group
 */

typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
                       gen_helper_gvec_reduc *fn)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr t_zn, t_pg;
    TCGv_i32 desc;
    TCGv_i64 temp;

    if (fn == NULL) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    temp = tcg_temp_new_i64();
    t_zn = tcg_temp_new_ptr();
    t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
    fn(temp, t_zn, t_pg, desc);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);

    write_fp_dreg(s, a->rd, temp);
    tcg_temp_free_i64(temp);
    return true;
}

#define DO_VPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                        \
    static gen_helper_gvec_reduc * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
    };                                                                   \
    return do_vpz_ool(s, a, fns[a->esz]);                                \
}

DO_VPZ(ORV, orv)
DO_VPZ(ANDV, andv)
DO_VPZ(EORV, eorv)

DO_VPZ(UADDV, uaddv)
DO_VPZ(SMAXV, smaxv)
DO_VPZ(UMAXV, umaxv)
DO_VPZ(SMINV, sminv)
DO_VPZ(UMINV, uminv)

static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_reduc * const fns[4] = {
        gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
        gen_helper_sve_saddv_s, NULL
    };
    return do_vpz_ool(s, a, fns[a->esz]);
}

#undef DO_VPZ

/*
 *** SVE Shift by Immediate - Predicated Group
 */

/* Store zero into every active element of Zd.  We will use this for two
 * and three-operand predicated instructions for which logic dictates a
 * zero result.
 */
static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
    };
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
                           pred_full_reg_offset(s, pg),
                           vsz, vsz, 0, fns[esz]);
    }
    return true;
}

static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
                        gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
        gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
    };
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    /* Shift by element size is architecturally valid.  For
       arithmetic right-shift, it's the same as by one less. */
    a->imm = MIN(a->imm, (8 << a->esz) - 1);
    return do_zpzi_ool(s, a, fns[a->esz]);
}

static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
        gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
        gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.
       For logical shifts, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
        gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
    };
    if (a->esz < 0) {
        return false;
    }
    /* Shift by element size is architecturally valid.  For arithmetic
       right shift for division, it is a zeroing operation.  */
    if (a->imm >= (8 << a->esz)) {
        return do_clr_zp(s, a->rd, a->pg, a->esz);
    } else {
        return do_zpzi_ool(s, a, fns[a->esz]);
    }
}

/*
 *** SVE Bitwise Shift - Predicated Group
 */

#define DO_ZPZW(NAME, name) \
static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_4 * const fns[3] = {                           \
        gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
        gen_helper_sve_##name##_zpzw_s,                                   \
    };                                                                    \
    if (a->esz < 0 || a->esz >= 3) {                                      \
        return false;                                                     \
    }                                                                     \
    return do_zpzz_ool(s, a, fns[a->esz]);                                \
}

DO_ZPZW(ASR, asr)
DO_ZPZW(LSR, lsr)
DO_ZPZW(LSL, lsl)

#undef DO_ZPZW

/*
 *** SVE Bitwise Shift - Unpredicated Group
 */

static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
                         void (*gvec_fn)(unsigned, uint32_t, uint32_t,
                                         int64_t, uint32_t, uint32_t))
{
    if (a->esz < 0) {
        /* Invalid tsz encoding -- see tszimm_esz. */
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        /* Shift by element size is architecturally valid.  For
           arithmetic right-shift, it's the same as by one less.
           Otherwise it is a zeroing operation.  */
        if (a->imm >= 8 << a->esz) {
            if (asr) {
                a->imm = (8 << a->esz) - 1;
            } else {
                do_dupi_z(s, a->rd, 0);
                return true;
            }
        }
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
    }
    return true;
}

static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
}

static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
}

static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
}

static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
{
    if (fn == NULL) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZZW(NAME, name) \
static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a,           \
                               uint32_t insn)                             \
{                                                                         \
    static gen_helper_gvec_3 * const fns[4] = {                           \
        gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
        gen_helper_sve_##name##_zzw_s, NULL                               \
    };                                                                    \
    return do_zzw_ool(s, a, fns[a->esz]);                                 \
}

DO_ZZW(ASR, asr)
DO_ZZW(LSR, lsr)
DO_ZZW(LSL, lsl)

#undef DO_ZZW

/*
 *** SVE Integer Multiply-Add Group
 */

static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
                         gen_helper_gvec_5 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->ra),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           pred_full_reg_offset(s, a->pg),
                           vsz, vsz, 0, fn);
    }
    return true;
}

#define DO_ZPZZZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
{                                                                    \
    static gen_helper_gvec_5 * const fns[4] = {                      \
        gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
        gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
    };                                                               \
    return do_zpzzz_ool(s, a, fns[a->esz]);                          \
}

DO_ZPZZZ(MLA, mla)
DO_ZPZZZ(MLS, mls)

#undef DO_ZPZZZ

/*
 *** SVE Index Generation Group
 */

static void do_index(DisasContext *s, int esz, int rd,
                     TCGv_i64 start, TCGv_i64 incr)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    if (esz == 3) {
        gen_helper_sve_index_d(t_zd, start, incr, desc);
    } else {
        typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
        static index_fn * const fns[3] = {
            gen_helper_sve_index_b,
            gen_helper_sve_index_h,
            gen_helper_sve_index_s,
        };
        TCGv_i32 s32 = tcg_temp_new_i32();
        TCGv_i32 i32 = tcg_temp_new_i32();

        tcg_gen_extrl_i64_i32(s32, start);
        tcg_gen_extrl_i64_i32(i32, incr);
        fns[esz](t_zd, s32, i32, desc);

        tcg_temp_free_i32(s32);
        tcg_temp_free_i32(i32);
    }
    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_i32(desc);
}
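
/* For example, INDEX Zd.S, #1, #2 (trans_INDEX_ii below) fills each
 * 32-bit element n of Zd with 1 + 2 * n: { 1, 3, 5, 7, ... }.
 */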

static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm1);
        TCGv_i64 incr = tcg_const_i64(a->imm2);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = tcg_const_i64(a->imm);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(start);
    }
    return true;
}

static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = tcg_const_i64(a->imm);
        do_index(s, a->esz, a->rd, start, incr);
        tcg_temp_free_i64(incr);
    }
    return true;
}

static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 start = cpu_reg(s, a->rn);
        TCGv_i64 incr = cpu_reg(s, a->rm);
        do_index(s, a->esz, a->rd, start, incr);
    }
    return true;
}

/*
 *** SVE Stack Allocation Group
 */

static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
    return true;
}

static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
{
    TCGv_i64 rd = cpu_reg_sp(s, a->rd);
    TCGv_i64 rn = cpu_reg_sp(s, a->rn);
    tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
    return true;
}

static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
{
    TCGv_i64 reg = cpu_reg(s, a->rd);
    tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
    return true;
}

/*
 *** SVE Compute Vector Address Group
 */

static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, a->imm, fn);
    }
    return true;
}

static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p32);
}

static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_p64);
}

static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_s32);
}

static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
{
    return do_adr(s, a, gen_helper_sve_adr_u32);
}

/*
 *** SVE Integer Misc - Unpredicated Group
 */

static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4] = {
        NULL,
        gen_helper_sve_fexpa_h,
        gen_helper_sve_fexpa_s,
        gen_helper_sve_fexpa_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL,
        gen_helper_sve_ftssel_h,
        gen_helper_sve_ftssel_s,
        gen_helper_sve_ftssel_d,
    };
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

/*
 *** SVE Predicate Logical Operations Group
 */

static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps.  */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
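
/* Note that psz == 8 holds exactly when the vector length is at most
 * 512 bits, i.e. when the whole predicate fits in a single i64;
 * otherwise the general gvec expansion plus an out-of-line PredTest
 * is required.
 */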

static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_and_pg_i64,
        .fniv = gen_and_pg_vec,
        .fno = gen_helper_sve_and_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->rn == a->rm) {
        if (a->pg == a->rn) {
            return do_mov_p(s, a->rd, a->rn);
        } else {
            return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
        }
    } else if (a->pg == a->rn || a->pg == a->rm) {
        return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_bic_pg_i64,
        .fniv = gen_bic_pg_vec,
        .fno = gen_helper_sve_bic_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn) {
        return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_eor_pg_i64,
        .fniv = gen_eor_pg_vec,
        .fno = gen_helper_sve_eor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}

static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}

static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_sel_pg_i64,
        .fniv = gen_sel_pg_vec,
        .fno = gen_helper_sve_sel_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return false;
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orr_pg_i64,
        .fniv = gen_orr_pg_vec,
        .fno = gen_helper_sve_orr_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else if (a->pg == a->rn && a->rn == a->rm) {
        return do_mov_p(s, a->rd, a->rn);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}

static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}

static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_orn_pg_i64,
        .fniv = gen_orn_pg_vec,
        .fno = gen_helper_sve_orn_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nor_pg_i64,
        .fniv = gen_nor_pg_vec,
        .fno = gen_helper_sve_nor_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}

static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                            TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}

static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    static const GVecGen4 op = {
        .fni8 = gen_nand_pg_i64,
        .fniv = gen_nand_pg_vec,
        .fno = gen_helper_sve_nand_pppp,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
    };
    if (a->s) {
        return do_pppp_flags(s, a, &op);
    } else {
        return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
    }
}

/*
 *** SVE Predicate Misc Group
 */

static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        int nofs = pred_full_reg_offset(s, a->rn);
        int gofs = pred_full_reg_offset(s, a->pg);
        int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);

        if (words == 1) {
            TCGv_i64 pn = tcg_temp_new_i64();
            TCGv_i64 pg = tcg_temp_new_i64();

            tcg_gen_ld_i64(pn, cpu_env, nofs);
            tcg_gen_ld_i64(pg, cpu_env, gofs);
            do_predtest1(pn, pg);

            tcg_temp_free_i64(pn);
            tcg_temp_free_i64(pg);
        } else {
            do_predtest(s, nofs, gofs, words);
        }
    }
    return true;
}

/* See the ARM pseudocode DecodePredCount.  */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    switch (pattern) {
    case 0x0: /* POW2 */
        return pow2floor(elements);
    case 0x1: /* VL1 */
    case 0x2: /* VL2 */
    case 0x3: /* VL3 */
    case 0x4: /* VL4 */
    case 0x5: /* VL5 */
    case 0x6: /* VL6 */
    case 0x7: /* VL7 */
    case 0x8: /* VL8 */
        bound = pattern;
        break;
    case 0x9: /* VL16 */
    case 0xa: /* VL32 */
    case 0xb: /* VL64 */
    case 0xc: /* VL128 */
    case 0xd: /* VL256 */
        bound = 16 << (pattern - 9);
        break;
    case 0x1d: /* MUL4 */
        return elements - elements % 4;
    case 0x1e: /* MUL3 */
        return elements - elements % 3;
    case 0x1f: /* ALL */
        return elements;
    default:   /* #uimm5 */
        return 0;
    }
    return elements >= bound ? bound : 0;
}
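
/* Worked example: with fullsz = 64 bytes (a 512-bit vector) and esz = 2
 * there are 16 word elements.  POW2 returns 16, VL8 returns 8, VL32
 * returns 0 (16 < 32), MUL3 returns 15, and ALL returns 16.
 */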

/* This handles all of the predicate initialization instructions,
 * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
 * so that decode_pred_count returns 0.  For SETFFR, we will have
 * set RD == 16 == FFR.
 */
static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned ofs = pred_full_reg_offset(s, rd);
    unsigned numelem, setsz, i;
    uint64_t word, lastword;
    TCGv_i64 t;

    numelem = decode_pred_count(fullsz, pat, esz);

    /* Determine what we must store into each bit, and how many.  */
    if (numelem == 0) {
        lastword = word = 0;
        setsz = fullsz;
    } else {
        setsz = numelem << esz;
        lastword = word = pred_esz_masks[esz];
        if (setsz % 64) {
            lastword &= ~(-1ull << (setsz % 64));
        }
    }

    t = tcg_temp_new_i64();
    if (fullsz <= 64) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs);
        goto done;
    }

    if (word == lastword) {
        unsigned maxsz = size_for_gvec(fullsz / 8);
        unsigned oprsz = size_for_gvec(setsz / 8);

        if (oprsz * 8 == setsz) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            goto done;
        }
        if (oprsz * 8 == setsz + 8) {
            tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
            tcg_gen_movi_i64(t, 0);
            tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
            goto done;
        }
    }

    setsz /= 8;
    fullsz /= 8;

    tcg_gen_movi_i64(t, word);
    for (i = 0; i < setsz; i += 8) {
        tcg_gen_st_i64(t, cpu_env, ofs + i);
    }
    if (lastword != word) {
        tcg_gen_movi_i64(t, lastword);
        tcg_gen_st_i64(t, cpu_env, ofs + i);
        i += 8;
    }
    if (i < fullsz) {
        tcg_gen_movi_i64(t, 0);
        for (; i < fullsz; i += 8) {
            tcg_gen_st_i64(t, cpu_env, ofs + i);
        }
    }

 done:
    tcg_temp_free_i64(t);

    /* PTRUES */
    if (setflag) {
        tcg_gen_movi_i32(cpu_NF, -(word != 0));
        tcg_gen_movi_i32(cpu_CF, word == 0);
        tcg_gen_movi_i32(cpu_VF, 0);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
    }
    return true;
}
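
/* For PTRUES the flags mirror a PredTest of the result against itself:
 * any element set gives N = 1, C = 0, while an empty result
 * (word == 0) gives N = 0, Z = 1, C = 1.  V is always cleared.
 */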

static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
{
    return do_predset(s, a->esz, a->rd, a->pat, a->s);
}

static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
{
    /* Note pat == 31 is #all, to set all elements.  */
    return do_predset(s, 0, FFR_PRED_NUM, 31, false);
}

static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
{
    /* Note pat == 32 is #unimp, to set no elements.  */
    return do_predset(s, 0, a->rd, 32, false);
}

static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
{
    /* The path through do_pppp_flags is complicated enough to want to avoid
     * duplication.  Frob the arguments into the form of a predicated AND.
     */
    arg_rprr_s alt_a = {
        .rd = a->rd, .pg = a->pg, .s = a->s,
        .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
    };
    return trans_AND_pppp(s, &alt_a, insn);
}

static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
{
    return do_mov_p(s, a->rd, FFR_PRED_NUM);
}

static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
{
    return do_mov_p(s, FFR_PRED_NUM, a->rn);
}

static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
                            void (*gen_fn)(TCGv_i32, TCGv_ptr,
                                           TCGv_ptr, TCGv_i32))
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_ptr t_pd = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();
    TCGv_i32 t;
    unsigned desc;

    desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

    tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
    t = tcg_const_i32(desc);

    gen_fn(t, t_pd, t_pg, t);
    tcg_temp_free_ptr(t_pd);
    tcg_temp_free_ptr(t_pg);

    do_pred_flags(t);
    tcg_temp_free_i32(t);
    return true;
}

static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
}

static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
}

/*
 *** SVE Element Count Group
 */

/* Perform an inline saturating addition of a 32-bit value within
 * a 64-bit register.  The second operand is known to be positive,
 * which halves the comparisons we must perform to bound the result.
 */
static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    int64_t ibound;
    TCGv_i64 bound;
    TCGCond cond;

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (u) {
        tcg_gen_ext32u_i64(reg, reg);
    } else {
        tcg_gen_ext32s_i64(reg, reg);
    }
    if (d) {
        tcg_gen_sub_i64(reg, reg, val);
        ibound = (u ? 0 : INT32_MIN);
        cond = TCG_COND_LT;
    } else {
        tcg_gen_add_i64(reg, reg, val);
        ibound = (u ? UINT32_MAX : INT32_MAX);
        cond = TCG_COND_GT;
    }
    bound = tcg_const_i64(ibound);
    tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
    tcg_temp_free_i64(bound);
}
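
/* For example, a 32-bit unsigned decrement (u = true, d = true) becomes:
 * zero-extend reg, subtract val, then reg = (reg < 0) ? 0 : reg via the
 * movcond, clamping the result at the unsigned floor.
 */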

/* Similarly with 64-bit values.  */
static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
{
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i64 t2;

    if (u) {
        if (d) {
            tcg_gen_sub_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, 0);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
        } else {
            tcg_gen_add_i64(t0, reg, val);
            tcg_gen_movi_i64(t1, -1);
            tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
        }
    } else {
        if (d) {
            /* Detect signed overflow for subtraction.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_sub_i64(t1, reg, val);
            tcg_gen_xor_i64(reg, reg, t1);
            tcg_gen_and_i64(t0, t0, reg);

            /* Bound the result.  */
            tcg_gen_movi_i64(reg, INT64_MIN);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
        } else {
            /* Detect signed overflow for addition.  */
            tcg_gen_xor_i64(t0, reg, val);
            tcg_gen_add_i64(reg, reg, val);
            tcg_gen_xor_i64(t1, reg, val);
            tcg_gen_andc_i64(t0, t1, t0);

            /* Bound the result.  */
            tcg_gen_movi_i64(t1, INT64_MAX);
            t2 = tcg_const_i64(0);
            tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
        }
        tcg_temp_free_i64(t2);
    }
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}

/* Similarly with a vector and a scalar operand.  */
static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
                              TCGv_i64 val, bool u, bool d)
{
    unsigned vsz = vec_full_reg_size(s);
    TCGv_ptr dptr, nptr;
    TCGv_i32 t32, desc;
    TCGv_i64 t64;

    dptr = tcg_temp_new_ptr();
    nptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
    desc = tcg_const_i32(simd_desc(vsz, vsz, 0));

    switch (esz) {
    case MO_8:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_16:
        t32 = tcg_temp_new_i32();
        tcg_gen_extrl_i64_i32(t32, val);
        if (d) {
            tcg_gen_neg_i32(t32, t32);
        }
        if (u) {
            gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
        } else {
            gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
        }
        tcg_temp_free_i32(t32);
        break;

    case MO_32:
        t64 = tcg_temp_new_i64();
        if (d) {
            tcg_gen_neg_i64(t64, val);
        } else {
            tcg_gen_mov_i64(t64, val);
        }
        if (u) {
            gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
        } else {
            gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
        }
        tcg_temp_free_i64(t64);
        break;

    case MO_64:
        if (u) {
            if (d) {
                gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
            } else {
                gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
            }
        } else if (d) {
            t64 = tcg_temp_new_i64();
            tcg_gen_neg_i64(t64, val);
            gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
            tcg_temp_free_i64(t64);
        } else {
            gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
        }
        break;

    default:
        g_assert_not_reached();
    }

    tcg_temp_free_ptr(dptr);
    tcg_temp_free_ptr(nptr);
    tcg_temp_free_i32(desc);
}
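
/* Note the strategy above: a decrement is folded into a negated addend
 * passed at a wider width (i32/i64) to the saturating-add helper, so no
 * separate subtract helpers are needed for the narrow element sizes;
 * only the 64-bit unsigned case takes a distinct uqsubi helper, since
 * negation cannot be represented at a wider width there.
 */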

static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
    }
    return true;
}

static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned fullsz = vec_full_reg_size(s);
        unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
        int inc = numelem * a->imm * (a->d ? -1 : 1);
        TCGv_i64 reg = cpu_reg(s, a->rd);

        tcg_gen_addi_i64(reg, reg, inc);
    }
    return true;
}

static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
    if (inc == 0) {
        if (a->u) {
            tcg_gen_ext32u_i64(reg, reg);
        } else {
            tcg_gen_ext32s_i64(reg, reg);
        }
    } else {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_32(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
                               uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;
    TCGv_i64 reg = cpu_reg(s, a->rd);

    if (inc != 0) {
        TCGv_i64 t = tcg_const_i64(inc);
        do_sat_addsub_64(reg, t, a->u, a->d);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
            tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
                              vec_full_reg_offset(s, a->rn),
                              t, fullsz, fullsz);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
                            uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }

    unsigned fullsz = vec_full_reg_size(s);
    unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
    int inc = numelem * a->imm;

    if (inc != 0) {
        if (sve_access_check(s)) {
            TCGv_i64 t = tcg_const_i64(inc);
            do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
            tcg_temp_free_i64(t);
        }
    } else {
        do_mov_z(s, a->rd, a->rn);
    }
    return true;
}

/*
 *** SVE Bitwise Immediate Group
 */

static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
    }
    return true;
}

static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_andi);
}

static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_ori);
}

static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
{
    return do_zz_dbm(s, a, tcg_gen_gvec_xori);
}

static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
{
    uint64_t imm;
    if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
                                extract32(a->dbm, 0, 6),
                                extract32(a->dbm, 6, 6))) {
        return false;
    }
    if (sve_access_check(s)) {
        do_dupi_z(s, a->rd, imm);
    }
    return true;
}

/*
 *** SVE Integer Wide Immediate - Predicated Group
 */

/* Implement all merging copies.  This is used for CPY (immediate),
 * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
 */
static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
                     TCGv_i64 val)
{
    typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_cpy * const fns[4] = {
        gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
        gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();
    TCGv_ptr t_pg = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
    tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));

    fns[esz](t_zd, t_zn, t_pg, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_ptr(t_pg);
    tcg_temp_free_i32(desc);
}

static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        /* Decode the VFP immediate.  */
        uint64_t imm = vfp_expand_imm(a->esz, a->imm);
        TCGv_i64 t_imm = tcg_const_i64(imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
{
    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
{
    static gen_helper_gvec_2i * const fns[4] = {
        gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
        gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
    };

    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 t_imm = tcg_const_i64(a->imm);
        tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
                            pred_full_reg_offset(s, a->pg),
                            t_imm, vsz, vsz, 0, fns[a->esz]);
        tcg_temp_free_i64(t_imm);
    }
    return true;
}

/*
 *** SVE Permute Extract Group
 */

static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = vec_full_reg_size(s);
    unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
    unsigned n_siz = vsz - n_ofs;
    unsigned d = vec_full_reg_offset(s, a->rd);
    unsigned n = vec_full_reg_offset(s, a->rn);
    unsigned m = vec_full_reg_offset(s, a->rm);

    /* Use host vector move insns if we have appropriate sizes
     * and no unfortunate overlap.
     */
    if (m != d
        && n_ofs == size_for_gvec(n_ofs)
        && n_siz == size_for_gvec(n_siz)
        && (d != n || n_siz <= n_ofs)) {
        tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
        if (n_ofs != 0) {
            tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
        }
    } else {
        tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
    }
    return true;
}
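
/* Illustrative case for the fast path above: with vsz = 32 and
 * imm = 16, n_ofs = 16 and n_siz = 16, so Zd bytes 0..15 come from
 * Zn bytes 16..31 and Zd bytes 16..31 from Zm bytes 0..15, each a
 * single gvec move.
 */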

/*
 *** SVE Permute - Unpredicated Group
 */

static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
                             vsz, vsz, cpu_reg_sp(s, a->rn));
    }
    return true;
}

static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
{
    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned dofs = vec_full_reg_offset(s, a->rd);
        unsigned esz, index;

        esz = ctz32(a->imm);
        index = a->imm >> (esz + 1);

        if ((index << esz) < vsz) {
            unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
            tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
        } else {
            tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
        }
    }
    return true;
}

static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_insr * const fns[4] = {
        gen_helper_sve_insr_b, gen_helper_sve_insr_h,
        gen_helper_sve_insr_s, gen_helper_sve_insr_d,
    };
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr t_zd = tcg_temp_new_ptr();
    TCGv_ptr t_zn = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));

    fns[a->esz](t_zd, t_zn, val, desc);

    tcg_temp_free_ptr(t_zd);
    tcg_temp_free_ptr(t_zn);
    tcg_temp_free_i32(desc);
}

static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 t = tcg_temp_new_i64();
        tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
        do_insr_i64(s, a, t);
        tcg_temp_free_i64(t);
    }
    return true;
}

static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (sve_access_check(s)) {
        do_insr_i64(s, a, cpu_reg(s, a->rm));
    }
    return true;
}

static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4] = {
        gen_helper_sve_rev_b, gen_helper_sve_rev_h,
        gen_helper_sve_rev_s, gen_helper_sve_rev_d
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
        gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const fns[4][2] = {
        { NULL, NULL },
        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
    };

    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn)
                           + (a->h ? vsz / 2 : 0),
                           vsz, vsz, 0, fns[a->esz][a->u]);
    }
    return true;
}

/*
 *** SVE Permute - Predicates Group
 */

static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
                          gen_helper_gvec_3 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);

    /* Predicate sizes may be smaller and cannot use simd_desc.
       We cannot round up, as we do elsewhere, because we need
       the exact size for ZIP2 and REV.  We retain the style for
       the other helpers for consistency.  */
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_ptr t_m = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
    tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_m, t_desc);

    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    tcg_temp_free_ptr(t_m);
    tcg_temp_free_i32(t_desc);
    return true;
}

static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
                          gen_helper_gvec_2 *fn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned vsz = pred_full_reg_size(s);
    TCGv_ptr t_d = tcg_temp_new_ptr();
    TCGv_ptr t_n = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    int desc;

    tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));

    /* Predicate sizes may be smaller and cannot use simd_desc.
       We cannot round up, as we do elsewhere, because we need
       the exact size for ZIP2 and REV.  We retain the style for
       the other helpers for consistency.  */
    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
    t_desc = tcg_const_i32(desc);

    fn(t_d, t_n, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_d);
    tcg_temp_free_ptr(t_n);
    return true;
}

static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
}

static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
}

static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
}

static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
}

static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
}

static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
}

static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
}

static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
{
    return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
}

static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
{
    return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
}

/*
 *** SVE Permute - Interleaving Group
 */

static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
{
    static gen_helper_gvec_3 * const fns[4] = {
        gen_helper_sve_zip_b, gen_helper_sve_zip_h,
        gen_helper_sve_zip_s, gen_helper_sve_zip_d,
    };

    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        unsigned high_ofs = high ? vsz / 2 : 0;
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn) + high_ofs,
                           vec_full_reg_offset(s, a->rm) + high_ofs,
                           vsz, vsz, 0, fns[a->esz]);
    }
    return true;
}

static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
                            gen_helper_gvec_3 *fn)
{
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                           vec_full_reg_offset(s, a->rn),
                           vec_full_reg_offset(s, a->rm),
                           vsz, vsz, data, fn);
    }
    return true;
}

static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zip(s, a, false);
}

static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zip(s, a, true);
}

static gen_helper_gvec_3 * const uzp_fns[4] = {
    gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
    gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};

static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
}

static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
}

static gen_helper_gvec_3 * const trn_fns[4] = {
    gen_helper_sve_trn_b, gen_helper_sve_trn_h,
    gen_helper_sve_trn_s, gen_helper_sve_trn_d,
};

static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
}

static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
}

/*
 *** SVE Permute Vector - Predicated Group
 */

static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const fns[4] = {
        NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
    };
    return do_zpz_ool(s, a, fns[a->esz]);
}

/* Call the helper that computes the ARM LastActiveElement pseudocode
 * function, scaled by the element size.  This includes the not found
 * indication; e.g. not found for esz=3 is -8.
 */
static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
{
    /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
     * round up, as we do elsewhere, because we need the exact size.
     */
    TCGv_ptr t_p = tcg_temp_new_ptr();
    TCGv_i32 t_desc;
    unsigned vsz = pred_full_reg_size(s);
    unsigned desc;

    desc = vsz - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);

    tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
    t_desc = tcg_const_i32(desc);

    gen_helper_sve_last_active_element(ret, t_p, t_desc);

    tcg_temp_free_i32(t_desc);
    tcg_temp_free_ptr(t_p);
}

/* Increment LAST to the offset of the next element in the vector,
 * wrapping around to 0.
 */
static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    tcg_gen_addi_i32(last, last, 1 << esz);
    if (is_power_of_2(vsz)) {
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        TCGv_i32 max = tcg_const_i32(vsz);
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}

/* If LAST < 0, set LAST to the offset of the last element in the vector.  */
static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
{
    unsigned vsz = vec_full_reg_size(s);

    if (is_power_of_2(vsz)) {
        tcg_gen_andi_i32(last, last, vsz - 1);
    } else {
        TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
        TCGv_i32 zero = tcg_const_i32(0);
        tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
        tcg_temp_free_i32(max);
        tcg_temp_free_i32(zero);
    }
}

/* Load an unsigned element of ESZ from BASE+OFS.  */
static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
{
    TCGv_i64 r = tcg_temp_new_i64();

    switch (esz) {
    case 0:
        tcg_gen_ld8u_i64(r, base, ofs);
        break;
    case 1:
        tcg_gen_ld16u_i64(r, base, ofs);
        break;
    case 2:
        tcg_gen_ld32u_i64(r, base, ofs);
        break;
    case 3:
        tcg_gen_ld_i64(r, base, ofs);
        break;
    default:
        g_assert_not_reached();
    }
    return r;
}
2411 /* Load an unsigned element of ESZ from RM[LAST]. */
2412 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2415 TCGv_ptr p = tcg_temp_new_ptr();
2418 /* Convert the offset within the vector into an offset within ENV.
2419 * The final adjustment for the vector register base
2420 * is added via constant offset to the load.
2422 #ifdef HOST_WORDS_BIGENDIAN
2423 /* Adjust for element ordering. See vec_reg_offset. */
2425 tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2428 tcg_gen_ext_i32_ptr(p, last);
2429 tcg_gen_add_ptr(p, p, cpu_env);
2431 r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2432 tcg_temp_free_ptr(p);
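/* The XOR above maps a little-endian element number onto the host
 * layout: with esz == 0 on a big-endian host, byte element 0 lives
 * at offset 7 within its 64-bit unit, and 0 ^ (8 - (1 << 0)) == 7.
 */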
2437 /* Compute CLAST for a Zreg. */
2438 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2443 unsigned vsz, esz = a->esz;
2445 if (!sve_access_check(s)) {
2449 last = tcg_temp_local_new_i32();
2450 over = gen_new_label();
2452 find_last_active(s, last, esz, a->pg);
2454 /* There is of course no movcond for a 2048-bit vector,
2455 * so we must branch over the actual store.
2457 tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2459 if (!before) {
2460 incr_last_active(s, last, esz);
2461 }
2463 ele = load_last_active(s, last, a->rm, esz);
2464 tcg_temp_free_i32(last);
2466 vsz = vec_full_reg_size(s);
2467 tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2468 tcg_temp_free_i64(ele);
2470 /* If this insn used MOVPRFX, we may need a second move. */
2471 if (a->rd != a->rn) {
2472 TCGLabel *done = gen_new_label();
2473 tcg_gen_br(done);
2475 gen_set_label(over);
2476 do_mov_z(s, a->rd, a->rn);
2478 gen_set_label(done);
2479 } else {
2480 gen_set_label(over);
2481 }
2485 static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2487 return do_clast_vector(s, a, false);
2490 static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2492 return do_clast_vector(s, a, true);
2495 /* Compute CLAST for a scalar. */
2496 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2497 bool before, TCGv_i64 reg_val)
2499 TCGv_i32 last = tcg_temp_new_i32();
2500 TCGv_i64 ele, cmp, zero;
2502 find_last_active(s, last, esz, pg);
2504 /* Extend the original value of last prior to incrementing. */
2505 cmp = tcg_temp_new_i64();
2506 tcg_gen_ext_i32_i64(cmp, last);
2508 if (!before) {
2509 incr_last_active(s, last, esz);
2510 }
2512 /* The conceit here is that while last < 0 indicates not found, after
2513 * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2514 * from which we can load garbage. We then discard the garbage with
2515 * a conditional move.
2517 ele = load_last_active(s, last, rm, esz);
2518 tcg_temp_free_i32(last);
2520 zero = tcg_const_i64(0);
2521 tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2523 tcg_temp_free_i64(zero);
2524 tcg_temp_free_i64(cmp);
2525 tcg_temp_free_i64(ele);
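/* Net effect: with no active element, cmp is negative and the
 * movcond above leaves REG_VAL untouched, which is precisely the
 * architected CLAST behaviour of retaining the prior scalar value.
 */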
2528 /* Compute CLAST for a Vreg. */
2529 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2531 if (sve_access_check(s)) {
2532 int esz = a->esz;
2533 int ofs = vec_reg_offset(s, a->rd, 0, esz);
2534 TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2536 do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2537 write_fp_dreg(s, a->rd, reg);
2538 tcg_temp_free_i64(reg);
2543 static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2545 return do_clast_fp(s, a, false);
2548 static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2550 return do_clast_fp(s, a, true);
2553 /* Compute CLAST for a Xreg. */
2554 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2558 if (!sve_access_check(s)) {
2562 reg = cpu_reg(s, a->rd);
2563 switch (a->esz) {
2564 case 0:
2565 tcg_gen_ext8u_i64(reg, reg);
2566 break;
2567 case 1:
2568 tcg_gen_ext16u_i64(reg, reg);
2569 break;
2570 case 2:
2571 tcg_gen_ext32u_i64(reg, reg);
2572 break;
2573 case 3:
2574 break;
2575 default:
2576 g_assert_not_reached();
2577 }
2579 do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2583 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2585 return do_clast_general(s, a, false);
2588 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2590 return do_clast_general(s, a, true);
2593 /* Compute LAST for a scalar. */
2594 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2595 int pg, int rm, bool before)
2597 TCGv_i32 last = tcg_temp_new_i32();
2600 find_last_active(s, last, esz, pg);
2601 if (before) {
2602 wrap_last_active(s, last, esz);
2603 } else {
2604 incr_last_active(s, last, esz);
2605 }
2607 ret = load_last_active(s, last, rm, esz);
2608 tcg_temp_free_i32(last);
2612 /* Compute LAST for a Vreg. */
2613 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2615 if (sve_access_check(s)) {
2616 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2617 write_fp_dreg(s, a->rd, val);
2618 tcg_temp_free_i64(val);
2623 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2625 return do_last_fp(s, a, false);
2628 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2630 return do_last_fp(s, a, true);
2633 /* Compute LAST for a Xreg. */
2634 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2636 if (sve_access_check(s)) {
2637 TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2638 tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2639 tcg_temp_free_i64(val);
2644 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2646 return do_last_general(s, a, false);
2649 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2651 return do_last_general(s, a, true);
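/* Unlike CLAST above, LASTA/LASTB never preserve an old value: with
 * no active elements the not-found index wraps to the last element
 * (LASTB) or increments to element 0 (LASTA), so the load always
 * takes place.
 */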
2654 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2656 if (sve_access_check(s)) {
2657 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2662 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2664 if (sve_access_check(s)) {
2665 int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2666 TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2667 do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2668 tcg_temp_free_i64(t);
2673 static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2675 static gen_helper_gvec_3 * const fns[4] = {
2676 NULL,
2677 gen_helper_sve_revb_h,
2678 gen_helper_sve_revb_s,
2679 gen_helper_sve_revb_d,
2681 return do_zpz_ool(s, a, fns[a->esz]);
2684 static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2686 static gen_helper_gvec_3 * const fns[4] = {
2687 NULL,
2688 NULL,
2689 gen_helper_sve_revh_s,
2690 gen_helper_sve_revh_d,
2692 return do_zpz_ool(s, a, fns[a->esz]);
2695 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2697 return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2700 static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2702 static gen_helper_gvec_3 * const fns[4] = {
2703 gen_helper_sve_rbit_b,
2704 gen_helper_sve_rbit_h,
2705 gen_helper_sve_rbit_s,
2706 gen_helper_sve_rbit_d,
2708 return do_zpz_ool(s, a, fns[a->esz]);
2711 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2713 if (sve_access_check(s)) {
2714 unsigned vsz = vec_full_reg_size(s);
2715 tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2716 vec_full_reg_offset(s, a->rn),
2717 vec_full_reg_offset(s, a->rm),
2718 pred_full_reg_offset(s, a->pg),
2719 vsz, vsz, a->esz, gen_helper_sve_splice);
2725 *** SVE Integer Compare - Vectors Group
2728 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2729 gen_helper_gvec_flags_4 *gen_fn)
2731 TCGv_ptr pd, zn, zm, pg;
2735 if (gen_fn == NULL) {
2738 if (!sve_access_check(s)) {
2742 vsz = vec_full_reg_size(s);
2743 t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2744 pd = tcg_temp_new_ptr();
2745 zn = tcg_temp_new_ptr();
2746 zm = tcg_temp_new_ptr();
2747 pg = tcg_temp_new_ptr();
2749 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2750 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2751 tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2752 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2754 gen_fn(t, pd, zn, zm, pg, t);
2756 tcg_temp_free_ptr(pd);
2757 tcg_temp_free_ptr(zn);
2758 tcg_temp_free_ptr(zm);
2759 tcg_temp_free_ptr(pg);
2761 do_pred_flags(t);
2763 tcg_temp_free_i32(t);
2767 #define DO_PPZZ(NAME, name) \
2768 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a, \
2771 static gen_helper_gvec_flags_4 * const fns[4] = { \
2772 gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2773 gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2775 return do_ppzz_flags(s, a, fns[a->esz]); \
2778 DO_PPZZ(CMPEQ, cmpeq)
2779 DO_PPZZ(CMPNE, cmpne)
2780 DO_PPZZ(CMPGT, cmpgt)
2781 DO_PPZZ(CMPGE, cmpge)
2782 DO_PPZZ(CMPHI, cmphi)
2783 DO_PPZZ(CMPHS, cmphs)
2787 #define DO_PPZW(NAME, name) \
2788 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a, \
2791 static gen_helper_gvec_flags_4 * const fns[4] = { \
2792 gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2793 gen_helper_sve_##name##_ppzw_s, NULL \
2795 return do_ppzz_flags(s, a, fns[a->esz]); \
2798 DO_PPZW(CMPEQ, cmpeq)
2799 DO_PPZW(CMPNE, cmpne)
2800 DO_PPZW(CMPGT, cmpgt)
2801 DO_PPZW(CMPGE, cmpge)
2802 DO_PPZW(CMPHI, cmphi)
2803 DO_PPZW(CMPHS, cmphs)
2804 DO_PPZW(CMPLT, cmplt)
2805 DO_PPZW(CMPLE, cmple)
2806 DO_PPZW(CMPLO, cmplo)
2807 DO_PPZW(CMPLS, cmpls)
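/* These "wide" forms compare each element of Zn against the 64-bit
 * element of Zm containing it, hence the NULL _d entry in the table.
 * It is also why LT/LE/LO/LS appear only here: with same-sized
 * operands those conditions are obtained by swapping the operands of
 * GT/GE/HI/HS instead.
 */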
2812 *** SVE Integer Compare - Immediate Groups
2815 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2816 gen_helper_gvec_flags_3 *gen_fn)
2818 TCGv_ptr pd, zn, pg;
2822 if (gen_fn == NULL) {
2825 if (!sve_access_check(s)) {
2829 vsz = vec_full_reg_size(s);
2830 t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2831 pd = tcg_temp_new_ptr();
2832 zn = tcg_temp_new_ptr();
2833 pg = tcg_temp_new_ptr();
2835 tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2836 tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2837 tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2839 gen_fn(t, pd, zn, pg, t);
2841 tcg_temp_free_ptr(pd);
2842 tcg_temp_free_ptr(zn);
2843 tcg_temp_free_ptr(pg);
2845 do_pred_flags(t);
2847 tcg_temp_free_i32(t);
2851 #define DO_PPZI(NAME, name) \
2852 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a, \
2855 static gen_helper_gvec_flags_3 * const fns[4] = { \
2856 gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2857 gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2859 return do_ppzi_flags(s, a, fns[a->esz]); \
2862 DO_PPZI(CMPEQ, cmpeq)
2863 DO_PPZI(CMPNE, cmpne)
2864 DO_PPZI(CMPGT, cmpgt)
2865 DO_PPZI(CMPGE, cmpge)
2866 DO_PPZI(CMPHI, cmphi)
2867 DO_PPZI(CMPHS, cmphs)
2868 DO_PPZI(CMPLT, cmplt)
2869 DO_PPZI(CMPLE, cmple)
2870 DO_PPZI(CMPLO, cmplo)
2871 DO_PPZI(CMPLS, cmpls)
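/* In the immediate forms the comparison value rides along in the
 * simd_desc data field (see do_ppzi_flags above), so no second
 * vector operand is required.
 */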
2876 *** SVE Partition Break Group
2879 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2880 gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2882 if (!sve_access_check(s)) {
2886 unsigned vsz = pred_full_reg_size(s);
2888 /* Predicate sizes may be smaller and cannot use simd_desc. */
2889 TCGv_ptr d = tcg_temp_new_ptr();
2890 TCGv_ptr n = tcg_temp_new_ptr();
2891 TCGv_ptr m = tcg_temp_new_ptr();
2892 TCGv_ptr g = tcg_temp_new_ptr();
2893 TCGv_i32 t = tcg_const_i32(vsz - 2);
2895 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2896 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2897 tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2898 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2900 if (a->s) {
2901 fn_s(t, d, n, m, g, t);
2902 do_pred_flags(t);
2903 } else {
2904 fn(d, n, m, g, t);
2905 }
2906 tcg_temp_free_ptr(d);
2907 tcg_temp_free_ptr(n);
2908 tcg_temp_free_ptr(m);
2909 tcg_temp_free_ptr(g);
2910 tcg_temp_free_i32(t);
2914 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2915 gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2917 if (!sve_access_check(s)) {
2921 unsigned vsz = pred_full_reg_size(s);
2923 /* Predicate sizes may be smaller and cannot use simd_desc. */
2924 TCGv_ptr d = tcg_temp_new_ptr();
2925 TCGv_ptr n = tcg_temp_new_ptr();
2926 TCGv_ptr g = tcg_temp_new_ptr();
2927 TCGv_i32 t = tcg_const_i32(vsz - 2);
2929 tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2930 tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2931 tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2933 if (a->s) {
2934 fn_s(t, d, n, g, t);
2935 do_pred_flags(t);
2936 } else {
2937 fn(d, n, g, t);
2938 }
2939 tcg_temp_free_ptr(d);
2940 tcg_temp_free_ptr(n);
2941 tcg_temp_free_ptr(g);
2942 tcg_temp_free_i32(t);
2946 static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2948 return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2951 static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
2953 return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2956 static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2958 return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2961 static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2963 return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2966 static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2968 return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2971 static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2973 return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
2976 static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
2978 return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
2982 *** SVE Predicate Count Group
2985 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
2987 unsigned psz = pred_full_reg_size(s);
2989 if (psz <= 8) {
2990 uint64_t psz_mask;
2992 tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
2993 if (pn != pg) {
2994 TCGv_i64 g = tcg_temp_new_i64();
2995 tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
2996 tcg_gen_and_i64(val, val, g);
2997 tcg_temp_free_i64(g);
2998 }
3000 /* Truncate the pred_esz_masks value to the actual predicate size,
3001 * simply to reduce the size of the code generated here.
3003 psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3004 tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3006 tcg_gen_ctpop_i64(val, val);
3007 } else {
3008 TCGv_ptr t_pn = tcg_temp_new_ptr();
3009 TCGv_ptr t_pg = tcg_temp_new_ptr();
3010 TCGv_i32 t_desc;
3011 unsigned desc;
3013 desc = psz - 2;
3014 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3016 tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3017 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3018 t_desc = tcg_const_i32(desc);
3020 gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3021 tcg_temp_free_ptr(t_pn);
3022 tcg_temp_free_ptr(t_pg);
3023 tcg_temp_free_i32(t_desc);
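/* Concrete example of the fast path above: for esz == MO_64 only
 * every eighth predicate bit governs an element, so the value is
 * ANDed with pred_esz_masks[3] == 0x0101010101010101, truncated to
 * the predicate size, before the popcount.
 */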
3027 static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
3029 if (sve_access_check(s)) {
3030 do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3035 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
3038 if (sve_access_check(s)) {
3039 TCGv_i64 reg = cpu_reg(s, a->rd);
3040 TCGv_i64 val = tcg_temp_new_i64();
3042 do_cntp(s, val, a->esz, a->pg, a->pg);
3043 if (a->d) {
3044 tcg_gen_sub_i64(reg, reg, val);
3045 } else {
3046 tcg_gen_add_i64(reg, reg, val);
3047 }
3048 tcg_temp_free_i64(val);
3053 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3056 if (a->esz == 0) {
3057 return false;
3058 }
3059 if (sve_access_check(s)) {
3060 unsigned vsz = vec_full_reg_size(s);
3061 TCGv_i64 val = tcg_temp_new_i64();
3062 GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3064 do_cntp(s, val, a->esz, a->pg, a->pg);
3065 gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3066 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3071 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
3074 if (sve_access_check(s)) {
3075 TCGv_i64 reg = cpu_reg(s, a->rd);
3076 TCGv_i64 val = tcg_temp_new_i64();
3078 do_cntp(s, val, a->esz, a->pg, a->pg);
3079 do_sat_addsub_32(reg, val, a->u, a->d);
3084 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
3087 if (sve_access_check(s)) {
3088 TCGv_i64 reg = cpu_reg(s, a->rd);
3089 TCGv_i64 val = tcg_temp_new_i64();
3091 do_cntp(s, val, a->esz, a->pg, a->pg);
3092 do_sat_addsub_64(reg, val, a->u, a->d);
3097 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3100 if (a->esz == 0) {
3101 return false;
3102 }
3103 if (sve_access_check(s)) {
3104 TCGv_i64 val = tcg_temp_new_i64();
3105 do_cntp(s, val, a->esz, a->pg, a->pg);
3106 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3112 *** SVE Integer Compare Scalars Group
3115 static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
3117 if (!sve_access_check(s)) {
3121 TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3122 TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3123 TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3124 TCGv_i64 cmp = tcg_temp_new_i64();
3126 tcg_gen_setcond_i64(cond, cmp, rn, rm);
3127 tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3128 tcg_temp_free_i64(cmp);
3130 /* VF = !NF & !CF. */
3131 tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3132 tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3134 /* Both NF and VF actually look at bit 31. */
3135 tcg_gen_neg_i32(cpu_NF, cpu_NF);
3136 tcg_gen_neg_i32(cpu_VF, cpu_VF);
3140 static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
3142 if (!sve_access_check(s)) {
3146 TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
3147 TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
3148 TCGv_i64 t0 = tcg_temp_new_i64();
3149 TCGv_i64 t1 = tcg_temp_new_i64();
3152 unsigned desc, vsz = vec_full_reg_size(s);
3153 TCGCond cond;
3155 if (!a->sf) {
3156 if (a->u) {
3157 tcg_gen_ext32u_i64(op0, op0);
3158 tcg_gen_ext32u_i64(op1, op1);
3159 } else {
3160 tcg_gen_ext32s_i64(op0, op0);
3161 tcg_gen_ext32s_i64(op1, op1);
3162 }
3163 }
3165 /* For the helper, compress the different conditions into a computation
3166 * of how many iterations for which the condition is true.
3168 * This is slightly complicated by 0 <= UINT64_MAX, which is nominally
3169 * 2**64 iterations, overflowing to 0. Of course, predicate registers
3170 * aren't that large, so any value >= predicate size is sufficient.
3172 tcg_gen_sub_i64(t0, op1, op0);
3174 /* t0 = MIN(op1 - op0, vsz). */
3175 tcg_gen_movi_i64(t1, vsz);
3176 tcg_gen_umin_i64(t0, t0, t1);
3177 if (a->eq) {
3178 /* Equality means one more iteration. */
3179 tcg_gen_addi_i64(t0, t0, 1);
3180 }
3182 /* t0 = (condition true ? t0 : 0). */
3183 cond = (a->u
3184 ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3185 : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3186 tcg_gen_movi_i64(t1, 0);
3187 tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3189 t2 = tcg_temp_new_i32();
3190 tcg_gen_extrl_i64_i32(t2, t0);
3191 tcg_temp_free_i64(t0);
3192 tcg_temp_free_i64(t1);
3194 desc = (vsz / 8) - 2;
3195 desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3196 t3 = tcg_const_i32(desc);
3198 ptr = tcg_temp_new_ptr();
3199 tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3201 gen_helper_sve_while(t2, ptr, t2, t3);
3202 do_pred_flags(t2);
3204 tcg_temp_free_ptr(ptr);
3205 tcg_temp_free_i32(t2);
3206 tcg_temp_free_i32(t3);
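/* Worked example: WHILELT with X0 = 0, X1 = 3, esz == 0 computes
 * t0 = MIN(3 - 0, vsz) = 3, so the helper sets the first three
 * predicate bits of Pd and do_pred_flags then sets NZCV from the
 * resulting predicate.
 */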
3211 *** SVE Integer Wide Immediate - Unpredicated Group
3214 static bool trans_FDUP(DisasContext *s, arg_FDUP *a, uint32_t insn)
3216 if (a->esz == 0) {
3217 return false;
3218 }
3219 if (sve_access_check(s)) {
3220 unsigned vsz = vec_full_reg_size(s);
3221 int dofs = vec_full_reg_offset(s, a->rd);
3224 /* Decode the VFP immediate. */
3225 imm = vfp_expand_imm(a->esz, a->imm);
3226 imm = dup_const(a->esz, imm);
3228 tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
3233 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a, uint32_t insn)
3235 if (a->esz == 0 && extract32(insn, 13, 1)) {
3238 if (sve_access_check(s)) {
3239 unsigned vsz = vec_full_reg_size(s);
3240 int dofs = vec_full_reg_offset(s, a->rd);
3242 tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
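/* The extract32(insn, 13, 1) test rejects the shifted form of the
 * immediate (LSL #8) for byte elements, where that encoding is
 * unallocated; the same test recurs in the immediate arithmetic
 * forms below.
 */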
3247 static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3249 if (a->esz == 0 && extract32(insn, 13, 1)) {
3252 if (sve_access_check(s)) {
3253 unsigned vsz = vec_full_reg_size(s);
3254 tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3255 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3260 static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3262 a->imm = -a->imm;
3263 return trans_ADD_zzi(s, a, insn);
3266 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3268 static const GVecGen2s op[4] = {
3269 { .fni8 = tcg_gen_vec_sub8_i64,
3270 .fniv = tcg_gen_sub_vec,
3271 .fno = gen_helper_sve_subri_b,
3272 .opc = INDEX_op_sub_vec,
3273 .vece = MO_8,
3274 .scalar_first = true },
3275 { .fni8 = tcg_gen_vec_sub16_i64,
3276 .fniv = tcg_gen_sub_vec,
3277 .fno = gen_helper_sve_subri_h,
3278 .opc = INDEX_op_sub_vec,
3279 .vece = MO_16,
3280 .scalar_first = true },
3281 { .fni4 = tcg_gen_sub_i32,
3282 .fniv = tcg_gen_sub_vec,
3283 .fno = gen_helper_sve_subri_s,
3284 .opc = INDEX_op_sub_vec,
3285 .vece = MO_32,
3286 .scalar_first = true },
3287 { .fni8 = tcg_gen_sub_i64,
3288 .fniv = tcg_gen_sub_vec,
3289 .fno = gen_helper_sve_subri_d,
3290 .opc = INDEX_op_sub_vec,
3291 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3292 .vece = MO_64,
3293 .scalar_first = true }
3296 if (a->esz == 0 && extract32(insn, 13, 1)) {
3299 if (sve_access_check(s)) {
3300 unsigned vsz = vec_full_reg_size(s);
3301 TCGv_i64 c = tcg_const_i64(a->imm);
3302 tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3303 vec_full_reg_offset(s, a->rn),
3304 vsz, vsz, c, &op[a->esz]);
3305 tcg_temp_free_i64(c);
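/* SUBR computes imm - Zn rather than Zn - imm, hence .scalar_first
 * in the GVecGen2s descriptors above: the scalar is used as the
 * first operand of the subtraction.
 */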
3310 static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3312 if (sve_access_check(s)) {
3313 unsigned vsz = vec_full_reg_size(s);
3314 tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3315 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3320 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, uint32_t insn,
3323 if (a->esz == 0 && extract32(insn, 13, 1)) {
3326 if (sve_access_check(s)) {
3327 TCGv_i64 val = tcg_const_i64(a->imm);
3328 do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3329 tcg_temp_free_i64(val);
3334 static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3336 return do_zzi_sat(s, a, insn, false, false);
3339 static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3341 return do_zzi_sat(s, a, insn, true, false);
3344 static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3346 return do_zzi_sat(s, a, insn, false, true);
3349 static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3351 return do_zzi_sat(s, a, insn, true, true);
3354 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3356 if (sve_access_check(s)) {
3357 unsigned vsz = vec_full_reg_size(s);
3358 TCGv_i64 c = tcg_const_i64(a->imm);
3360 tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3361 vec_full_reg_offset(s, a->rn),
3362 c, vsz, vsz, 0, fn);
3363 tcg_temp_free_i64(c);
3368 #define DO_ZZI(NAME, name) \
3369 static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a, \
3372 static gen_helper_gvec_2i * const fns[4] = { \
3373 gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3374 gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3376 return do_zzi_ool(s, a, fns[a->esz]); \
3377 }
3379 DO_ZZI(SMAX, smax)
3380 DO_ZZI(SMIN, smin)
3381 DO_ZZI(UMAX, umax)
3382 DO_ZZI(UMIN, umin)
3384 #undef DO_ZZI
3387 *** SVE Floating Point Arithmetic - Unpredicated Group
3390 static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3391 gen_helper_gvec_3_ptr *fn)
3393 if (fn == NULL) {
3394 return false;
3395 }
3396 if (sve_access_check(s)) {
3397 unsigned vsz = vec_full_reg_size(s);
3398 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3399 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3400 vec_full_reg_offset(s, a->rn),
3401 vec_full_reg_offset(s, a->rm),
3402 status, vsz, vsz, 0, fn);
3403 tcg_temp_free_ptr(status);
3409 #define DO_FP3(NAME, name) \
3410 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a, uint32_t insn) \
3412 static gen_helper_gvec_3_ptr * const fns[4] = { \
3413 NULL, gen_helper_gvec_##name##_h, \
3414 gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3416 return do_zzz_fp(s, a, fns[a->esz]); \
3419 DO_FP3(FADD_zzz, fadd)
3420 DO_FP3(FSUB_zzz, fsub)
3421 DO_FP3(FMUL_zzz, fmul)
3422 DO_FP3(FTSMUL, ftsmul)
3423 DO_FP3(FRECPS, recps)
3424 DO_FP3(FRSQRTS, rsqrts)
3429 *** SVE Floating Point Arithmetic - Predicated Group
3432 static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3433 gen_helper_gvec_4_ptr *fn)
3435 if (fn == NULL) {
3436 return false;
3437 }
3438 if (sve_access_check(s)) {
3439 unsigned vsz = vec_full_reg_size(s);
3440 TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3441 tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3442 vec_full_reg_offset(s, a->rn),
3443 vec_full_reg_offset(s, a->rm),
3444 pred_full_reg_offset(s, a->pg),
3445 status, vsz, vsz, 0, fn);
3446 tcg_temp_free_ptr(status);
3451 #define DO_FP3(NAME, name) \
3452 static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a, uint32_t insn) \
3454 static gen_helper_gvec_4_ptr * const fns[4] = { \
3455 NULL, gen_helper_sve_##name##_h, \
3456 gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3458 return do_zpzz_fp(s, a, fns[a->esz]); \
3461 DO_FP3(FADD_zpzz, fadd)
3462 DO_FP3(FSUB_zpzz, fsub)
3463 DO_FP3(FMUL_zpzz, fmul)
3464 DO_FP3(FMIN_zpzz, fmin)
3465 DO_FP3(FMAX_zpzz, fmax)
3466 DO_FP3(FMINNM_zpzz, fminnum)
3467 DO_FP3(FMAXNM_zpzz, fmaxnum)
3469 DO_FP3(FSCALE, fscalbn)
3471 DO_FP3(FMULX, fmulx)
3476 *** SVE Floating Point Unary Operations Predicated Group
3479 static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
3480 bool is_fp16, gen_helper_gvec_3_ptr *fn)
3482 if (sve_access_check(s)) {
3483 unsigned vsz = vec_full_reg_size(s);
3484 TCGv_ptr status = get_fpstatus_ptr(is_fp16);
3485 tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
3486 vec_full_reg_offset(s, rn),
3487 pred_full_reg_offset(s, pg),
3488 status, vsz, vsz, 0, fn);
3489 tcg_temp_free_ptr(status);
3494 static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3496 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
3499 static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3501 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
3504 static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3506 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
3509 static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3511 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
3514 static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3516 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
3519 static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3521 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
3524 static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3526 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
3529 static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3531 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
3534 static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3536 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
3539 static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3541 return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
3544 static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3546 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
3549 static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3551 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
3554 static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3556 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
3559 static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
3561 return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
3565 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
3568 /* Subroutine loading a vector register at VOFS of LEN bytes.
3569 * The load should begin at the address Rn + IMM.
3572 static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
3575 uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
3576 uint32_t len_remain = len % 8;
3577 uint32_t nparts = len / 8 + ctpop8(len_remain);
3578 int midx = get_mem_index(s);
3579 TCGv_i64 addr, t0, t1;
3581 addr = tcg_temp_new_i64();
3582 t0 = tcg_temp_new_i64();
3584 /* Note that unpredicated load/store of vector/predicate registers
3585 * are defined as a stream of bytes, which equates to little-endian
3586 * operations on larger quantities. There is no nice way to force
3587 * a little-endian load for aarch64_be-linux-user out of line.
3589 * Attempt to keep code expansion to a minimum by limiting the
3590 * amount of unrolling done.
3592 if (nparts <= 4) {
3593 int i;
3595 for (i = 0; i < len_align; i += 8) {
3596 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
3597 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
3598 tcg_gen_st_i64(t0, cpu_env, vofs + i);
3599 }
3600 } else {
3601 TCGLabel *loop = gen_new_label();
3602 TCGv_ptr tp, i = tcg_const_local_ptr(0);
3604 gen_set_label(loop);
3606 /* Minimize the number of local temps that must be re-read from
3607 * the stack each iteration. Instead, re-compute values other
3608 * than the loop counter.
3610 tp = tcg_temp_new_ptr();
3611 tcg_gen_addi_ptr(tp, i, imm);
3612 tcg_gen_extu_ptr_i64(addr, tp);
3613 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
3615 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
3617 tcg_gen_add_ptr(tp, cpu_env, i);
3618 tcg_gen_addi_ptr(i, i, 8);
3619 tcg_gen_st_i64(t0, tp, vofs);
3620 tcg_temp_free_ptr(tp);
3622 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
3623 tcg_temp_free_ptr(i);
3626 /* Predicate register loads can be any multiple of 2.
3627 * Note that we still store the entire 64-bit unit into cpu_env.
3629 if (len_remain) {
3630 tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
3632 switch (len_remain) {
3633 case 2:
3634 case 4:
3635 case 8:
3636 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
3637 break;
3639 case 6:
3640 t1 = tcg_temp_new_i64();
3641 tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
3642 tcg_gen_addi_i64(addr, addr, 4);
3643 tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
3644 tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
3645 tcg_temp_free_i64(t1);
3646 break;
3648 default:
3649 g_assert_not_reached();
3651 tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
3653 tcg_temp_free_i64(addr);
3654 tcg_temp_free_i64(t0);
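/* As an example of the len_remain handling: restoring a predicate
 * with LDR_pri at VL = 384 bits gives len = 6, so len_align == 0 and
 * the entire transfer is the 4-byte plus 2-byte sequence under
 * case 6 above.
 */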
3657 static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
3659 if (sve_access_check(s)) {
3660 int size = vec_full_reg_size(s);
3661 int off = vec_full_reg_offset(s, a->rd);
3662 do_ldr(s, off, size, a->rn, a->imm * size);
3667 static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
3669 if (sve_access_check(s)) {
3670 int size = pred_full_reg_size(s);
3671 int off = pred_full_reg_offset(s, a->rd);
3672 do_ldr(s, off, size, a->rn, a->imm * size);
3678 *** SVE Memory - Contiguous Load Group
3681 /* The memory mode of the dtype. */
3682 static const TCGMemOp dtype_mop[16] = {
3683 MO_UB, MO_UB, MO_UB, MO_UB,
3684 MO_SL, MO_UW, MO_UW, MO_UW,
3685 MO_SW, MO_SW, MO_UL, MO_UL,
3686 MO_SB, MO_SB, MO_SB, MO_Q
3689 #define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
3691 /* The vector element size of dtype. */
3692 static const uint8_t dtype_esz[16] = {
3693 0, 1, 2, 3,
3694 3, 1, 2, 3,
3695 3, 2, 2, 3,
3696 3, 2, 1, 3
3697 };
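/* Taken together these tables decode, e.g., dtype 4 as a
 * sign-extending 32-bit load (MO_SL) into 64-bit elements (esz 3),
 * i.e. LD1SW to a .D vector, matching gen_helper_sve_ld1sds_r at
 * index 4 of the load table below.
 */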
3699 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
3700 gen_helper_gvec_mem *fn)
3702 unsigned vsz = vec_full_reg_size(s);
3706 /* For e.g. LD4, there are not enough arguments to pass all 4
3707 * registers as pointers, so encode the regno into the data field.
3708 * For consistency, do this even for LD1.
3710 desc = tcg_const_i32(simd_desc(vsz, vsz, zt));
3711 t_pg = tcg_temp_new_ptr();
3713 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3714 fn(cpu_env, t_pg, addr, desc);
3716 tcg_temp_free_ptr(t_pg);
3717 tcg_temp_free_i32(desc);
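/* So for LD2..LD4 the helper recovers the first register number via
 * simd_data(desc); the remaining registers are the architecturally
 * consecutive ones (modulo 32), so they need not be passed at all.
 */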
3720 static void do_ld_zpa(DisasContext *s, int zt, int pg,
3721 TCGv_i64 addr, int dtype, int nreg)
3723 static gen_helper_gvec_mem * const fns[16][4] = {
3724 { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
3725 gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
3726 { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
3727 { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
3728 { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
3730 { gen_helper_sve_ld1sds_r, NULL, NULL, NULL },
3731 { gen_helper_sve_ld1hh_r, gen_helper_sve_ld2hh_r,
3732 gen_helper_sve_ld3hh_r, gen_helper_sve_ld4hh_r },
3733 { gen_helper_sve_ld1hsu_r, NULL, NULL, NULL },
3734 { gen_helper_sve_ld1hdu_r, NULL, NULL, NULL },
3736 { gen_helper_sve_ld1hds_r, NULL, NULL, NULL },
3737 { gen_helper_sve_ld1hss_r, NULL, NULL, NULL },
3738 { gen_helper_sve_ld1ss_r, gen_helper_sve_ld2ss_r,
3739 gen_helper_sve_ld3ss_r, gen_helper_sve_ld4ss_r },
3740 { gen_helper_sve_ld1sdu_r, NULL, NULL, NULL },
3742 { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
3743 { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
3744 { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
3745 { gen_helper_sve_ld1dd_r, gen_helper_sve_ld2dd_r,
3746 gen_helper_sve_ld3dd_r, gen_helper_sve_ld4dd_r },
3748 gen_helper_gvec_mem *fn = fns[dtype][nreg];
3750 /* While there are holes in the table, they are not
3751 * accessible via the instruction encoding.
3753 assert(fn != NULL);
3754 do_mem_zpa(s, zt, pg, addr, fn);
3757 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
3759 if (a->rm == 31) {
3760 return false;
3761 }
3762 if (sve_access_check(s)) {
3763 TCGv_i64 addr = new_tmp_a64(s);
3764 tcg_gen_muli_i64(addr, cpu_reg(s, a->rm),
3765 (a->nreg + 1) << dtype_msz(a->dtype));
3766 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
3767 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
3772 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
3774 if (sve_access_check(s)) {
3775 int vsz = vec_full_reg_size(s);
3776 int elements = vsz >> dtype_esz[a->dtype];
3777 TCGv_i64 addr = new_tmp_a64(s);
3779 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
3780 (a->imm * elements * (a->nreg + 1))
3781 << dtype_msz(a->dtype));
3782 do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
3787 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
3789 static gen_helper_gvec_mem * const fns[16] = {
3790 gen_helper_sve_ldff1bb_r,
3791 gen_helper_sve_ldff1bhu_r,
3792 gen_helper_sve_ldff1bsu_r,
3793 gen_helper_sve_ldff1bdu_r,
3795 gen_helper_sve_ldff1sds_r,
3796 gen_helper_sve_ldff1hh_r,
3797 gen_helper_sve_ldff1hsu_r,
3798 gen_helper_sve_ldff1hdu_r,
3800 gen_helper_sve_ldff1hds_r,
3801 gen_helper_sve_ldff1hss_r,
3802 gen_helper_sve_ldff1ss_r,
3803 gen_helper_sve_ldff1sdu_r,
3805 gen_helper_sve_ldff1bds_r,
3806 gen_helper_sve_ldff1bss_r,
3807 gen_helper_sve_ldff1bhs_r,
3808 gen_helper_sve_ldff1dd_r,
3811 if (sve_access_check(s)) {
3812 TCGv_i64 addr = new_tmp_a64(s);
3813 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
3814 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
3815 do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
3820 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
3822 static gen_helper_gvec_mem * const fns[16] = {
3823 gen_helper_sve_ldnf1bb_r,
3824 gen_helper_sve_ldnf1bhu_r,
3825 gen_helper_sve_ldnf1bsu_r,
3826 gen_helper_sve_ldnf1bdu_r,
3828 gen_helper_sve_ldnf1sds_r,
3829 gen_helper_sve_ldnf1hh_r,
3830 gen_helper_sve_ldnf1hsu_r,
3831 gen_helper_sve_ldnf1hdu_r,
3833 gen_helper_sve_ldnf1hds_r,
3834 gen_helper_sve_ldnf1hss_r,
3835 gen_helper_sve_ldnf1ss_r,
3836 gen_helper_sve_ldnf1sdu_r,
3838 gen_helper_sve_ldnf1bds_r,
3839 gen_helper_sve_ldnf1bss_r,
3840 gen_helper_sve_ldnf1bhs_r,
3841 gen_helper_sve_ldnf1dd_r,
3844 if (sve_access_check(s)) {
3845 int vsz = vec_full_reg_size(s);
3846 int elements = vsz >> dtype_esz[a->dtype];
3847 int off = (a->imm * elements) << dtype_msz(a->dtype);
3848 TCGv_i64 addr = new_tmp_a64(s);
3850 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
3851 do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
3856 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
3858 static gen_helper_gvec_mem * const fns[4] = {
3859 gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_r,
3860 gen_helper_sve_ld1ss_r, gen_helper_sve_ld1dd_r,
3862 unsigned vsz = vec_full_reg_size(s);
3866 /* Load the first quadword using the normal predicated load helpers. */
3867 desc = tcg_const_i32(simd_desc(16, 16, zt));
3868 t_pg = tcg_temp_new_ptr();
3870 tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3871 fns[msz](cpu_env, t_pg, addr, desc);
3873 tcg_temp_free_ptr(t_pg);
3874 tcg_temp_free_i32(desc);
3876 /* Replicate that first quadword. */
3877 if (vsz > 16) {
3878 unsigned dofs = vec_full_reg_offset(s, zt);
3879 tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
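/* I.e. LD1RQ performs one predicated 16-byte load and broadcasts it:
 * for vsz == 64 the dup_mem above copies the low quadword into the
 * remaining 48 bytes of the destination.
 */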
3883 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
3885 if (a->rm == 31) {
3886 return false;
3887 }
3888 if (sve_access_check(s)) {
3889 int msz = dtype_msz(a->dtype);
3890 TCGv_i64 addr = new_tmp_a64(s);
3891 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
3892 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
3893 do_ldrq(s, a->rd, a->pg, addr, msz);
3898 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
3900 if (sve_access_check(s)) {
3901 TCGv_i64 addr = new_tmp_a64(s);
3902 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
3903 do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
3908 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
3909 int msz, int esz, int nreg)
3911 static gen_helper_gvec_mem * const fn_single[4][4] = {
3912 { gen_helper_sve_st1bb_r, gen_helper_sve_st1bh_r,
3913 gen_helper_sve_st1bs_r, gen_helper_sve_st1bd_r },
3914 { NULL, gen_helper_sve_st1hh_r,
3915 gen_helper_sve_st1hs_r, gen_helper_sve_st1hd_r },
3916 { NULL, NULL,
3917 gen_helper_sve_st1ss_r, gen_helper_sve_st1sd_r },
3918 { NULL, NULL, NULL, gen_helper_sve_st1dd_r },
3920 static gen_helper_gvec_mem * const fn_multiple[3][4] = {
3921 { gen_helper_sve_st2bb_r, gen_helper_sve_st2hh_r,
3922 gen_helper_sve_st2ss_r, gen_helper_sve_st2dd_r },
3923 { gen_helper_sve_st3bb_r, gen_helper_sve_st3hh_r,
3924 gen_helper_sve_st3ss_r, gen_helper_sve_st3dd_r },
3925 { gen_helper_sve_st4bb_r, gen_helper_sve_st4hh_r,
3926 gen_helper_sve_st4ss_r, gen_helper_sve_st4dd_r },
3928 gen_helper_gvec_mem *fn;
3930 if (nreg == 0) {
3931 /* ST1 */
3932 fn = fn_single[msz][esz];
3933 } else {
3934 /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
3935 assert(msz == esz);
3936 fn = fn_multiple[nreg - 1][msz];
3937 }
3938 assert(fn != NULL);
3939 do_mem_zpa(s, zt, pg, addr, fn);
3942 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a, uint32_t insn)
3944 if (a->rm == 31 || a->msz > a->esz) {
3947 if (sve_access_check(s)) {
3948 TCGv_i64 addr = new_tmp_a64(s);
3949 tcg_gen_muli_i64(addr, cpu_reg(s, a->rm), (a->nreg + 1) << a->msz);
3950 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
3951 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
3956 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a, uint32_t insn)
3958 if (a->msz > a->esz) {
3961 if (sve_access_check(s)) {
3962 int vsz = vec_full_reg_size(s);
3963 int elements = vsz >> a->esz;
3964 TCGv_i64 addr = new_tmp_a64(s);
3966 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
3967 (a->imm * elements * (a->nreg + 1)) << a->msz);
3968 do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);