target/arm/translate-sve.c

   1 /*
   2  * AArch64 SVE translation
   3  *
   4  * Copyright (c) 2018 Linaro, Ltd
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #include "qemu/osdep.h"
  21 #include "cpu.h"
  22 #include "exec/exec-all.h"
  23 #include "tcg-op.h"
  24 #include "tcg-op-gvec.h"
  25 #include "tcg-gvec-desc.h"
  26 #include "qemu/log.h"
  27 #include "arm_ldst.h"
  28 #include "translate.h"
  29 #include "internals.h"
  30 #include "exec/helper-proto.h"
  31 #include "exec/helper-gen.h"
  32 #include "exec/log.h"
  33 #include "trace-tcg.h"
  34 #include "translate-a64.h"
  35 #include "fpu/softfloat.h"
  36
  37
  38 typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
  39                          TCGv_i64, uint32_t, uint32_t);
  40
  41 typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
  42                                      TCGv_ptr, TCGv_i32);
  43 typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
  44                                      TCGv_ptr, TCGv_ptr, TCGv_i32);
  45
  46 typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
  47 typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
  48                                          TCGv_ptr, TCGv_i64, TCGv_i32);
  49
  50 /*
  51  * Helpers for extracting complex instruction fields.
  52  */
  53
  54 /* See e.g. ASR (immediate, predicated).
  55  * Returns -1 for unallocated encoding; diagnose later.
  56  */
  57 static int tszimm_esz(int x)
  58 {
  59     x >>= 3;  /* discard imm3 */
  60     return 31 - clz32(x);
  61 }
  62
  63 static int tszimm_shr(int x)
  64 {
  65     return (16 << tszimm_esz(x)) - x;
  66 }
  67
  68 /* See e.g. LSL (immediate, predicated).  */
  69 static int tszimm_shl(int x)
  70 {
  71     return x - (8 << tszimm_esz(x));
  72 }
  73
  74 static inline int plus1(int x)
  75 {
  76     return x + 1;
  77 }
  78
  79 /* The SH bit is in bit 8.  Extract the low 8 and shift.  */
  80 static inline int expand_imm_sh8s(int x)
  81 {
  82     return (int8_t)x << (x & 0x100 ? 8 : 0);
  83 }
  84
  85 static inline int expand_imm_sh8u(int x)
  86 {
  87     return (uint8_t)x << (x & 0x100 ? 8 : 0);
  88 }
  89
  90 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
  91  * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
  92  */
  93 static inline int msz_dtype(int msz)
  94 {
  95     static const uint8_t dtype[4] = { 0, 5, 10, 15 };
  96     return dtype[msz];
  97 }
  98
  99 /*
 100  * Include the generated decoder.
 101  */
 102
 103 #include "decode-sve.inc.c"
 104
 105 /*
 106  * Implement all of the translator functions referenced by the decoder.
 107  */
 108
 109 /* Return the offset info CPUARMState of the predicate vector register Pn.
 110  * Note for this purpose, FFR is P16.
 111  */
 112 static inline int pred_full_reg_offset(DisasContext *s, int regno)
 113 {
 114     return offsetof(CPUARMState, vfp.pregs[regno]);
 115 }
 116
 117 /* Return the byte size of the whole predicate register, VL / 64.  */
 118 static inline int pred_full_reg_size(DisasContext *s)
 119 {
 120     return s->sve_len >> 3;
 121 }
 122
 123 /* Round up the size of a register to a size allowed by
 124  * the tcg vector infrastructure.  Any operation which uses this
 125  * size may assume that the bits above pred_full_reg_size are zero,
 126  * and must leave them the same way.
 127  *
 128  * Note that this is not needed for the vector registers as they
 129  * are always properly sized for tcg vectors.
 130  */
 131 static int size_for_gvec(int size)
 132 {
 133     if (size <= 8) {
 134         return 8;
 135     } else {
 136         return QEMU_ALIGN_UP(size, 16);
 137     }
 138 }
 139
 140 static int pred_gvec_reg_size(DisasContext *s)
 141 {
 142     return size_for_gvec(pred_full_reg_size(s));
 143 }
 144
 145 /* Invoke a vector expander on two Zregs.  */
 146 static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
 147                          int esz, int rd, int rn)
 148 {
 149     if (sve_access_check(s)) {
 150         unsigned vsz = vec_full_reg_size(s);
 151         gvec_fn(esz, vec_full_reg_offset(s, rd),
 152                 vec_full_reg_offset(s, rn), vsz, vsz);
 153     }
 154     return true;
 155 }
 156
 157 /* Invoke a vector expander on three Zregs.  */
 158 static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
 159                          int esz, int rd, int rn, int rm)
 160 {
 161     if (sve_access_check(s)) {
 162         unsigned vsz = vec_full_reg_size(s);
 163         gvec_fn(esz, vec_full_reg_offset(s, rd),
 164                 vec_full_reg_offset(s, rn),
 165                 vec_full_reg_offset(s, rm), vsz, vsz);
 166     }
 167     return true;
 168 }
 169
 170 /* Invoke a vector move on two Zregs.  */
 171 static bool do_mov_z(DisasContext *s, int rd, int rn)
 172 {
 173     return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
 174 }
 175
 176 /* Initialize a Zreg with replications of a 64-bit immediate.  */
 177 static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
 178 {
 179     unsigned vsz = vec_full_reg_size(s);
 180     tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
 181 }
 182
 183 /* Invoke a vector expander on two Pregs.  */
 184 static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
 185                          int esz, int rd, int rn)
 186 {
 187     if (sve_access_check(s)) {
 188         unsigned psz = pred_gvec_reg_size(s);
 189         gvec_fn(esz, pred_full_reg_offset(s, rd),
 190                 pred_full_reg_offset(s, rn), psz, psz);
 191     }
 192     return true;
 193 }
 194
 195 /* Invoke a vector expander on three Pregs.  */
 196 static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
 197                          int esz, int rd, int rn, int rm)
 198 {
 199     if (sve_access_check(s)) {
 200         unsigned psz = pred_gvec_reg_size(s);
 201         gvec_fn(esz, pred_full_reg_offset(s, rd),
 202                 pred_full_reg_offset(s, rn),
 203                 pred_full_reg_offset(s, rm), psz, psz);
 204     }
 205     return true;
 206 }
 207
 208 /* Invoke a vector operation on four Pregs.  */
 209 static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
 210                         int rd, int rn, int rm, int rg)
 211 {
 212     if (sve_access_check(s)) {
 213         unsigned psz = pred_gvec_reg_size(s);
 214         tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
 215                        pred_full_reg_offset(s, rn),
 216                        pred_full_reg_offset(s, rm),
 217                        pred_full_reg_offset(s, rg),
 218                        psz, psz, gvec_op);
 219     }
 220     return true;
 221 }
 222
 223 /* Invoke a vector move on two Pregs.  */
 224 static bool do_mov_p(DisasContext *s, int rd, int rn)
 225 {
 226     return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
 227 }
 228
 229 /* Set the cpu flags as per a return from an SVE helper.  */
 230 static void do_pred_flags(TCGv_i32 t)
 231 {
 232     tcg_gen_mov_i32(cpu_NF, t);
 233     tcg_gen_andi_i32(cpu_ZF, t, 2);
 234     tcg_gen_andi_i32(cpu_CF, t, 1);
 235     tcg_gen_movi_i32(cpu_VF, 0);
 236 }
 237
 238 /* Subroutines computing the ARM PredTest psuedofunction.  */
 239 static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
 240 {
 241     TCGv_i32 t = tcg_temp_new_i32();
 242
 243     gen_helper_sve_predtest1(t, d, g);
 244     do_pred_flags(t);
 245     tcg_temp_free_i32(t);
 246 }
 247
 248 static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
 249 {
 250     TCGv_ptr dptr = tcg_temp_new_ptr();
 251     TCGv_ptr gptr = tcg_temp_new_ptr();
 252     TCGv_i32 t;
 253
 254     tcg_gen_addi_ptr(dptr, cpu_env, dofs);
 255     tcg_gen_addi_ptr(gptr, cpu_env, gofs);
 256     t = tcg_const_i32(words);
 257
 258     gen_helper_sve_predtest(t, dptr, gptr, t);
 259     tcg_temp_free_ptr(dptr);
 260     tcg_temp_free_ptr(gptr);
 261
 262     do_pred_flags(t);
 263     tcg_temp_free_i32(t);
 264 }
 265
 266 /* For each element size, the bits within a predicate word that are active.  */
 267 const uint64_t pred_esz_masks[4] = {
 268     0xffffffffffffffffull, 0x5555555555555555ull,
 269     0x1111111111111111ull, 0x0101010101010101ull
 270 };
 271
 272 /*
 273  *** SVE Logical - Unpredicated Group
 274  */
 275
 276 static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 277 {
 278     return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
 279 }
 280
 281 static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 282 {
 283     if (a->rn == a->rm) { /* MOV */
 284         return do_mov_z(s, a->rd, a->rn);
 285     } else {
 286         return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
 287     }
 288 }
 289
 290 static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 291 {
 292     return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
 293 }
 294
 295 static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 296 {
 297     return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
 298 }
 299
 300 /*
 301  *** SVE Integer Arithmetic - Unpredicated Group
 302  */
 303
 304 static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 305 {
 306     return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
 307 }
 308
 309 static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 310 {
 311     return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
 312 }
 313
 314 static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 315 {
 316     return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
 317 }
 318
 319 static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 320 {
 321     return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
 322 }
 323
 324 static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 325 {
 326     return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
 327 }
 328
 329 static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 330 {
 331     return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
 332 }
 333
 334 /*
 335  *** SVE Integer Arithmetic - Binary Predicated Group
 336  */
 337
 338 static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
 339 {
 340     unsigned vsz = vec_full_reg_size(s);
 341     if (fn == NULL) {
 342         return false;
 343     }
 344     if (sve_access_check(s)) {
 345         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
 346                            vec_full_reg_offset(s, a->rn),
 347                            vec_full_reg_offset(s, a->rm),
 348                            pred_full_reg_offset(s, a->pg),
 349                            vsz, vsz, 0, fn);
 350     }
 351     return true;
 352 }
 353
 354 /* Select active elememnts from Zn and inactive elements from Zm,
 355  * storing the result in Zd.
 356  */
 357 static void do_sel_z(DisasContext *s, int rd, int rn, int rm, int pg, int esz)
 358 {
 359     static gen_helper_gvec_4 * const fns[4] = {
 360         gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
 361         gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
 362     };
 363     unsigned vsz = vec_full_reg_size(s);
 364     tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
 365                        vec_full_reg_offset(s, rn),
 366                        vec_full_reg_offset(s, rm),
 367                        pred_full_reg_offset(s, pg),
 368                        vsz, vsz, 0, fns[esz]);
 369 }
 370
 371 #define DO_ZPZZ(NAME, name) \
 372 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,         \
 373                                 uint32_t insn)                            \
 374 {                                                                         \
 375     static gen_helper_gvec_4 * const fns[4] = {                           \
 376         gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
 377         gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
 378     };                                                                    \
 379     return do_zpzz_ool(s, a, fns[a->esz]);                                \
 380 }
 381
 382 DO_ZPZZ(AND, and)
 383 DO_ZPZZ(EOR, eor)
 384 DO_ZPZZ(ORR, orr)
 385 DO_ZPZZ(BIC, bic)
 386
 387 DO_ZPZZ(ADD, add)
 388 DO_ZPZZ(SUB, sub)
 389
 390 DO_ZPZZ(SMAX, smax)
 391 DO_ZPZZ(UMAX, umax)
 392 DO_ZPZZ(SMIN, smin)
 393 DO_ZPZZ(UMIN, umin)
 394 DO_ZPZZ(SABD, sabd)
 395 DO_ZPZZ(UABD, uabd)
 396
 397 DO_ZPZZ(MUL, mul)
 398 DO_ZPZZ(SMULH, smulh)
 399 DO_ZPZZ(UMULH, umulh)
 400
 401 DO_ZPZZ(ASR, asr)
 402 DO_ZPZZ(LSR, lsr)
 403 DO_ZPZZ(LSL, lsl)
 404
 405 static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
 406 {
 407     static gen_helper_gvec_4 * const fns[4] = {
 408         NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
 409     };
 410     return do_zpzz_ool(s, a, fns[a->esz]);
 411 }
 412
 413 static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
 414 {
 415     static gen_helper_gvec_4 * const fns[4] = {
 416         NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
 417     };
 418     return do_zpzz_ool(s, a, fns[a->esz]);
 419 }
 420
 421 static bool trans_SEL_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
 422 {
 423     if (sve_access_check(s)) {
 424         do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
 425     }
 426     return true;
 427 }
 428
 429 #undef DO_ZPZZ
 430
 431 /*
 432  *** SVE Integer Arithmetic - Unary Predicated Group
 433  */
 434
 435 static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
 436 {
 437     if (fn == NULL) {
 438         return false;
 439     }
 440     if (sve_access_check(s)) {
 441         unsigned vsz = vec_full_reg_size(s);
 442         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 443                            vec_full_reg_offset(s, a->rn),
 444                            pred_full_reg_offset(s, a->pg),
 445                            vsz, vsz, 0, fn);
 446     }
 447     return true;
 448 }
 449
 450 #define DO_ZPZ(NAME, name) \
 451 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
 452 {                                                                   \
 453     static gen_helper_gvec_3 * const fns[4] = {                     \
 454         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
 455         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
 456     };                                                              \
 457     return do_zpz_ool(s, a, fns[a->esz]);                           \
 458 }
 459
 460 DO_ZPZ(CLS, cls)
 461 DO_ZPZ(CLZ, clz)
 462 DO_ZPZ(CNT_zpz, cnt_zpz)
 463 DO_ZPZ(CNOT, cnot)
 464 DO_ZPZ(NOT_zpz, not_zpz)
 465 DO_ZPZ(ABS, abs)
 466 DO_ZPZ(NEG, neg)
 467
 468 static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 469 {
 470     static gen_helper_gvec_3 * const fns[4] = {
 471         NULL,
 472         gen_helper_sve_fabs_h,
 473         gen_helper_sve_fabs_s,
 474         gen_helper_sve_fabs_d
 475     };
 476     return do_zpz_ool(s, a, fns[a->esz]);
 477 }
 478
 479 static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 480 {
 481     static gen_helper_gvec_3 * const fns[4] = {
 482         NULL,
 483         gen_helper_sve_fneg_h,
 484         gen_helper_sve_fneg_s,
 485         gen_helper_sve_fneg_d
 486     };
 487     return do_zpz_ool(s, a, fns[a->esz]);
 488 }
 489
 490 static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 491 {
 492     static gen_helper_gvec_3 * const fns[4] = {
 493         NULL,
 494         gen_helper_sve_sxtb_h,
 495         gen_helper_sve_sxtb_s,
 496         gen_helper_sve_sxtb_d
 497     };
 498     return do_zpz_ool(s, a, fns[a->esz]);
 499 }
 500
 501 static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 502 {
 503     static gen_helper_gvec_3 * const fns[4] = {
 504         NULL,
 505         gen_helper_sve_uxtb_h,
 506         gen_helper_sve_uxtb_s,
 507         gen_helper_sve_uxtb_d
 508     };
 509     return do_zpz_ool(s, a, fns[a->esz]);
 510 }
 511
 512 static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 513 {
 514     static gen_helper_gvec_3 * const fns[4] = {
 515         NULL, NULL,
 516         gen_helper_sve_sxth_s,
 517         gen_helper_sve_sxth_d
 518     };
 519     return do_zpz_ool(s, a, fns[a->esz]);
 520 }
 521
 522 static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 523 {
 524     static gen_helper_gvec_3 * const fns[4] = {
 525         NULL, NULL,
 526         gen_helper_sve_uxth_s,
 527         gen_helper_sve_uxth_d
 528     };
 529     return do_zpz_ool(s, a, fns[a->esz]);
 530 }
 531
 532 static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 533 {
 534     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
 535 }
 536
 537 static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 538 {
 539     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
 540 }
 541
 542 #undef DO_ZPZ
 543
 544 /*
 545  *** SVE Integer Reduction Group
 546  */
 547
 548 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
 549 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
 550                        gen_helper_gvec_reduc *fn)
 551 {
 552     unsigned vsz = vec_full_reg_size(s);
 553     TCGv_ptr t_zn, t_pg;
 554     TCGv_i32 desc;
 555     TCGv_i64 temp;
 556
 557     if (fn == NULL) {
 558         return false;
 559     }
 560     if (!sve_access_check(s)) {
 561         return true;
 562     }
 563
 564     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
 565     temp = tcg_temp_new_i64();
 566     t_zn = tcg_temp_new_ptr();
 567     t_pg = tcg_temp_new_ptr();
 568
 569     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
 570     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
 571     fn(temp, t_zn, t_pg, desc);
 572     tcg_temp_free_ptr(t_zn);
 573     tcg_temp_free_ptr(t_pg);
 574     tcg_temp_free_i32(desc);
 575
 576     write_fp_dreg(s, a->rd, temp);
 577     tcg_temp_free_i64(temp);
 578     return true;
 579 }
 580
 581 #define DO_VPZ(NAME, name) \
 582 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
 583 {                                                                        \
 584     static gen_helper_gvec_reduc * const fns[4] = {                      \
 585         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
 586         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
 587     };                                                                   \
 588     return do_vpz_ool(s, a, fns[a->esz]);                                \
 589 }
 590
 591 DO_VPZ(ORV, orv)
 592 DO_VPZ(ANDV, andv)
 593 DO_VPZ(EORV, eorv)
 594
 595 DO_VPZ(UADDV, uaddv)
 596 DO_VPZ(SMAXV, smaxv)
 597 DO_VPZ(UMAXV, umaxv)
 598 DO_VPZ(SMINV, sminv)
 599 DO_VPZ(UMINV, uminv)
 600
 601 static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 602 {
 603     static gen_helper_gvec_reduc * const fns[4] = {
 604         gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
 605         gen_helper_sve_saddv_s, NULL
 606     };
 607     return do_vpz_ool(s, a, fns[a->esz]);
 608 }
 609
 610 #undef DO_VPZ
 611
 612 /*
 613  *** SVE Shift by Immediate - Predicated Group
 614  */
 615
 616 /* Store zero into every active element of Zd.  We will use this for two
 617  * and three-operand predicated instructions for which logic dictates a
 618  * zero result.
 619  */
 620 static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
 621 {
 622     static gen_helper_gvec_2 * const fns[4] = {
 623         gen_helper_sve_clr_b, gen_helper_sve_clr_h,
 624         gen_helper_sve_clr_s, gen_helper_sve_clr_d,
 625     };
 626     if (sve_access_check(s)) {
 627         unsigned vsz = vec_full_reg_size(s);
 628         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
 629                            pred_full_reg_offset(s, pg),
 630                            vsz, vsz, 0, fns[esz]);
 631     }
 632     return true;
 633 }
 634
 635 /* Copy Zn into Zd, storing zeros into inactive elements.  */
 636 static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
 637 {
 638     static gen_helper_gvec_3 * const fns[4] = {
 639         gen_helper_sve_movz_b, gen_helper_sve_movz_h,
 640         gen_helper_sve_movz_s, gen_helper_sve_movz_d,
 641     };
 642     unsigned vsz = vec_full_reg_size(s);
 643     tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
 644                        vec_full_reg_offset(s, rn),
 645                        pred_full_reg_offset(s, pg),
 646                        vsz, vsz, 0, fns[esz]);
 647 }
 648
 649 static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
 650                         gen_helper_gvec_3 *fn)
 651 {
 652     if (sve_access_check(s)) {
 653         unsigned vsz = vec_full_reg_size(s);
 654         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 655                            vec_full_reg_offset(s, a->rn),
 656                            pred_full_reg_offset(s, a->pg),
 657                            vsz, vsz, a->imm, fn);
 658     }
 659     return true;
 660 }
 661
 662 static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
 663 {
 664     static gen_helper_gvec_3 * const fns[4] = {
 665         gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
 666         gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
 667     };
 668     if (a->esz < 0) {
 669         /* Invalid tsz encoding -- see tszimm_esz. */
 670         return false;
 671     }
 672     /* Shift by element size is architecturally valid.  For
 673        arithmetic right-shift, it's the same as by one less. */
 674     a->imm = MIN(a->imm, (8 << a->esz) - 1);
 675     return do_zpzi_ool(s, a, fns[a->esz]);
 676 }
 677
 678 static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
 679 {
 680     static gen_helper_gvec_3 * const fns[4] = {
 681         gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
 682         gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
 683     };
 684     if (a->esz < 0) {
 685         return false;
 686     }
 687     /* Shift by element size is architecturally valid.
 688        For logical shifts, it is a zeroing operation.  */
 689     if (a->imm >= (8 << a->esz)) {
 690         return do_clr_zp(s, a->rd, a->pg, a->esz);
 691     } else {
 692         return do_zpzi_ool(s, a, fns[a->esz]);
 693     }
 694 }
 695
 696 static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
 697 {
 698     static gen_helper_gvec_3 * const fns[4] = {
 699         gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
 700         gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
 701     };
 702     if (a->esz < 0) {
 703         return false;
 704     }
 705     /* Shift by element size is architecturally valid.
 706        For logical shifts, it is a zeroing operation.  */
 707     if (a->imm >= (8 << a->esz)) {
 708         return do_clr_zp(s, a->rd, a->pg, a->esz);
 709     } else {
 710         return do_zpzi_ool(s, a, fns[a->esz]);
 711     }
 712 }
 713
 714 static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
 715 {
 716     static gen_helper_gvec_3 * const fns[4] = {
 717         gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
 718         gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
 719     };
 720     if (a->esz < 0) {
 721         return false;
 722     }
 723     /* Shift by element size is architecturally valid.  For arithmetic
 724        right shift for division, it is a zeroing operation.  */
 725     if (a->imm >= (8 << a->esz)) {
 726         return do_clr_zp(s, a->rd, a->pg, a->esz);
 727     } else {
 728         return do_zpzi_ool(s, a, fns[a->esz]);
 729     }
 730 }
 731
 732 /*
 733  *** SVE Bitwise Shift - Predicated Group
 734  */
 735
 736 #define DO_ZPZW(NAME, name) \
 737 static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
 738                                 uint32_t insn)                            \
 739 {                                                                         \
 740     static gen_helper_gvec_4 * const fns[3] = {                           \
 741         gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
 742         gen_helper_sve_##name##_zpzw_s,                                   \
 743     };                                                                    \
 744     if (a->esz < 0 || a->esz >= 3) {                                      \
 745         return false;                                                     \
 746     }                                                                     \
 747     return do_zpzz_ool(s, a, fns[a->esz]);                                \
 748 }
 749
 750 DO_ZPZW(ASR, asr)
 751 DO_ZPZW(LSR, lsr)
 752 DO_ZPZW(LSL, lsl)
 753
 754 #undef DO_ZPZW
 755
 756 /*
 757  *** SVE Bitwise Shift - Unpredicated Group
 758  */
 759
 760 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
 761                          void (*gvec_fn)(unsigned, uint32_t, uint32_t,
 762                                          int64_t, uint32_t, uint32_t))
 763 {
 764     if (a->esz < 0) {
 765         /* Invalid tsz encoding -- see tszimm_esz. */
 766         return false;
 767     }
 768     if (sve_access_check(s)) {
 769         unsigned vsz = vec_full_reg_size(s);
 770         /* Shift by element size is architecturally valid.  For
 771            arithmetic right-shift, it's the same as by one less.
 772            Otherwise it is a zeroing operation.  */
 773         if (a->imm >= 8 << a->esz) {
 774             if (asr) {
 775                 a->imm = (8 << a->esz) - 1;
 776             } else {
 777                 do_dupi_z(s, a->rd, 0);
 778                 return true;
 779             }
 780         }
 781         gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
 782                 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
 783     }
 784     return true;
 785 }
 786
 787 static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
 788 {
 789     return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
 790 }
 791
 792 static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
 793 {
 794     return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
 795 }
 796
 797 static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
 798 {
 799     return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
 800 }
 801
 802 static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
 803 {
 804     if (fn == NULL) {
 805         return false;
 806     }
 807     if (sve_access_check(s)) {
 808         unsigned vsz = vec_full_reg_size(s);
 809         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 810                            vec_full_reg_offset(s, a->rn),
 811                            vec_full_reg_offset(s, a->rm),
 812                            vsz, vsz, 0, fn);
 813     }
 814     return true;
 815 }
 816
 817 #define DO_ZZW(NAME, name) \
 818 static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a,           \
 819                                uint32_t insn)                             \
 820 {                                                                         \
 821     static gen_helper_gvec_3 * const fns[4] = {                           \
 822         gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
 823         gen_helper_sve_##name##_zzw_s, NULL                               \
 824     };                                                                    \
 825     return do_zzw_ool(s, a, fns[a->esz]);                                 \
 826 }
 827
 828 DO_ZZW(ASR, asr)
 829 DO_ZZW(LSR, lsr)
 830 DO_ZZW(LSL, lsl)
 831
 832 #undef DO_ZZW
 833
 834 /*
 835  *** SVE Integer Multiply-Add Group
 836  */
 837
 838 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
 839                          gen_helper_gvec_5 *fn)
 840 {
 841     if (sve_access_check(s)) {
 842         unsigned vsz = vec_full_reg_size(s);
 843         tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
 844                            vec_full_reg_offset(s, a->ra),
 845                            vec_full_reg_offset(s, a->rn),
 846                            vec_full_reg_offset(s, a->rm),
 847                            pred_full_reg_offset(s, a->pg),
 848                            vsz, vsz, 0, fn);
 849     }
 850     return true;
 851 }
 852
 853 #define DO_ZPZZZ(NAME, name) \
 854 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
 855 {                                                                    \
 856     static gen_helper_gvec_5 * const fns[4] = {                      \
 857         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
 858         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
 859     };                                                               \
 860     return do_zpzzz_ool(s, a, fns[a->esz]);                          \
 861 }
 862
 863 DO_ZPZZZ(MLA, mla)
 864 DO_ZPZZZ(MLS, mls)
 865
 866 #undef DO_ZPZZZ
 867
 868 /*
 869  *** SVE Index Generation Group
 870  */
 871
 872 static void do_index(DisasContext *s, int esz, int rd,
 873                      TCGv_i64 start, TCGv_i64 incr)
 874 {
 875     unsigned vsz = vec_full_reg_size(s);
 876     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
 877     TCGv_ptr t_zd = tcg_temp_new_ptr();
 878
 879     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
 880     if (esz == 3) {
 881         gen_helper_sve_index_d(t_zd, start, incr, desc);
 882     } else {
 883         typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
 884         static index_fn * const fns[3] = {
 885             gen_helper_sve_index_b,
 886             gen_helper_sve_index_h,
 887             gen_helper_sve_index_s,
 888         };
 889         TCGv_i32 s32 = tcg_temp_new_i32();
 890         TCGv_i32 i32 = tcg_temp_new_i32();
 891
 892         tcg_gen_extrl_i64_i32(s32, start);
 893         tcg_gen_extrl_i64_i32(i32, incr);
 894         fns[esz](t_zd, s32, i32, desc);
 895
 896         tcg_temp_free_i32(s32);
 897         tcg_temp_free_i32(i32);
 898     }
 899     tcg_temp_free_ptr(t_zd);
 900     tcg_temp_free_i32(desc);
 901 }
 902
 903 static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
 904 {
 905     if (sve_access_check(s)) {
 906         TCGv_i64 start = tcg_const_i64(a->imm1);
 907         TCGv_i64 incr = tcg_const_i64(a->imm2);
 908         do_index(s, a->esz, a->rd, start, incr);
 909         tcg_temp_free_i64(start);
 910         tcg_temp_free_i64(incr);
 911     }
 912     return true;
 913 }
 914
 915 static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
 916 {
 917     if (sve_access_check(s)) {
 918         TCGv_i64 start = tcg_const_i64(a->imm);
 919         TCGv_i64 incr = cpu_reg(s, a->rm);
 920         do_index(s, a->esz, a->rd, start, incr);
 921         tcg_temp_free_i64(start);
 922     }
 923     return true;
 924 }
 925
 926 static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
 927 {
 928     if (sve_access_check(s)) {
 929         TCGv_i64 start = cpu_reg(s, a->rn);
 930         TCGv_i64 incr = tcg_const_i64(a->imm);
 931         do_index(s, a->esz, a->rd, start, incr);
 932         tcg_temp_free_i64(incr);
 933     }
 934     return true;
 935 }
 936
 937 static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
 938 {
 939     if (sve_access_check(s)) {
 940         TCGv_i64 start = cpu_reg(s, a->rn);
 941         TCGv_i64 incr = cpu_reg(s, a->rm);
 942         do_index(s, a->esz, a->rd, start, incr);
 943     }
 944     return true;
 945 }
 946
 947 /*
 948  *** SVE Stack Allocation Group
 949  */
 950
 951 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
 952 {
 953     TCGv_i64 rd = cpu_reg_sp(s, a->rd);
 954     TCGv_i64 rn = cpu_reg_sp(s, a->rn);
 955     tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
 956     return true;
 957 }
 958
 959 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
 960 {
 961     TCGv_i64 rd = cpu_reg_sp(s, a->rd);
 962     TCGv_i64 rn = cpu_reg_sp(s, a->rn);
 963     tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
 964     return true;
 965 }
 966
 967 static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
 968 {
 969     TCGv_i64 reg = cpu_reg(s, a->rd);
 970     tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
 971     return true;
 972 }
 973
 974 /*
 975  *** SVE Compute Vector Address Group
 976  */
 977
 978 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
 979 {
 980     if (sve_access_check(s)) {
 981         unsigned vsz = vec_full_reg_size(s);
 982         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 983                            vec_full_reg_offset(s, a->rn),
 984                            vec_full_reg_offset(s, a->rm),
 985                            vsz, vsz, a->imm, fn);
 986     }
 987     return true;
 988 }
 989
 990 static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
 991 {
 992     return do_adr(s, a, gen_helper_sve_adr_p32);
 993 }
 994
 995 static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
 996 {
 997     return do_adr(s, a, gen_helper_sve_adr_p64);
 998 }
 999
1000 static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
1001 {
1002     return do_adr(s, a, gen_helper_sve_adr_s32);
1003 }
1004
1005 static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
1006 {
1007     return do_adr(s, a, gen_helper_sve_adr_u32);
1008 }
1009
1010 /*
1011  *** SVE Integer Misc - Unpredicated Group
1012  */
1013
1014 static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1015 {
1016     static gen_helper_gvec_2 * const fns[4] = {
1017         NULL,
1018         gen_helper_sve_fexpa_h,
1019         gen_helper_sve_fexpa_s,
1020         gen_helper_sve_fexpa_d,
1021     };
1022     if (a->esz == 0) {
1023         return false;
1024     }
1025     if (sve_access_check(s)) {
1026         unsigned vsz = vec_full_reg_size(s);
1027         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
1028                            vec_full_reg_offset(s, a->rn),
1029                            vsz, vsz, 0, fns[a->esz]);
1030     }
1031     return true;
1032 }
1033
1034 static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
1035 {
1036     static gen_helper_gvec_3 * const fns[4] = {
1037         NULL,
1038         gen_helper_sve_ftssel_h,
1039         gen_helper_sve_ftssel_s,
1040         gen_helper_sve_ftssel_d,
1041     };
1042     if (a->esz == 0) {
1043         return false;
1044     }
1045     if (sve_access_check(s)) {
1046         unsigned vsz = vec_full_reg_size(s);
1047         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
1048                            vec_full_reg_offset(s, a->rn),
1049                            vec_full_reg_offset(s, a->rm),
1050                            vsz, vsz, 0, fns[a->esz]);
1051     }
1052     return true;
1053 }
1054
1055 /*
1056  *** SVE Predicate Logical Operations Group
1057  */
1058
1059 static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
1060                           const GVecGen4 *gvec_op)
1061 {
1062     if (!sve_access_check(s)) {
1063         return true;
1064     }
1065
1066     unsigned psz = pred_gvec_reg_size(s);
1067     int dofs = pred_full_reg_offset(s, a->rd);
1068     int nofs = pred_full_reg_offset(s, a->rn);
1069     int mofs = pred_full_reg_offset(s, a->rm);
1070     int gofs = pred_full_reg_offset(s, a->pg);
1071
1072     if (psz == 8) {
1073         /* Do the operation and the flags generation in temps.  */
1074         TCGv_i64 pd = tcg_temp_new_i64();
1075         TCGv_i64 pn = tcg_temp_new_i64();
1076         TCGv_i64 pm = tcg_temp_new_i64();
1077         TCGv_i64 pg = tcg_temp_new_i64();
1078
1079         tcg_gen_ld_i64(pn, cpu_env, nofs);
1080         tcg_gen_ld_i64(pm, cpu_env, mofs);
1081         tcg_gen_ld_i64(pg, cpu_env, gofs);
1082
1083         gvec_op->fni8(pd, pn, pm, pg);
1084         tcg_gen_st_i64(pd, cpu_env, dofs);
1085
1086         do_predtest1(pd, pg);
1087
1088         tcg_temp_free_i64(pd);
1089         tcg_temp_free_i64(pn);
1090         tcg_temp_free_i64(pm);
1091         tcg_temp_free_i64(pg);
1092     } else {
1093         /* The operation and flags generation is large.  The computation
1094          * of the flags depends on the original contents of the guarding
1095          * predicate.  If the destination overwrites the guarding predicate,
1096          * then the easiest way to get this right is to save a copy.
1097           */
1098         int tofs = gofs;
1099         if (a->rd == a->pg) {
1100             tofs = offsetof(CPUARMState, vfp.preg_tmp);
1101             tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
1102         }
1103
1104         tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1105         do_predtest(s, dofs, tofs, psz / 8);
1106     }
1107     return true;
1108 }
1109
1110 static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1111 {
1112     tcg_gen_and_i64(pd, pn, pm);
1113     tcg_gen_and_i64(pd, pd, pg);
1114 }
1115
1116 static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1117                            TCGv_vec pm, TCGv_vec pg)
1118 {
1119     tcg_gen_and_vec(vece, pd, pn, pm);
1120     tcg_gen_and_vec(vece, pd, pd, pg);
1121 }
1122
1123 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1124 {
1125     static const GVecGen4 op = {
1126         .fni8 = gen_and_pg_i64,
1127         .fniv = gen_and_pg_vec,
1128         .fno = gen_helper_sve_and_pppp,
1129         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1130     };
1131     if (a->s) {
1132         return do_pppp_flags(s, a, &op);
1133     } else if (a->rn == a->rm) {
1134         if (a->pg == a->rn) {
1135             return do_mov_p(s, a->rd, a->rn);
1136         } else {
1137             return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
1138         }
1139     } else if (a->pg == a->rn || a->pg == a->rm) {
1140         return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
1141     } else {
1142         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1143     }
1144 }
1145
1146 static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1147 {
1148     tcg_gen_andc_i64(pd, pn, pm);
1149     tcg_gen_and_i64(pd, pd, pg);
1150 }
1151
1152 static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1153                            TCGv_vec pm, TCGv_vec pg)
1154 {
1155     tcg_gen_andc_vec(vece, pd, pn, pm);
1156     tcg_gen_and_vec(vece, pd, pd, pg);
1157 }
1158
1159 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1160 {
1161     static const GVecGen4 op = {
1162         .fni8 = gen_bic_pg_i64,
1163         .fniv = gen_bic_pg_vec,
1164         .fno = gen_helper_sve_bic_pppp,
1165         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1166     };
1167     if (a->s) {
1168         return do_pppp_flags(s, a, &op);
1169     } else if (a->pg == a->rn) {
1170         return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
1171     } else {
1172         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1173     }
1174 }
1175
1176 static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1177 {
1178     tcg_gen_xor_i64(pd, pn, pm);
1179     tcg_gen_and_i64(pd, pd, pg);
1180 }
1181
1182 static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1183                            TCGv_vec pm, TCGv_vec pg)
1184 {
1185     tcg_gen_xor_vec(vece, pd, pn, pm);
1186     tcg_gen_and_vec(vece, pd, pd, pg);
1187 }
1188
1189 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1190 {
1191     static const GVecGen4 op = {
1192         .fni8 = gen_eor_pg_i64,
1193         .fniv = gen_eor_pg_vec,
1194         .fno = gen_helper_sve_eor_pppp,
1195         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1196     };
1197     if (a->s) {
1198         return do_pppp_flags(s, a, &op);
1199     } else {
1200         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1201     }
1202 }
1203
1204 static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1205 {
1206     tcg_gen_and_i64(pn, pn, pg);
1207     tcg_gen_andc_i64(pm, pm, pg);
1208     tcg_gen_or_i64(pd, pn, pm);
1209 }
1210
1211 static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1212                            TCGv_vec pm, TCGv_vec pg)
1213 {
1214     tcg_gen_and_vec(vece, pn, pn, pg);
1215     tcg_gen_andc_vec(vece, pm, pm, pg);
1216     tcg_gen_or_vec(vece, pd, pn, pm);
1217 }
1218
1219 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1220 {
1221     static const GVecGen4 op = {
1222         .fni8 = gen_sel_pg_i64,
1223         .fniv = gen_sel_pg_vec,
1224         .fno = gen_helper_sve_sel_pppp,
1225         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1226     };
1227     if (a->s) {
1228         return false;
1229     } else {
1230         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1231     }
1232 }
1233
1234 static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1235 {
1236     tcg_gen_or_i64(pd, pn, pm);
1237     tcg_gen_and_i64(pd, pd, pg);
1238 }
1239
1240 static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1241                            TCGv_vec pm, TCGv_vec pg)
1242 {
1243     tcg_gen_or_vec(vece, pd, pn, pm);
1244     tcg_gen_and_vec(vece, pd, pd, pg);
1245 }
1246
1247 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1248 {
1249     static const GVecGen4 op = {
1250         .fni8 = gen_orr_pg_i64,
1251         .fniv = gen_orr_pg_vec,
1252         .fno = gen_helper_sve_orr_pppp,
1253         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1254     };
1255     if (a->s) {
1256         return do_pppp_flags(s, a, &op);
1257     } else if (a->pg == a->rn && a->rn == a->rm) {
1258         return do_mov_p(s, a->rd, a->rn);
1259     } else {
1260         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1261     }
1262 }
1263
1264 static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1265 {
1266     tcg_gen_orc_i64(pd, pn, pm);
1267     tcg_gen_and_i64(pd, pd, pg);
1268 }
1269
1270 static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1271                            TCGv_vec pm, TCGv_vec pg)
1272 {
1273     tcg_gen_orc_vec(vece, pd, pn, pm);
1274     tcg_gen_and_vec(vece, pd, pd, pg);
1275 }
1276
1277 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1278 {
1279     static const GVecGen4 op = {
1280         .fni8 = gen_orn_pg_i64,
1281         .fniv = gen_orn_pg_vec,
1282         .fno = gen_helper_sve_orn_pppp,
1283         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1284     };
1285     if (a->s) {
1286         return do_pppp_flags(s, a, &op);
1287     } else {
1288         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1289     }
1290 }
1291
1292 static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1293 {
1294     tcg_gen_or_i64(pd, pn, pm);
1295     tcg_gen_andc_i64(pd, pg, pd);
1296 }
1297
1298 static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1299                            TCGv_vec pm, TCGv_vec pg)
1300 {
1301     tcg_gen_or_vec(vece, pd, pn, pm);
1302     tcg_gen_andc_vec(vece, pd, pg, pd);
1303 }
1304
1305 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1306 {
1307     static const GVecGen4 op = {
1308         .fni8 = gen_nor_pg_i64,
1309         .fniv = gen_nor_pg_vec,
1310         .fno = gen_helper_sve_nor_pppp,
1311         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1312     };
1313     if (a->s) {
1314         return do_pppp_flags(s, a, &op);
1315     } else {
1316         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1317     }
1318 }
1319
1320 static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1321 {
1322     tcg_gen_and_i64(pd, pn, pm);
1323     tcg_gen_andc_i64(pd, pg, pd);
1324 }
1325
1326 static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1327                            TCGv_vec pm, TCGv_vec pg)
1328 {
1329     tcg_gen_and_vec(vece, pd, pn, pm);
1330     tcg_gen_andc_vec(vece, pd, pg, pd);
1331 }
1332
1333 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1334 {
1335     static const GVecGen4 op = {
1336         .fni8 = gen_nand_pg_i64,
1337         .fniv = gen_nand_pg_vec,
1338         .fno = gen_helper_sve_nand_pppp,
1339         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1340     };
1341     if (a->s) {
1342         return do_pppp_flags(s, a, &op);
1343     } else {
1344         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1345     }
1346 }
1347
1348 /*
1349  *** SVE Predicate Misc Group
1350  */
1351
1352 static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
1353 {
1354     if (sve_access_check(s)) {
1355         int nofs = pred_full_reg_offset(s, a->rn);
1356         int gofs = pred_full_reg_offset(s, a->pg);
1357         int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1358
1359         if (words == 1) {
1360             TCGv_i64 pn = tcg_temp_new_i64();
1361             TCGv_i64 pg = tcg_temp_new_i64();
1362
1363             tcg_gen_ld_i64(pn, cpu_env, nofs);
1364             tcg_gen_ld_i64(pg, cpu_env, gofs);
1365             do_predtest1(pn, pg);
1366
1367             tcg_temp_free_i64(pn);
1368             tcg_temp_free_i64(pg);
1369         } else {
1370             do_predtest(s, nofs, gofs, words);
1371         }
1372     }
1373     return true;
1374 }
1375
/* See the ARM pseudocode DecodePredCount.
 *
 * FULLSZ >> ESZ is the number of elements available.  Returns how
 * many of them PATTERN selects; patterns that ask for a fixed count
 * yield 0 when that many elements are not available, as do all
 * patterns not listed below.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    if (pattern == 0x0) {
        /* POW2 */
        return pow2floor(elements);
    }
    if (pattern == 0x1d) {
        /* MUL4 */
        return elements - elements % 4;
    }
    if (pattern == 0x1e) {
        /* MUL3 */
        return elements - elements % 3;
    }
    if (pattern == 0x1f) {
        /* ALL */
        return elements;
    }

    if (pattern >= 0x1 && pattern <= 0x8) {
        /* VL1 .. VL8 */
        bound = pattern;
    } else if (pattern >= 0x9 && pattern <= 0xd) {
        /* VL16, VL32, VL64, VL128, VL256 */
        bound = 16 << (pattern - 9);
    } else {
        /* #uimm5 */
        return 0;
    }

    return elements >= bound ? bound : 0;
}
1413
1414 /* This handles all of the predicate initialization instructions,
1415  * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
1416  * so that decode_pred_count returns 0.  For SETFFR, we will have
1417  * set RD == 16 == FFR.
1418  */
1419 static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1420 {
1421     if (!sve_access_check(s)) {
1422         return true;
1423     }
1424
1425     unsigned fullsz = vec_full_reg_size(s);
1426     unsigned ofs = pred_full_reg_offset(s, rd);
1427     unsigned numelem, setsz, i;
1428     uint64_t word, lastword;
1429     TCGv_i64 t;
1430
1431     numelem = decode_pred_count(fullsz, pat, esz);
1432
1433     /* Determine what we must store into each bit, and how many.  */
1434     if (numelem == 0) {
1435         lastword = word = 0;
1436         setsz = fullsz;
1437     } else {
1438         setsz = numelem << esz;
1439         lastword = word = pred_esz_masks[esz];
1440         if (setsz % 64) {
1441             lastword &= MAKE_64BIT_MASK(0, setsz % 64);
1442         }
1443     }
1444
1445     t = tcg_temp_new_i64();
1446     if (fullsz <= 64) {
1447         tcg_gen_movi_i64(t, lastword);
1448         tcg_gen_st_i64(t, cpu_env, ofs);
1449         goto done;
1450     }
1451
1452     if (word == lastword) {
1453         unsigned maxsz = size_for_gvec(fullsz / 8);
1454         unsigned oprsz = size_for_gvec(setsz / 8);
1455
1456         if (oprsz * 8 == setsz) {
1457             tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1458             goto done;
1459         }
1460     }
1461
1462     setsz /= 8;
1463     fullsz /= 8;
1464
1465     tcg_gen_movi_i64(t, word);
1466     for (i = 0; i < QEMU_ALIGN_DOWN(setsz, 8); i += 8) {
1467         tcg_gen_st_i64(t, cpu_env, ofs + i);
1468     }
1469     if (lastword != word) {
1470         tcg_gen_movi_i64(t, lastword);
1471         tcg_gen_st_i64(t, cpu_env, ofs + i);
1472         i += 8;
1473     }
1474     if (i < fullsz) {
1475         tcg_gen_movi_i64(t, 0);
1476         for (; i < fullsz; i += 8) {
1477             tcg_gen_st_i64(t, cpu_env, ofs + i);
1478         }
1479     }
1480
1481  done:
1482     tcg_temp_free_i64(t);
1483
1484     /* PTRUES */
1485     if (setflag) {
1486         tcg_gen_movi_i32(cpu_NF, -(word != 0));
1487         tcg_gen_movi_i32(cpu_CF, word == 0);
1488         tcg_gen_movi_i32(cpu_VF, 0);
1489         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1490     }
1491     return true;
1492 }
1493
1494 static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
1495 {
1496     return do_predset(s, a->esz, a->rd, a->pat, a->s);
1497 }
1498
1499 static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
1500 {
1501     /* Note pat == 31 is #all, to set all elements.  */
1502     return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1503 }
1504
1505 static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
1506 {
1507     /* Note pat == 32 is #unimp, to set no elements.  */
1508     return do_predset(s, 0, a->rd, 32, false);
1509 }
1510
1511 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
1512 {
1513     /* The path through do_pppp_flags is complicated enough to want to avoid
1514      * duplication.  Frob the arguments into the form of a predicated AND.
1515      */
1516     arg_rprr_s alt_a = {
1517         .rd = a->rd, .pg = a->pg, .s = a->s,
1518         .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1519     };
1520     return trans_AND_pppp(s, &alt_a, insn);
1521 }
1522
1523 static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
1524 {
1525     return do_mov_p(s, a->rd, FFR_PRED_NUM);
1526 }
1527
1528 static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
1529 {
1530     return do_mov_p(s, FFR_PRED_NUM, a->rn);
1531 }
1532
1533 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1534                             void (*gen_fn)(TCGv_i32, TCGv_ptr,
1535                                            TCGv_ptr, TCGv_i32))
1536 {
1537     if (!sve_access_check(s)) {
1538         return true;
1539     }
1540
1541     TCGv_ptr t_pd = tcg_temp_new_ptr();
1542     TCGv_ptr t_pg = tcg_temp_new_ptr();
1543     TCGv_i32 t;
1544     unsigned desc;
1545
1546     desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1547     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
1548
1549     tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1550     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1551     t = tcg_const_i32(desc);
1552
1553     gen_fn(t, t_pd, t_pg, t);
1554     tcg_temp_free_ptr(t_pd);
1555     tcg_temp_free_ptr(t_pg);
1556
1557     do_pred_flags(t);
1558     tcg_temp_free_i32(t);
1559     return true;
1560 }
1561
1562 static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1563 {
1564     return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1565 }
1566
1567 static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1568 {
1569     return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1570 }
1571
1572 /*
1573  *** SVE Element Count Group
1574  */
1575
1576 /* Perform an inline saturating addition of a 32-bit value within
1577  * a 64-bit register.  The second operand is known to be positive,
1578  * which halves the comparisions we must perform to bound the result.
1579  */
1580 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1581 {
1582     int64_t ibound;
1583     TCGv_i64 bound;
1584     TCGCond cond;
1585
1586     /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1587     if (u) {
1588         tcg_gen_ext32u_i64(reg, reg);
1589     } else {
1590         tcg_gen_ext32s_i64(reg, reg);
1591     }
1592     if (d) {
1593         tcg_gen_sub_i64(reg, reg, val);
1594         ibound = (u ? 0 : INT32_MIN);
1595         cond = TCG_COND_LT;
1596     } else {
1597         tcg_gen_add_i64(reg, reg, val);
1598         ibound = (u ? UINT32_MAX : INT32_MAX);
1599         cond = TCG_COND_GT;
1600     }
1601     bound = tcg_const_i64(ibound);
1602     tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1603     tcg_temp_free_i64(bound);
1604 }
1605
1606 /* Similarly with 64-bit values.  */
1607 static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1608 {
1609     TCGv_i64 t0 = tcg_temp_new_i64();
1610     TCGv_i64 t1 = tcg_temp_new_i64();
1611     TCGv_i64 t2;
1612
1613     if (u) {
1614         if (d) {
1615             tcg_gen_sub_i64(t0, reg, val);
1616             tcg_gen_movi_i64(t1, 0);
1617             tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1618         } else {
1619             tcg_gen_add_i64(t0, reg, val);
1620             tcg_gen_movi_i64(t1, -1);
1621             tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1622         }
1623     } else {
1624         if (d) {
1625             /* Detect signed overflow for subtraction.  */
1626             tcg_gen_xor_i64(t0, reg, val);
1627             tcg_gen_sub_i64(t1, reg, val);
1628             tcg_gen_xor_i64(reg, reg, t1);
1629             tcg_gen_and_i64(t0, t0, reg);
1630
1631             /* Bound the result.  */
1632             tcg_gen_movi_i64(reg, INT64_MIN);
1633             t2 = tcg_const_i64(0);
1634             tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1635         } else {
1636             /* Detect signed overflow for addition.  */
1637             tcg_gen_xor_i64(t0, reg, val);
1638             tcg_gen_add_i64(reg, reg, val);
1639             tcg_gen_xor_i64(t1, reg, val);
1640             tcg_gen_andc_i64(t0, t1, t0);
1641
1642             /* Bound the result.  */
1643             tcg_gen_movi_i64(t1, INT64_MAX);
1644             t2 = tcg_const_i64(0);
1645             tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1646         }
1647         tcg_temp_free_i64(t2);
1648     }
1649     tcg_temp_free_i64(t0);
1650     tcg_temp_free_i64(t1);
1651 }
1652
1653 /* Similarly with a vector and a scalar operand.  */
1654 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1655                               TCGv_i64 val, bool u, bool d)
1656 {
1657     unsigned vsz = vec_full_reg_size(s);
1658     TCGv_ptr dptr, nptr;
1659     TCGv_i32 t32, desc;
1660     TCGv_i64 t64;
1661
1662     dptr = tcg_temp_new_ptr();
1663     nptr = tcg_temp_new_ptr();
1664     tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1665     tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1666     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1667
1668     switch (esz) {
1669     case MO_8:
1670         t32 = tcg_temp_new_i32();
1671         tcg_gen_extrl_i64_i32(t32, val);
1672         if (d) {
1673             tcg_gen_neg_i32(t32, t32);
1674         }
1675         if (u) {
1676             gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1677         } else {
1678             gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1679         }
1680         tcg_temp_free_i32(t32);
1681         break;
1682
1683     case MO_16:
1684         t32 = tcg_temp_new_i32();
1685         tcg_gen_extrl_i64_i32(t32, val);
1686         if (d) {
1687             tcg_gen_neg_i32(t32, t32);
1688         }
1689         if (u) {
1690             gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1691         } else {
1692             gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1693         }
1694         tcg_temp_free_i32(t32);
1695         break;
1696
1697     case MO_32:
1698         t64 = tcg_temp_new_i64();
1699         if (d) {
1700             tcg_gen_neg_i64(t64, val);
1701         } else {
1702             tcg_gen_mov_i64(t64, val);
1703         }
1704         if (u) {
1705             gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1706         } else {
1707             gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1708         }
1709         tcg_temp_free_i64(t64);
1710         break;
1711
1712     case MO_64:
1713         if (u) {
1714             if (d) {
1715                 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1716             } else {
1717                 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1718             }
1719         } else if (d) {
1720             t64 = tcg_temp_new_i64();
1721             tcg_gen_neg_i64(t64, val);
1722             gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1723             tcg_temp_free_i64(t64);
1724         } else {
1725             gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1726         }
1727         break;
1728
1729     default:
1730         g_assert_not_reached();
1731     }
1732
1733     tcg_temp_free_ptr(dptr);
1734     tcg_temp_free_ptr(nptr);
1735     tcg_temp_free_i32(desc);
1736 }
1737
1738 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
1739 {
1740     if (sve_access_check(s)) {
1741         unsigned fullsz = vec_full_reg_size(s);
1742         unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1743         tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1744     }
1745     return true;
1746 }
1747
1748 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
1749 {
1750     if (sve_access_check(s)) {
1751         unsigned fullsz = vec_full_reg_size(s);
1752         unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1753         int inc = numelem * a->imm * (a->d ? -1 : 1);
1754         TCGv_i64 reg = cpu_reg(s, a->rd);
1755
1756         tcg_gen_addi_i64(reg, reg, inc);
1757     }
1758     return true;
1759 }
1760
1761 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
1762                                uint32_t insn)
1763 {
1764     if (!sve_access_check(s)) {
1765         return true;
1766     }
1767
1768     unsigned fullsz = vec_full_reg_size(s);
1769     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1770     int inc = numelem * a->imm;
1771     TCGv_i64 reg = cpu_reg(s, a->rd);
1772
1773     /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1774     if (inc == 0) {
1775         if (a->u) {
1776             tcg_gen_ext32u_i64(reg, reg);
1777         } else {
1778             tcg_gen_ext32s_i64(reg, reg);
1779         }
1780     } else {
1781         TCGv_i64 t = tcg_const_i64(inc);
1782         do_sat_addsub_32(reg, t, a->u, a->d);
1783         tcg_temp_free_i64(t);
1784     }
1785     return true;
1786 }
1787
1788 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
1789                                uint32_t insn)
1790 {
1791     if (!sve_access_check(s)) {
1792         return true;
1793     }
1794
1795     unsigned fullsz = vec_full_reg_size(s);
1796     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1797     int inc = numelem * a->imm;
1798     TCGv_i64 reg = cpu_reg(s, a->rd);
1799
1800     if (inc != 0) {
1801         TCGv_i64 t = tcg_const_i64(inc);
1802         do_sat_addsub_64(reg, t, a->u, a->d);
1803         tcg_temp_free_i64(t);
1804     }
1805     return true;
1806 }
1807
1808 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
1809 {
1810     if (a->esz == 0) {
1811         return false;
1812     }
1813
1814     unsigned fullsz = vec_full_reg_size(s);
1815     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1816     int inc = numelem * a->imm;
1817
1818     if (inc != 0) {
1819         if (sve_access_check(s)) {
1820             TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1821             tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1822                               vec_full_reg_offset(s, a->rn),
1823                               t, fullsz, fullsz);
1824             tcg_temp_free_i64(t);
1825         }
1826     } else {
1827         do_mov_z(s, a->rd, a->rn);
1828     }
1829     return true;
1830 }
1831
1832 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
1833                             uint32_t insn)
1834 {
1835     if (a->esz == 0) {
1836         return false;
1837     }
1838
1839     unsigned fullsz = vec_full_reg_size(s);
1840     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1841     int inc = numelem * a->imm;
1842
1843     if (inc != 0) {
1844         if (sve_access_check(s)) {
1845             TCGv_i64 t = tcg_const_i64(inc);
1846             do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1847             tcg_temp_free_i64(t);
1848         }
1849     } else {
1850         do_mov_z(s, a->rd, a->rn);
1851     }
1852     return true;
1853 }
1854
1855 /*
1856  *** SVE Bitwise Immediate Group
1857  */
1858
1859 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1860 {
1861     uint64_t imm;
1862     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1863                                 extract32(a->dbm, 0, 6),
1864                                 extract32(a->dbm, 6, 6))) {
1865         return false;
1866     }
1867     if (sve_access_check(s)) {
1868         unsigned vsz = vec_full_reg_size(s);
1869         gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1870                 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1871     }
1872     return true;
1873 }
1874
1875 static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1876 {
1877     return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1878 }
1879
1880 static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1881 {
1882     return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1883 }
1884
1885 static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1886 {
1887     return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1888 }
1889
1890 static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
1891 {
1892     uint64_t imm;
1893     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1894                                 extract32(a->dbm, 0, 6),
1895                                 extract32(a->dbm, 6, 6))) {
1896         return false;
1897     }
1898     if (sve_access_check(s)) {
1899         do_dupi_z(s, a->rd, imm);
1900     }
1901     return true;
1902 }
1903
1904 /*
1905  *** SVE Integer Wide Immediate - Predicated Group
1906  */
1907
1908 /* Implement all merging copies.  This is used for CPY (immediate),
1909  * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1910  */
1911 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1912                      TCGv_i64 val)
1913 {
1914     typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1915     static gen_cpy * const fns[4] = {
1916         gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1917         gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1918     };
1919     unsigned vsz = vec_full_reg_size(s);
1920     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1921     TCGv_ptr t_zd = tcg_temp_new_ptr();
1922     TCGv_ptr t_zn = tcg_temp_new_ptr();
1923     TCGv_ptr t_pg = tcg_temp_new_ptr();
1924
1925     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1926     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1927     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1928
1929     fns[esz](t_zd, t_zn, t_pg, val, desc);
1930
1931     tcg_temp_free_ptr(t_zd);
1932     tcg_temp_free_ptr(t_zn);
1933     tcg_temp_free_ptr(t_pg);
1934     tcg_temp_free_i32(desc);
1935 }
1936
1937 static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
1938 {
1939     if (a->esz == 0) {
1940         return false;
1941     }
1942     if (sve_access_check(s)) {
1943         /* Decode the VFP immediate.  */
1944         uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1945         TCGv_i64 t_imm = tcg_const_i64(imm);
1946         do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1947         tcg_temp_free_i64(t_imm);
1948     }
1949     return true;
1950 }
1951
1952 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
1953 {
1954     if (a->esz == 0 && extract32(insn, 13, 1)) {
1955         return false;
1956     }
1957     if (sve_access_check(s)) {
1958         TCGv_i64 t_imm = tcg_const_i64(a->imm);
1959         do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1960         tcg_temp_free_i64(t_imm);
1961     }
1962     return true;
1963 }
1964
1965 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
1966 {
1967     static gen_helper_gvec_2i * const fns[4] = {
1968         gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1969         gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1970     };
1971
1972     if (a->esz == 0 && extract32(insn, 13, 1)) {
1973         return false;
1974     }
1975     if (sve_access_check(s)) {
1976         unsigned vsz = vec_full_reg_size(s);
1977         TCGv_i64 t_imm = tcg_const_i64(a->imm);
1978         tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1979                             pred_full_reg_offset(s, a->pg),
1980                             t_imm, vsz, vsz, 0, fns[a->esz]);
1981         tcg_temp_free_i64(t_imm);
1982     }
1983     return true;
1984 }
1985
1986 /*
1987  *** SVE Permute Extract Group
1988  */
1989
1990 static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
1991 {
1992     if (!sve_access_check(s)) {
1993         return true;
1994     }
1995
1996     unsigned vsz = vec_full_reg_size(s);
1997     unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
1998     unsigned n_siz = vsz - n_ofs;
1999     unsigned d = vec_full_reg_offset(s, a->rd);
2000     unsigned n = vec_full_reg_offset(s, a->rn);
2001     unsigned m = vec_full_reg_offset(s, a->rm);
2002
2003     /* Use host vector move insns if we have appropriate sizes
2004      * and no unfortunate overlap.
2005      */
2006     if (m != d
2007         && n_ofs == size_for_gvec(n_ofs)
2008         && n_siz == size_for_gvec(n_siz)
2009         && (d != n || n_siz <= n_ofs)) {
2010         tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
2011         if (n_ofs != 0) {
2012             tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
2013         }
2014     } else {
2015         tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
2016     }
2017     return true;
2018 }
2019
2020 /*
2021  *** SVE Permute - Unpredicated Group
2022  */
2023
2024 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
2025 {
2026     if (sve_access_check(s)) {
2027         unsigned vsz = vec_full_reg_size(s);
2028         tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2029                              vsz, vsz, cpu_reg_sp(s, a->rn));
2030     }
2031     return true;
2032 }
2033
2034 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
2035 {
2036     if ((a->imm & 0x1f) == 0) {
2037         return false;
2038     }
2039     if (sve_access_check(s)) {
2040         unsigned vsz = vec_full_reg_size(s);
2041         unsigned dofs = vec_full_reg_offset(s, a->rd);
2042         unsigned esz, index;
2043
2044         esz = ctz32(a->imm);
2045         index = a->imm >> (esz + 1);
2046
2047         if ((index << esz) < vsz) {
2048             unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2049             tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2050         } else {
2051             tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
2052         }
2053     }
2054     return true;
2055 }
2056
2057 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2058 {
2059     typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2060     static gen_insr * const fns[4] = {
2061         gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2062         gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2063     };
2064     unsigned vsz = vec_full_reg_size(s);
2065     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2066     TCGv_ptr t_zd = tcg_temp_new_ptr();
2067     TCGv_ptr t_zn = tcg_temp_new_ptr();
2068
2069     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2070     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2071
2072     fns[a->esz](t_zd, t_zn, val, desc);
2073
2074     tcg_temp_free_ptr(t_zd);
2075     tcg_temp_free_ptr(t_zn);
2076     tcg_temp_free_i32(desc);
2077 }
2078
2079 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2080 {
2081     if (sve_access_check(s)) {
2082         TCGv_i64 t = tcg_temp_new_i64();
2083         tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2084         do_insr_i64(s, a, t);
2085         tcg_temp_free_i64(t);
2086     }
2087     return true;
2088 }
2089
2090 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2091 {
2092     if (sve_access_check(s)) {
2093         do_insr_i64(s, a, cpu_reg(s, a->rm));
2094     }
2095     return true;
2096 }
2097
2098 static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2099 {
2100     static gen_helper_gvec_2 * const fns[4] = {
2101         gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2102         gen_helper_sve_rev_s, gen_helper_sve_rev_d
2103     };
2104
2105     if (sve_access_check(s)) {
2106         unsigned vsz = vec_full_reg_size(s);
2107         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2108                            vec_full_reg_offset(s, a->rn),
2109                            vsz, vsz, 0, fns[a->esz]);
2110     }
2111     return true;
2112 }
2113
2114 static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2115 {
2116     static gen_helper_gvec_3 * const fns[4] = {
2117         gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2118         gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2119     };
2120
2121     if (sve_access_check(s)) {
2122         unsigned vsz = vec_full_reg_size(s);
2123         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2124                            vec_full_reg_offset(s, a->rn),
2125                            vec_full_reg_offset(s, a->rm),
2126                            vsz, vsz, 0, fns[a->esz]);
2127     }
2128     return true;
2129 }
2130
2131 static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
2132 {
2133     static gen_helper_gvec_2 * const fns[4][2] = {
2134         { NULL, NULL },
2135         { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2136         { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2137         { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2138     };
2139
2140     if (a->esz == 0) {
2141         return false;
2142     }
2143     if (sve_access_check(s)) {
2144         unsigned vsz = vec_full_reg_size(s);
2145         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2146                            vec_full_reg_offset(s, a->rn)
2147                            + (a->h ? vsz / 2 : 0),
2148                            vsz, vsz, 0, fns[a->esz][a->u]);
2149     }
2150     return true;
2151 }
2152
2153 /*
2154  *** SVE Permute - Predicates Group
2155  */
2156
2157 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2158                           gen_helper_gvec_3 *fn)
2159 {
2160     if (!sve_access_check(s)) {
2161         return true;
2162     }
2163
2164     unsigned vsz = pred_full_reg_size(s);
2165
2166     /* Predicate sizes may be smaller and cannot use simd_desc.
2167        We cannot round up, as we do elsewhere, because we need
2168        the exact size for ZIP2 and REV.  We retain the style for
2169        the other helpers for consistency.  */
2170     TCGv_ptr t_d = tcg_temp_new_ptr();
2171     TCGv_ptr t_n = tcg_temp_new_ptr();
2172     TCGv_ptr t_m = tcg_temp_new_ptr();
2173     TCGv_i32 t_desc;
2174     int desc;
2175
2176     desc = vsz - 2;
2177     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2178     desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2179
2180     tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2181     tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2182     tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2183     t_desc = tcg_const_i32(desc);
2184
2185     fn(t_d, t_n, t_m, t_desc);
2186
2187     tcg_temp_free_ptr(t_d);
2188     tcg_temp_free_ptr(t_n);
2189     tcg_temp_free_ptr(t_m);
2190     tcg_temp_free_i32(t_desc);
2191     return true;
2192 }
2193
2194 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2195                           gen_helper_gvec_2 *fn)
2196 {
2197     if (!sve_access_check(s)) {
2198         return true;
2199     }
2200
2201     unsigned vsz = pred_full_reg_size(s);
2202     TCGv_ptr t_d = tcg_temp_new_ptr();
2203     TCGv_ptr t_n = tcg_temp_new_ptr();
2204     TCGv_i32 t_desc;
2205     int desc;
2206
2207     tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2208     tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2209
2210     /* Predicate sizes may be smaller and cannot use simd_desc.
2211        We cannot round up, as we do elsewhere, because we need
2212        the exact size for ZIP2 and REV.  We retain the style for
2213        the other helpers for consistency.  */
2214
2215     desc = vsz - 2;
2216     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2217     desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2218     t_desc = tcg_const_i32(desc);
2219
2220     fn(t_d, t_n, t_desc);
2221
2222     tcg_temp_free_i32(t_desc);
2223     tcg_temp_free_ptr(t_d);
2224     tcg_temp_free_ptr(t_n);
2225     return true;
2226 }
2227
2228 static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2229 {
2230     return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2231 }
2232
2233 static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2234 {
2235     return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2236 }
2237
2238 static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2239 {
2240     return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2241 }
2242
2243 static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2244 {
2245     return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2246 }
2247
2248 static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2249 {
2250     return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2251 }
2252
2253 static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2254 {
2255     return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2256 }
2257
2258 static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2259 {
2260     return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2261 }
2262
2263 static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
2264 {
2265     return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2266 }
2267
2268 static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
2269 {
2270     return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2271 }
2272
2273 /*
2274  *** SVE Permute - Interleaving Group
2275  */
2276
2277 static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2278 {
2279     static gen_helper_gvec_3 * const fns[4] = {
2280         gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2281         gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2282     };
2283
2284     if (sve_access_check(s)) {
2285         unsigned vsz = vec_full_reg_size(s);
2286         unsigned high_ofs = high ? vsz / 2 : 0;
2287         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2288                            vec_full_reg_offset(s, a->rn) + high_ofs,
2289                            vec_full_reg_offset(s, a->rm) + high_ofs,
2290                            vsz, vsz, 0, fns[a->esz]);
2291     }
2292     return true;
2293 }
2294
2295 static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2296                             gen_helper_gvec_3 *fn)
2297 {
2298     if (sve_access_check(s)) {
2299         unsigned vsz = vec_full_reg_size(s);
2300         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2301                            vec_full_reg_offset(s, a->rn),
2302                            vec_full_reg_offset(s, a->rm),
2303                            vsz, vsz, data, fn);
2304     }
2305     return true;
2306 }
2307
2308 static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2309 {
2310     return do_zip(s, a, false);
2311 }
2312
2313 static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2314 {
2315     return do_zip(s, a, true);
2316 }
2317
2318 static gen_helper_gvec_3 * const uzp_fns[4] = {
2319     gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2320     gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2321 };
2322
2323 static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2324 {
2325     return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2326 }
2327
2328 static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2329 {
2330     return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2331 }
2332
2333 static gen_helper_gvec_3 * const trn_fns[4] = {
2334     gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2335     gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2336 };
2337
2338 static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2339 {
2340     return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2341 }
2342
2343 static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2344 {
2345     return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2346 }
2347
2348 /*
2349  *** SVE Permute Vector - Predicated Group
2350  */
2351
2352 static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2353 {
2354     static gen_helper_gvec_3 * const fns[4] = {
2355         NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2356     };
2357     return do_zpz_ool(s, a, fns[a->esz]);
2358 }
2359
2360 /* Call the helper that computes the ARM LastActiveElement pseudocode
2361  * function, scaled by the element size.  This includes the not found
2362  * indication; e.g. not found for esz=3 is -8.
2363  */
2364 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2365 {
2366     /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
2367      * round up, as we do elsewhere, because we need the exact size.
2368      */
2369     TCGv_ptr t_p = tcg_temp_new_ptr();
2370     TCGv_i32 t_desc;
2371     unsigned vsz = pred_full_reg_size(s);
2372     unsigned desc;
2373
2374     desc = vsz - 2;
2375     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2376
2377     tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2378     t_desc = tcg_const_i32(desc);
2379
2380     gen_helper_sve_last_active_element(ret, t_p, t_desc);
2381
2382     tcg_temp_free_i32(t_desc);
2383     tcg_temp_free_ptr(t_p);
2384 }
2385
2386 /* Increment LAST to the offset of the next element in the vector,
2387  * wrapping around to 0.
2388  */
2389 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2390 {
2391     unsigned vsz = vec_full_reg_size(s);
2392
2393     tcg_gen_addi_i32(last, last, 1 << esz);
2394     if (is_power_of_2(vsz)) {
2395         tcg_gen_andi_i32(last, last, vsz - 1);
2396     } else {
2397         TCGv_i32 max = tcg_const_i32(vsz);
2398         TCGv_i32 zero = tcg_const_i32(0);
2399         tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2400         tcg_temp_free_i32(max);
2401         tcg_temp_free_i32(zero);
2402     }
2403 }
2404
2405 /* If LAST < 0, set LAST to the offset of the last element in the vector.  */
2406 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2407 {
2408     unsigned vsz = vec_full_reg_size(s);
2409
2410     if (is_power_of_2(vsz)) {
2411         tcg_gen_andi_i32(last, last, vsz - 1);
2412     } else {
2413         TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2414         TCGv_i32 zero = tcg_const_i32(0);
2415         tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2416         tcg_temp_free_i32(max);
2417         tcg_temp_free_i32(zero);
2418     }
2419 }
2420
2421 /* Load an unsigned element of ESZ from BASE+OFS.  */
2422 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2423 {
2424     TCGv_i64 r = tcg_temp_new_i64();
2425
2426     switch (esz) {
2427     case 0:
2428         tcg_gen_ld8u_i64(r, base, ofs);
2429         break;
2430     case 1:
2431         tcg_gen_ld16u_i64(r, base, ofs);
2432         break;
2433     case 2:
2434         tcg_gen_ld32u_i64(r, base, ofs);
2435         break;
2436     case 3:
2437         tcg_gen_ld_i64(r, base, ofs);
2438         break;
2439     default:
2440         g_assert_not_reached();
2441     }
2442     return r;
2443 }
2444
2445 /* Load an unsigned element of ESZ from RM[LAST].  */
2446 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2447                                  int rm, int esz)
2448 {
2449     TCGv_ptr p = tcg_temp_new_ptr();
2450     TCGv_i64 r;
2451
2452     /* Convert offset into vector into offset into ENV.
2453      * The final adjustment for the vector register base
2454      * is added via constant offset to the load.
2455      */
2456 #ifdef HOST_WORDS_BIGENDIAN
2457     /* Adjust for element ordering.  See vec_reg_offset.  */
2458     if (esz < 3) {
2459         tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2460     }
2461 #endif
2462     tcg_gen_ext_i32_ptr(p, last);
2463     tcg_gen_add_ptr(p, p, cpu_env);
2464
2465     r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2466     tcg_temp_free_ptr(p);
2467
2468     return r;
2469 }
2470
2471 /* Compute CLAST for a Zreg.  */
2472 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2473 {
2474     TCGv_i32 last;
2475     TCGLabel *over;
2476     TCGv_i64 ele;
2477     unsigned vsz, esz = a->esz;
2478
2479     if (!sve_access_check(s)) {
2480         return true;
2481     }
2482
2483     last = tcg_temp_local_new_i32();
2484     over = gen_new_label();
2485
2486     find_last_active(s, last, esz, a->pg);
2487
2488     /* There is of course no movcond for a 2048-bit vector,
2489      * so we must branch over the actual store.
2490      */
2491     tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2492
2493     if (!before) {
2494         incr_last_active(s, last, esz);
2495     }
2496
2497     ele = load_last_active(s, last, a->rm, esz);
2498     tcg_temp_free_i32(last);
2499
2500     vsz = vec_full_reg_size(s);
2501     tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2502     tcg_temp_free_i64(ele);
2503
2504     /* If this insn used MOVPRFX, we may need a second move.  */
2505     if (a->rd != a->rn) {
2506         TCGLabel *done = gen_new_label();
2507         tcg_gen_br(done);
2508
2509         gen_set_label(over);
2510         do_mov_z(s, a->rd, a->rn);
2511
2512         gen_set_label(done);
2513     } else {
2514         gen_set_label(over);
2515     }
2516     return true;
2517 }
2518
2519 static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2520 {
2521     return do_clast_vector(s, a, false);
2522 }
2523
2524 static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2525 {
2526     return do_clast_vector(s, a, true);
2527 }
2528
2529 /* Compute CLAST for a scalar.  */
2530 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2531                             bool before, TCGv_i64 reg_val)
2532 {
2533     TCGv_i32 last = tcg_temp_new_i32();
2534     TCGv_i64 ele, cmp, zero;
2535
2536     find_last_active(s, last, esz, pg);
2537
2538     /* Extend the original value of last prior to incrementing.  */
2539     cmp = tcg_temp_new_i64();
2540     tcg_gen_ext_i32_i64(cmp, last);
2541
2542     if (!before) {
2543         incr_last_active(s, last, esz);
2544     }
2545
2546     /* The conceit here is that while last < 0 indicates not found, after
2547      * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2548      * from which we can load garbage.  We then discard the garbage with
2549      * a conditional move.
2550      */
2551     ele = load_last_active(s, last, rm, esz);
2552     tcg_temp_free_i32(last);
2553
2554     zero = tcg_const_i64(0);
2555     tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2556
2557     tcg_temp_free_i64(zero);
2558     tcg_temp_free_i64(cmp);
2559     tcg_temp_free_i64(ele);
2560 }
2561
2562 /* Compute CLAST for a Vreg.  */
2563 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2564 {
2565     if (sve_access_check(s)) {
2566         int esz = a->esz;
2567         int ofs = vec_reg_offset(s, a->rd, 0, esz);
2568         TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2569
2570         do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2571         write_fp_dreg(s, a->rd, reg);
2572         tcg_temp_free_i64(reg);
2573     }
2574     return true;
2575 }
2576
2577 static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2578 {
2579     return do_clast_fp(s, a, false);
2580 }
2581
2582 static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2583 {
2584     return do_clast_fp(s, a, true);
2585 }
2586
2587 /* Compute CLAST for a Xreg.  */
2588 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2589 {
2590     TCGv_i64 reg;
2591
2592     if (!sve_access_check(s)) {
2593         return true;
2594     }
2595
2596     reg = cpu_reg(s, a->rd);
2597     switch (a->esz) {
2598     case 0:
2599         tcg_gen_ext8u_i64(reg, reg);
2600         break;
2601     case 1:
2602         tcg_gen_ext16u_i64(reg, reg);
2603         break;
2604     case 2:
2605         tcg_gen_ext32u_i64(reg, reg);
2606         break;
2607     case 3:
2608         break;
2609     default:
2610         g_assert_not_reached();
2611     }
2612
2613     do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2614     return true;
2615 }
2616
2617 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2618 {
2619     return do_clast_general(s, a, false);
2620 }
2621
2622 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2623 {
2624     return do_clast_general(s, a, true);
2625 }
2626
2627 /* Compute LAST for a scalar.  */
2628 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2629                                int pg, int rm, bool before)
2630 {
2631     TCGv_i32 last = tcg_temp_new_i32();
2632     TCGv_i64 ret;
2633
2634     find_last_active(s, last, esz, pg);
2635     if (before) {
2636         wrap_last_active(s, last, esz);
2637     } else {
2638         incr_last_active(s, last, esz);
2639     }
2640
2641     ret = load_last_active(s, last, rm, esz);
2642     tcg_temp_free_i32(last);
2643     return ret;
2644 }
2645
2646 /* Compute LAST for a Vreg.  */
2647 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2648 {
2649     if (sve_access_check(s)) {
2650         TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2651         write_fp_dreg(s, a->rd, val);
2652         tcg_temp_free_i64(val);
2653     }
2654     return true;
2655 }
2656
2657 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2658 {
2659     return do_last_fp(s, a, false);
2660 }
2661
2662 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2663 {
2664     return do_last_fp(s, a, true);
2665 }
2666
2667 /* Compute LAST for a Xreg.  */
2668 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2669 {
2670     if (sve_access_check(s)) {
2671         TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2672         tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2673         tcg_temp_free_i64(val);
2674     }
2675     return true;
2676 }
2677
2678 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2679 {
2680     return do_last_general(s, a, false);
2681 }
2682
2683 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2684 {
2685     return do_last_general(s, a, true);
2686 }
2687
2688 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2689 {
2690     if (sve_access_check(s)) {
2691         do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2692     }
2693     return true;
2694 }
2695
2696 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2697 {
2698     if (sve_access_check(s)) {
2699         int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2700         TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2701         do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2702         tcg_temp_free_i64(t);
2703     }
2704     return true;
2705 }
2706
2707 static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2708 {
2709     static gen_helper_gvec_3 * const fns[4] = {
2710         NULL,
2711         gen_helper_sve_revb_h,
2712         gen_helper_sve_revb_s,
2713         gen_helper_sve_revb_d,
2714     };
2715     return do_zpz_ool(s, a, fns[a->esz]);
2716 }
2717
2718 static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2719 {
2720     static gen_helper_gvec_3 * const fns[4] = {
2721         NULL,
2722         NULL,
2723         gen_helper_sve_revh_s,
2724         gen_helper_sve_revh_d,
2725     };
2726     return do_zpz_ool(s, a, fns[a->esz]);
2727 }
2728
2729 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2730 {
2731     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2732 }
2733
2734 static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2735 {
2736     static gen_helper_gvec_3 * const fns[4] = {
2737         gen_helper_sve_rbit_b,
2738         gen_helper_sve_rbit_h,
2739         gen_helper_sve_rbit_s,
2740         gen_helper_sve_rbit_d,
2741     };
2742     return do_zpz_ool(s, a, fns[a->esz]);
2743 }
2744
2745 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2746 {
2747     if (sve_access_check(s)) {
2748         unsigned vsz = vec_full_reg_size(s);
2749         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2750                            vec_full_reg_offset(s, a->rn),
2751                            vec_full_reg_offset(s, a->rm),
2752                            pred_full_reg_offset(s, a->pg),
2753                            vsz, vsz, a->esz, gen_helper_sve_splice);
2754     }
2755     return true;
2756 }
2757
2758 /*
2759  *** SVE Integer Compare - Vectors Group
2760  */
2761
2762 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2763                           gen_helper_gvec_flags_4 *gen_fn)
2764 {
2765     TCGv_ptr pd, zn, zm, pg;
2766     unsigned vsz;
2767     TCGv_i32 t;
2768
2769     if (gen_fn == NULL) {
2770         return false;
2771     }
2772     if (!sve_access_check(s)) {
2773         return true;
2774     }
2775
2776     vsz = vec_full_reg_size(s);
2777     t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2778     pd = tcg_temp_new_ptr();
2779     zn = tcg_temp_new_ptr();
2780     zm = tcg_temp_new_ptr();
2781     pg = tcg_temp_new_ptr();
2782
2783     tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2784     tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2785     tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2786     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2787
2788     gen_fn(t, pd, zn, zm, pg, t);
2789
2790     tcg_temp_free_ptr(pd);
2791     tcg_temp_free_ptr(zn);
2792     tcg_temp_free_ptr(zm);
2793     tcg_temp_free_ptr(pg);
2794
2795     do_pred_flags(t);
2796
2797     tcg_temp_free_i32(t);
2798     return true;
2799 }
2800
2801 #define DO_PPZZ(NAME, name) \
2802 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a,         \
2803                                 uint32_t insn)                            \
2804 {                                                                         \
2805     static gen_helper_gvec_flags_4 * const fns[4] = {                     \
2806         gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
2807         gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
2808     };                                                                    \
2809     return do_ppzz_flags(s, a, fns[a->esz]);                              \
2810 }
2811
2812 DO_PPZZ(CMPEQ, cmpeq)
2813 DO_PPZZ(CMPNE, cmpne)
2814 DO_PPZZ(CMPGT, cmpgt)
2815 DO_PPZZ(CMPGE, cmpge)
2816 DO_PPZZ(CMPHI, cmphi)
2817 DO_PPZZ(CMPHS, cmphs)
2818
2819 #undef DO_PPZZ
2820
2821 #define DO_PPZW(NAME, name) \
2822 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a,         \
2823                                 uint32_t insn)                            \
2824 {                                                                         \
2825     static gen_helper_gvec_flags_4 * const fns[4] = {                     \
2826         gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
2827         gen_helper_sve_##name##_ppzw_s, NULL                              \
2828     };                                                                    \
2829     return do_ppzz_flags(s, a, fns[a->esz]);                              \
2830 }
2831
2832 DO_PPZW(CMPEQ, cmpeq)
2833 DO_PPZW(CMPNE, cmpne)
2834 DO_PPZW(CMPGT, cmpgt)
2835 DO_PPZW(CMPGE, cmpge)
2836 DO_PPZW(CMPHI, cmphi)
2837 DO_PPZW(CMPHS, cmphs)
2838 DO_PPZW(CMPLT, cmplt)
2839 DO_PPZW(CMPLE, cmple)
2840 DO_PPZW(CMPLO, cmplo)
2841 DO_PPZW(CMPLS, cmpls)
2842
2843 #undef DO_PPZW
2844
2845 /*
2846  *** SVE Integer Compare - Immediate Groups
2847  */
2848
2849 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2850                           gen_helper_gvec_flags_3 *gen_fn)
2851 {
2852     TCGv_ptr pd, zn, pg;
2853     unsigned vsz;
2854     TCGv_i32 t;
2855
2856     if (gen_fn == NULL) {
2857         return false;
2858     }
2859     if (!sve_access_check(s)) {
2860         return true;
2861     }
2862
2863     vsz = vec_full_reg_size(s);
2864     t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2865     pd = tcg_temp_new_ptr();
2866     zn = tcg_temp_new_ptr();
2867     pg = tcg_temp_new_ptr();
2868
2869     tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2870     tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2871     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2872
2873     gen_fn(t, pd, zn, pg, t);
2874
2875     tcg_temp_free_ptr(pd);
2876     tcg_temp_free_ptr(zn);
2877     tcg_temp_free_ptr(pg);
2878
2879     do_pred_flags(t);
2880
2881     tcg_temp_free_i32(t);
2882     return true;
2883 }
2884
/*
 * Expand to trans_<NAME>_ppzi: pick the byte/half/word/doubleword helper
 * by a->esz and pass it to do_ppzi_flags, which emits the compare and
 * the flag update.
 */
#define DO_PPZI(NAME, name) \
static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a,         \
                                uint32_t insn)                            \
{                                                                         \
    static gen_helper_gvec_flags_3 * const fns[4] = {                     \
        gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
        gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
    };                                                                    \
    return do_ppzi_flags(s, a, fns[a->esz]);                              \
}

DO_PPZI(CMPEQ, cmpeq)
DO_PPZI(CMPNE, cmpne)
DO_PPZI(CMPGT, cmpgt)
DO_PPZI(CMPGE, cmpge)
DO_PPZI(CMPHI, cmphi)
DO_PPZI(CMPHS, cmphs)
DO_PPZI(CMPLT, cmplt)
DO_PPZI(CMPLE, cmple)
DO_PPZI(CMPLO, cmplo)
DO_PPZI(CMPLS, cmpls)

#undef DO_PPZI
2908
2909 /*
2910  *** SVE Partition Break Group
2911  */
2912
2913 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2914                     gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2915 {
2916     if (!sve_access_check(s)) {
2917         return true;
2918     }
2919
2920     unsigned vsz = pred_full_reg_size(s);
2921
2922     /* Predicate sizes may be smaller and cannot use simd_desc.  */
2923     TCGv_ptr d = tcg_temp_new_ptr();
2924     TCGv_ptr n = tcg_temp_new_ptr();
2925     TCGv_ptr m = tcg_temp_new_ptr();
2926     TCGv_ptr g = tcg_temp_new_ptr();
2927     TCGv_i32 t = tcg_const_i32(vsz - 2);
2928
2929     tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2930     tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2931     tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2932     tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2933
2934     if (a->s) {
2935         fn_s(t, d, n, m, g, t);
2936         do_pred_flags(t);
2937     } else {
2938         fn(d, n, m, g, t);
2939     }
2940     tcg_temp_free_ptr(d);
2941     tcg_temp_free_ptr(n);
2942     tcg_temp_free_ptr(m);
2943     tcg_temp_free_ptr(g);
2944     tcg_temp_free_i32(t);
2945     return true;
2946 }
2947
2948 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2949                     gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2950 {
2951     if (!sve_access_check(s)) {
2952         return true;
2953     }
2954
2955     unsigned vsz = pred_full_reg_size(s);
2956
2957     /* Predicate sizes may be smaller and cannot use simd_desc.  */
2958     TCGv_ptr d = tcg_temp_new_ptr();
2959     TCGv_ptr n = tcg_temp_new_ptr();
2960     TCGv_ptr g = tcg_temp_new_ptr();
2961     TCGv_i32 t = tcg_const_i32(vsz - 2);
2962
2963     tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2964     tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2965     tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2966
2967     if (a->s) {
2968         fn_s(t, d, n, g, t);
2969         do_pred_flags(t);
2970     } else {
2971         fn(d, n, g, t);
2972     }
2973     tcg_temp_free_ptr(d);
2974     tcg_temp_free_ptr(n);
2975     tcg_temp_free_ptr(g);
2976     tcg_temp_free_i32(t);
2977     return true;
2978 }
2979
/* BRKPA: expand via do_brk3 with the plain and flag-setting helpers.  */
static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
}
2984
/* BRKPB: expand via do_brk3 with the plain and flag-setting helpers.  */
static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
}
2989
/* BRKA, "_m" form: expand via do_brk2 with the brka_m / brkas_m helpers.  */
static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
{
    return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
}
2994
/* BRKB, "_m" form: expand via do_brk2 with the brkb_m / brkbs_m helpers.  */
static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
{
    return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
}
2999
/* BRKA, "_z" form: expand via do_brk2 with the brka_z / brkas_z helpers.  */
static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
{
    return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
}
3004
/* BRKB, "_z" form: expand via do_brk2 with the brkb_z / brkbs_z helpers.  */
static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
{
    return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
}
3009
/* BRKN: expand via do_brk2 with the plain and flag-setting helpers.  */
static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
{
    return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
}
3014
3015 /*
3016  *** SVE Predicate Count Group
3017  */
3018
3019 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3020 {
3021     unsigned psz = pred_full_reg_size(s);
3022
3023     if (psz <= 8) {
3024         uint64_t psz_mask;
3025
3026         tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3027         if (pn != pg) {
3028             TCGv_i64 g = tcg_temp_new_i64();
3029             tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3030             tcg_gen_and_i64(val, val, g);
3031             tcg_temp_free_i64(g);
3032         }
3033
3034         /* Reduce the pred_esz_masks value simply to reduce the
3035          * size of the code generated here.
3036          */
3037         psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3038         tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3039
3040         tcg_gen_ctpop_i64(val, val);
3041     } else {
3042         TCGv_ptr t_pn = tcg_temp_new_ptr();
3043         TCGv_ptr t_pg = tcg_temp_new_ptr();
3044         unsigned desc;
3045         TCGv_i32 t_desc;
3046
3047         desc = psz - 2;
3048         desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3049
3050         tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3051         tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3052         t_desc = tcg_const_i32(desc);
3053
3054         gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3055         tcg_temp_free_ptr(t_pn);
3056         tcg_temp_free_ptr(t_pg);
3057         tcg_temp_free_i32(t_desc);
3058     }
3059 }
3060
3061 static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
3062 {
3063     if (sve_access_check(s)) {
3064         do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3065     }
3066     return true;
3067 }
3068
3069 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
3070                             uint32_t insn)
3071 {
3072     if (sve_access_check(s)) {
3073         TCGv_i64 reg = cpu_reg(s, a->rd);
3074         TCGv_i64 val = tcg_temp_new_i64();
3075
3076         do_cntp(s, val, a->esz, a->pg, a->pg);
3077         if (a->d) {
3078             tcg_gen_sub_i64(reg, reg, val);
3079         } else {
3080             tcg_gen_add_i64(reg, reg, val);
3081         }
3082         tcg_temp_free_i64(val);
3083     }
3084     return true;
3085 }
3086
3087 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3088                             uint32_t insn)
3089 {
3090     if (a->esz == 0) {
3091         return false;
3092     }
3093     if (sve_access_check(s)) {
3094         unsigned vsz = vec_full_reg_size(s);
3095         TCGv_i64 val = tcg_temp_new_i64();
3096         GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3097
3098         do_cntp(s, val, a->esz, a->pg, a->pg);
3099         gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3100                 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3101     }
3102     return true;
3103 }
3104
3105 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
3106                                 uint32_t insn)
3107 {
3108     if (sve_access_check(s)) {
3109         TCGv_i64 reg = cpu_reg(s, a->rd);
3110         TCGv_i64 val = tcg_temp_new_i64();
3111
3112         do_cntp(s, val, a->esz, a->pg, a->pg);
3113         do_sat_addsub_32(reg, val, a->u, a->d);
3114     }
3115     return true;
3116 }
3117
3118 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
3119                                 uint32_t insn)
3120 {
3121     if (sve_access_check(s)) {
3122         TCGv_i64 reg = cpu_reg(s, a->rd);
3123         TCGv_i64 val = tcg_temp_new_i64();
3124
3125         do_cntp(s, val, a->esz, a->pg, a->pg);
3126         do_sat_addsub_64(reg, val, a->u, a->d);
3127     }
3128     return true;
3129 }
3130
3131 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3132                              uint32_t insn)
3133 {
3134     if (a->esz == 0) {
3135         return false;
3136     }
3137     if (sve_access_check(s)) {
3138         TCGv_i64 val = tcg_temp_new_i64();
3139         do_cntp(s, val, a->esz, a->pg, a->pg);
3140         do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3141     }
3142     return true;
3143 }
3144
3145 /*
3146  *** SVE Integer Compare Scalars Group
3147  */
3148
/*
 * CTERM: compare general registers a->rn and a->rm (read with width
 * selector a->sf) for equality, or for inequality when a->ne is set,
 * and derive the N and V flags from the outcome and the current C flag.
 * Always returns true; nothing is emitted if the access check fails.
 */
static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
    TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
    TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
    TCGv_i64 cmp = tcg_temp_new_i64();

    /* NF temporarily holds the setcond result of the comparison.  */
    tcg_gen_setcond_i64(cond, cmp, rn, rm);
    tcg_gen_extrl_i64_i32(cpu_NF, cmp);
    tcg_temp_free_i64(cmp);

    /* VF = !NF & !CF.  */
    tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);

    /* Both NF and VF actually look at bit 31.  */
    tcg_gen_neg_i32(cpu_NF, cpu_NF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);
    return true;
}
3173
/*
 * WHILE: from general registers a->rn and a->rm compute how many
 * elements satisfy the comparison selected by a->u (unsigned) and
 * a->eq (inclusive), bounded by the number of elements in the vector.
 * That count, scaled by the element size, is passed to
 * gen_helper_sve_while together with a pointer to predicate register
 * a->rd, and the helper's result is passed to do_pred_flags.
 * Always returns true; nothing is emitted if the access check fails.
 */
static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
{
    TCGv_i64 op0, op1, t0, t1, tmax;
    TCGv_i32 t2, t3;
    TCGv_ptr ptr;
    unsigned desc, vsz = vec_full_reg_size(s);
    TCGCond cond;

    if (!sve_access_check(s)) {
        return true;
    }

    op0 = read_cpu_reg(s, a->rn, 1);
    op1 = read_cpu_reg(s, a->rm, 1);

    /* For 32-bit operands, widen according to the signedness in use.  */
    if (!a->sf) {
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     */
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();
    tcg_gen_sub_i64(t0, op1, op0);

    /* Number of elements of size esz in the vector.  */
    tmax = tcg_const_i64(vsz >> a->esz);
    if (a->eq) {
        /* Equality means one more iteration.  */
        tcg_gen_addi_i64(t0, t0, 1);

        /* If op1 is max (un)signed integer (and the only time the addition
         * above could overflow), then we produce an all-true predicate by
         * setting the count to the vector length.  This is because the
         * pseudocode is described as an increment + compare loop, and the
         * max integer would always compare true.
         */
        tcg_gen_movi_i64(t1, (a->sf
                              ? (a->u ? UINT64_MAX : INT64_MAX)
                              : (a->u ? UINT32_MAX : INT32_MAX)));
        tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
    }

    /* Bound to the maximum.  */
    tcg_gen_umin_i64(t0, t0, tmax);
    tcg_temp_free_i64(tmax);

    /* Set the count to zero if the condition is false.  */
    cond = (a->u
            ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
            : (a->eq ? TCG_COND_LE : TCG_COND_LT));
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
    tcg_temp_free_i64(t1);

    /* Since we're bounded, pass as a 32-bit type.  */
    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);
    tcg_temp_free_i64(t0);

    /* Scale elements to bits.  */
    tcg_gen_shli_i32(t2, t2, a->esz);

    /* Descriptor: vsz / 8 minus 2, with esz in the data field.  */
    desc = (vsz / 8) - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    t3 = tcg_const_i32(desc);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    /* t2 carries the count in and the helper's result out.  */
    gen_helper_sve_while(t2, ptr, t2, t3);
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    tcg_temp_free_i32(t3);
    return true;
}
3258
3259 /*
3260  *** SVE Integer Wide Immediate - Unpredicated Group
3261  */
3262
3263 static bool trans_FDUP(DisasContext *s, arg_FDUP *a, uint32_t insn)
3264 {
3265     if (a->esz == 0) {
3266         return false;
3267     }
3268     if (sve_access_check(s)) {
3269         unsigned vsz = vec_full_reg_size(s);
3270         int dofs = vec_full_reg_offset(s, a->rd);
3271         uint64_t imm;
3272
3273         /* Decode the VFP immediate.  */
3274         imm = vfp_expand_imm(a->esz, a->imm);
3275         imm = dup_const(a->esz, imm);
3276
3277         tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
3278     }
3279     return true;
3280 }
3281
3282 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a, uint32_t insn)
3283 {
3284     if (a->esz == 0 && extract32(insn, 13, 1)) {
3285         return false;
3286     }
3287     if (sve_access_check(s)) {
3288         unsigned vsz = vec_full_reg_size(s);
3289         int dofs = vec_full_reg_offset(s, a->rd);
3290
3291         tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
3292     }
3293     return true;
3294 }
3295
3296 static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3297 {
3298     if (a->esz == 0 && extract32(insn, 13, 1)) {
3299         return false;
3300     }
3301     if (sve_access_check(s)) {
3302         unsigned vsz = vec_full_reg_size(s);
3303         tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3304                           vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3305     }
3306     return true;
3307 }
3308
/*
 * SUB (immediate): implemented as an add of the negated immediate.
 * Note that a->imm is negated in place, so the caller's decoded
 * argument structure is modified before trans_ADD_zzi is invoked.
 */
static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    a->imm = -a->imm;
    return trans_ADD_zzi(s, a, insn);
}
3314
/*
 * SUBR (immediate): expand through tcg_gen_gvec_2s using a per-element-
 * size descriptor and a->imm as the scalar operand.  Every descriptor
 * sets .scalar_first; NOTE(review): presumably this makes the immediate
 * the first operand of the subtraction -- confirm against GVecGen2s.
 *
 * Returns false when a->esz == 0 and bit 13 of the instruction is set;
 * otherwise true, including the case where the SVE access check fails.
 */
static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    /* One expansion descriptor per element size, indexed by a->esz.  */
    static const GVecGen2s op[4] = {
        { .fni8 = tcg_gen_vec_sub8_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_b,
          .opc = INDEX_op_sub_vec,
          .vece = MO_8,
          .scalar_first = true },
        { .fni8 = tcg_gen_vec_sub16_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_h,
          .opc = INDEX_op_sub_vec,
          .vece = MO_16,
          .scalar_first = true },
        { .fni4 = tcg_gen_sub_i32,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_s,
          .opc = INDEX_op_sub_vec,
          .vece = MO_32,
          .scalar_first = true },
        { .fni8 = tcg_gen_sub_i64,
          .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_sve_subri_d,
          .opc = INDEX_op_sub_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64,
          .scalar_first = true }
    };

    if (a->esz == 0 && extract32(insn, 13, 1)) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 c = tcg_const_i64(a->imm);
        tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
                        vec_full_reg_offset(s, a->rn),
                        vsz, vsz, c, &op[a->esz]);
        /* The scalar temp can be released once the expansion is emitted.  */
        tcg_temp_free_i64(c);
    }
    return true;
}
3358
3359 static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3360 {
3361     if (sve_access_check(s)) {
3362         unsigned vsz = vec_full_reg_size(s);
3363         tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3364                           vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3365     }
3366     return true;
3367 }
3368
3369 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, uint32_t insn,
3370                        bool u, bool d)
3371 {
3372     if (a->esz == 0 && extract32(insn, 13, 1)) {
3373         return false;
3374     }
3375     if (sve_access_check(s)) {
3376         TCGv_i64 val = tcg_const_i64(a->imm);
3377         do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3378         tcg_temp_free_i64(val);
3379     }
3380     return true;
3381 }
3382
/*
 * SQADD (immediate): do_zzi_sat with u = false, d = false.
 * NOTE(review): presumably u selects unsigned saturation and d selects
 * subtraction -- confirm against do_sat_addsub_vec.
 */
static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_zzi_sat(s, a, insn, false, false);
}
3387
/*
 * UQADD (immediate): do_zzi_sat with u = true, d = false.
 * NOTE(review): presumably u selects unsigned saturation and d selects
 * subtraction -- confirm against do_sat_addsub_vec.
 */
static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_zzi_sat(s, a, insn, true, false);
}
3392
/*
 * SQSUB (immediate): do_zzi_sat with u = false, d = true.
 * NOTE(review): presumably u selects unsigned saturation and d selects
 * subtraction -- confirm against do_sat_addsub_vec.
 */
static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_zzi_sat(s, a, insn, false, true);
}
3397
/*
 * UQSUB (immediate): do_zzi_sat with u = true, d = true.
 * NOTE(review): presumably u selects unsigned saturation and d selects
 * subtraction -- confirm against do_sat_addsub_vec.
 */
static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_zzi_sat(s, a, insn, true, true);
}
3402
3403 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3404 {
3405     if (sve_access_check(s)) {
3406         unsigned vsz = vec_full_reg_size(s);
3407         TCGv_i64 c = tcg_const_i64(a->imm);
3408
3409         tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3410                             vec_full_reg_offset(s, a->rn),
3411                             c, vsz, vsz, 0, fn);
3412         tcg_temp_free_i64(c);
3413     }
3414     return true;
3415 }
3416
/*
 * Expand to trans_<NAME>_zzi: pick the byte/half/word/doubleword
 * out-of-line helper by a->esz and pass it to do_zzi_ool.
 */
#define DO_ZZI(NAME, name) \
static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a,         \
                               uint32_t insn)                           \
{                                                                       \
    static gen_helper_gvec_2i * const fns[4] = {                        \
        gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
        gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
    };                                                                  \
    return do_zzi_ool(s, a, fns[a->esz]);                               \
}

DO_ZZI(SMAX, smax)
DO_ZZI(UMAX, umax)
DO_ZZI(SMIN, smin)
DO_ZZI(UMIN, umin)

#undef DO_ZZI
3434
3435 static bool trans_DOT_zzz(DisasContext *s, arg_DOT_zzz *a, uint32_t insn)
3436 {
3437     static gen_helper_gvec_3 * const fns[2][2] = {
3438         { gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3439         { gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3440     };
3441
3442     if (sve_access_check(s)) {
3443         unsigned vsz = vec_full_reg_size(s);
3444         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3445                            vec_full_reg_offset(s, a->rn),
3446                            vec_full_reg_offset(s, a->rm),
3447                            vsz, vsz, 0, fns[a->u][a->sz]);
3448     }
3449     return true;
3450 }
3451
3452 static bool trans_DOT_zzx(DisasContext *s, arg_DOT_zzx *a, uint32_t insn)
3453 {
3454     static gen_helper_gvec_3 * const fns[2][2] = {
3455         { gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
3456         { gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
3457     };
3458
3459     if (sve_access_check(s)) {
3460         unsigned vsz = vec_full_reg_size(s);
3461         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3462                            vec_full_reg_offset(s, a->rn),
3463                            vec_full_reg_offset(s, a->rm),
3464                            vsz, vsz, a->index, fns[a->u][a->sz]);
3465     }
3466     return true;
3467 }
3468
3469
3470 /*
3471  *** SVE Floating Point Multiply-Add Indexed Group
3472  */
3473
3474 static bool trans_FMLA_zzxz(DisasContext *s, arg_FMLA_zzxz *a, uint32_t insn)
3475 {
3476     static gen_helper_gvec_4_ptr * const fns[3] = {
3477         gen_helper_gvec_fmla_idx_h,
3478         gen_helper_gvec_fmla_idx_s,
3479         gen_helper_gvec_fmla_idx_d,
3480     };
3481
3482     if (sve_access_check(s)) {
3483         unsigned vsz = vec_full_reg_size(s);
3484         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3485         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3486                            vec_full_reg_offset(s, a->rn),
3487                            vec_full_reg_offset(s, a->rm),
3488                            vec_full_reg_offset(s, a->ra),
3489                            status, vsz, vsz, (a->index << 1) | a->sub,
3490                            fns[a->esz - 1]);
3491         tcg_temp_free_ptr(status);
3492     }
3493     return true;
3494 }
3495
3496 /*
3497  *** SVE Floating Point Multiply Indexed Group
3498  */
3499
3500 static bool trans_FMUL_zzx(DisasContext *s, arg_FMUL_zzx *a, uint32_t insn)
3501 {
3502     static gen_helper_gvec_3_ptr * const fns[3] = {
3503         gen_helper_gvec_fmul_idx_h,
3504         gen_helper_gvec_fmul_idx_s,
3505         gen_helper_gvec_fmul_idx_d,
3506     };
3507
3508     if (sve_access_check(s)) {
3509         unsigned vsz = vec_full_reg_size(s);
3510         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3511         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3512                            vec_full_reg_offset(s, a->rn),
3513                            vec_full_reg_offset(s, a->rm),
3514                            status, vsz, vsz, a->index, fns[a->esz - 1]);
3515         tcg_temp_free_ptr(status);
3516     }
3517     return true;
3518 }
3519
3520 /*
3521  *** SVE Floating Point Fast Reduction Group
3522  */
3523
/*
 * Signature of the FP reduction helpers as called from do_reduce:
 * 64-bit result, source vector pointer, predicate pointer, FP status
 * pointer and 32-bit descriptor.
 */
typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
                                  TCGv_ptr, TCGv_i32);
3526
3527 static void do_reduce(DisasContext *s, arg_rpr_esz *a,
3528                       gen_helper_fp_reduce *fn)
3529 {
3530     unsigned vsz = vec_full_reg_size(s);
3531     unsigned p2vsz = pow2ceil(vsz);
3532     TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, 0));
3533     TCGv_ptr t_zn, t_pg, status;
3534     TCGv_i64 temp;
3535
3536     temp = tcg_temp_new_i64();
3537     t_zn = tcg_temp_new_ptr();
3538     t_pg = tcg_temp_new_ptr();
3539
3540     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3541     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3542     status = get_fpstatus_ptr(a->esz == MO_16);
3543
3544     fn(temp, t_zn, t_pg, status, t_desc);
3545     tcg_temp_free_ptr(t_zn);
3546     tcg_temp_free_ptr(t_pg);
3547     tcg_temp_free_ptr(status);
3548     tcg_temp_free_i32(t_desc);
3549
3550     write_fp_dreg(s, a->rd, temp);
3551     tcg_temp_free_i64(temp);
3552 }
3553
3554 #define DO_VPZ(NAME, name) \
3555 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
3556 {                                                                        \
3557     static gen_helper_fp_reduce * const fns[3] = {                       \
3558         gen_helper_sve_##name##_h,                                       \
3559         gen_helper_sve_##name##_s,                                       \
3560         gen_helper_sve_##name##_d,                                       \
3561     };                                                                   \
3562     if (a->esz == 0) {                                                   \
3563         return false;                                                    \
3564     }                                                                    \
3565     if (sve_access_check(s)) {                                           \
3566         do_reduce(s, a, fns[a->esz - 1]);                                \
3567     }                                                                    \
3568     return true;                                                         \
3569 }
3570
3571 DO_VPZ(FADDV, faddv)
3572 DO_VPZ(FMINNMV, fminnmv)
3573 DO_VPZ(FMAXNMV, fmaxnmv)
3574 DO_VPZ(FMINV, fminv)
3575 DO_VPZ(FMAXV, fmaxv)
3576
3577 /*
3578  *** SVE Floating Point Unary Operations - Unpredicated Group
3579  */
3580
3581 static void do_zz_fp(DisasContext *s, arg_rr_esz *a, gen_helper_gvec_2_ptr *fn)
3582 {
3583     unsigned vsz = vec_full_reg_size(s);
3584     TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3585
3586     tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3587                        vec_full_reg_offset(s, a->rn),
3588                        status, vsz, vsz, 0, fn);
3589     tcg_temp_free_ptr(status);
3590 }
3591
3592 static bool trans_FRECPE(DisasContext *s, arg_rr_esz *a, uint32_t insn)
3593 {
3594     static gen_helper_gvec_2_ptr * const fns[3] = {
3595         gen_helper_gvec_frecpe_h,
3596         gen_helper_gvec_frecpe_s,
3597         gen_helper_gvec_frecpe_d,
3598     };
3599     if (a->esz == 0) {
3600         return false;
3601     }
3602     if (sve_access_check(s)) {
3603         do_zz_fp(s, a, fns[a->esz - 1]);
3604     }
3605     return true;
3606 }
3607
3608 static bool trans_FRSQRTE(DisasContext *s, arg_rr_esz *a, uint32_t insn)
3609 {
3610     static gen_helper_gvec_2_ptr * const fns[3] = {
3611         gen_helper_gvec_frsqrte_h,
3612         gen_helper_gvec_frsqrte_s,
3613         gen_helper_gvec_frsqrte_d,
3614     };
3615     if (a->esz == 0) {
3616         return false;
3617     }
3618     if (sve_access_check(s)) {
3619         do_zz_fp(s, a, fns[a->esz - 1]);
3620     }
3621     return true;
3622 }
3623
3624 /*
3625  *** SVE Floating Point Compare with Zero Group
3626  */
3627
3628 static void do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
3629                       gen_helper_gvec_3_ptr *fn)
3630 {
3631     unsigned vsz = vec_full_reg_size(s);
3632     TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3633
3634     tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3635                        vec_full_reg_offset(s, a->rn),
3636                        pred_full_reg_offset(s, a->pg),
3637                        status, vsz, vsz, 0, fn);
3638     tcg_temp_free_ptr(status);
3639 }
3640
/*
 * Expand to trans_<NAME> for an FP compare with zero: reject
 * a->esz == 0 by returning false, otherwise (subject to the SVE access
 * check) call do_ppz_fp with the half/single/double helper at index
 * a->esz - 1.
 */
#define DO_PPZ(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
{                                                                 \
    static gen_helper_gvec_3_ptr * const fns[3] = {               \
        gen_helper_sve_##name##_h,                                \
        gen_helper_sve_##name##_s,                                \
        gen_helper_sve_##name##_d,                                \
    };                                                            \
    if (a->esz == 0) {                                            \
        return false;                                             \
    }                                                             \
    if (sve_access_check(s)) {                                    \
        do_ppz_fp(s, a, fns[a->esz - 1]);                         \
    }                                                             \
    return true;                                                  \
}

DO_PPZ(FCMGE_ppz0, fcmge0)
DO_PPZ(FCMGT_ppz0, fcmgt0)
DO_PPZ(FCMLE_ppz0, fcmle0)
DO_PPZ(FCMLT_ppz0, fcmlt0)
DO_PPZ(FCMEQ_ppz0, fcmeq0)
DO_PPZ(FCMNE_ppz0, fcmne0)

#undef DO_PPZ
3666
3667 /*
3668  *** SVE floating-point trig multiply-add coefficient
3669  */
3670
3671 static bool trans_FTMAD(DisasContext *s, arg_FTMAD *a, uint32_t insn)
3672 {
3673     static gen_helper_gvec_3_ptr * const fns[3] = {
3674         gen_helper_sve_ftmad_h,
3675         gen_helper_sve_ftmad_s,
3676         gen_helper_sve_ftmad_d,
3677     };
3678
3679     if (a->esz == 0) {
3680         return false;
3681     }
3682     if (sve_access_check(s)) {
3683         unsigned vsz = vec_full_reg_size(s);
3684         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3685         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3686                            vec_full_reg_offset(s, a->rn),
3687                            vec_full_reg_offset(s, a->rm),
3688                            status, vsz, vsz, a->imm, fns[a->esz - 1]);
3689         tcg_temp_free_ptr(status);
3690     }
3691     return true;
3692 }
3693
3694 /*
3695  *** SVE Floating Point Accumulating Reduction Group
3696  */
3697
3698 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
3699 {
3700     typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3701                           TCGv_ptr, TCGv_ptr, TCGv_i32);
3702     static fadda_fn * const fns[3] = {
3703         gen_helper_sve_fadda_h,
3704         gen_helper_sve_fadda_s,
3705         gen_helper_sve_fadda_d,
3706     };
3707     unsigned vsz = vec_full_reg_size(s);
3708     TCGv_ptr t_rm, t_pg, t_fpst;
3709     TCGv_i64 t_val;
3710     TCGv_i32 t_desc;
3711
3712     if (a->esz == 0) {
3713         return false;
3714     }
3715     if (!sve_access_check(s)) {
3716         return true;
3717     }
3718
3719     t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3720     t_rm = tcg_temp_new_ptr();
3721     t_pg = tcg_temp_new_ptr();
3722     tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3723     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3724     t_fpst = get_fpstatus_ptr(a->esz == MO_16);
3725     t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3726
3727     fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3728
3729     tcg_temp_free_i32(t_desc);
3730     tcg_temp_free_ptr(t_fpst);
3731     tcg_temp_free_ptr(t_pg);
3732     tcg_temp_free_ptr(t_rm);
3733
3734     write_fp_dreg(s, a->rd, t_val);
3735     tcg_temp_free_i64(t_val);
3736     return true;
3737 }
3738
3739 /*
3740  *** SVE Floating Point Arithmetic - Unpredicated Group
3741  */
3742
3743 static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3744                       gen_helper_gvec_3_ptr *fn)
3745 {
3746     if (fn == NULL) {
3747         return false;
3748     }
3749     if (sve_access_check(s)) {
3750         unsigned vsz = vec_full_reg_size(s);
3751         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3752         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3753                            vec_full_reg_offset(s, a->rn),
3754                            vec_full_reg_offset(s, a->rm),
3755                            status, vsz, vsz, 0, fn);
3756         tcg_temp_free_ptr(status);
3757     }
3758     return true;
3759 }
3760
3761
/*
 * Expand to trans_<NAME> for an unpredicated three-operand FP
 * operation: index a four-entry helper table by a->esz and pass the
 * entry to do_zzz_fp.  Slot 0 is NULL, which do_zzz_fp turns into a
 * false return.
 */
#define DO_FP3(NAME, name) \
static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a, uint32_t insn) \
{                                                                   \
    static gen_helper_gvec_3_ptr * const fns[4] = {                 \
        NULL, gen_helper_gvec_##name##_h,                           \
        gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
    };                                                              \
    return do_zzz_fp(s, a, fns[a->esz]);                            \
}

DO_FP3(FADD_zzz, fadd)
DO_FP3(FSUB_zzz, fsub)
DO_FP3(FMUL_zzz, fmul)
DO_FP3(FTSMUL, ftsmul)
DO_FP3(FRECPS, recps)
DO_FP3(FRSQRTS, rsqrts)

#undef DO_FP3
3780
3781 /*
3782  *** SVE Floating Point Arithmetic - Predicated Group
3783  */
3784
3785 static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3786                        gen_helper_gvec_4_ptr *fn)
3787 {
3788     if (fn == NULL) {
3789         return false;
3790     }
3791     if (sve_access_check(s)) {
3792         unsigned vsz = vec_full_reg_size(s);
3793         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3794         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3795                            vec_full_reg_offset(s, a->rn),
3796                            vec_full_reg_offset(s, a->rm),
3797                            pred_full_reg_offset(s, a->pg),
3798                            status, vsz, vsz, 0, fn);
3799         tcg_temp_free_ptr(status);
3800     }
3801     return true;
3802 }
3803
3804 #define DO_FP3(NAME, name) \
3805 static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a, uint32_t insn) \
3806 {                                                                   \
3807     static gen_helper_gvec_4_ptr * const fns[4] = {                 \
3808         NULL, gen_helper_sve_##name##_h,                            \
3809         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
3810     };                                                              \
3811     return do_zpzz_fp(s, a, fns[a->esz]);                           \
3812 }
3813
3814 DO_FP3(FADD_zpzz, fadd)
3815 DO_FP3(FSUB_zpzz, fsub)
3816 DO_FP3(FMUL_zpzz, fmul)
3817 DO_FP3(FMIN_zpzz, fmin)
3818 DO_FP3(FMAX_zpzz, fmax)
3819 DO_FP3(FMINNM_zpzz, fminnum)
3820 DO_FP3(FMAXNM_zpzz, fmaxnum)
3821 DO_FP3(FABD, fabd)
3822 DO_FP3(FSCALE, fscalbn)
3823 DO_FP3(FDIV, fdiv)
3824 DO_FP3(FMULX, fmulx)
3825
3826 #undef DO_FP3
3827
3828 typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3829                                       TCGv_i64, TCGv_ptr, TCGv_i32);
3830
3831 static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
3832                          TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3833 {
3834     unsigned vsz = vec_full_reg_size(s);
3835     TCGv_ptr t_zd, t_zn, t_pg, status;
3836     TCGv_i32 desc;
3837
3838     t_zd = tcg_temp_new_ptr();
3839     t_zn = tcg_temp_new_ptr();
3840     t_pg = tcg_temp_new_ptr();
3841     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
3842     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
3843     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3844
3845     status = get_fpstatus_ptr(is_fp16);
3846     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3847     fn(t_zd, t_zn, t_pg, scalar, status, desc);
3848
3849     tcg_temp_free_i32(desc);
3850     tcg_temp_free_ptr(status);
3851     tcg_temp_free_ptr(t_pg);
3852     tcg_temp_free_ptr(t_zn);
3853     tcg_temp_free_ptr(t_zd);
3854 }
3855
3856 static void do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
3857                       gen_helper_sve_fp2scalar *fn)
3858 {
3859     TCGv_i64 temp = tcg_const_i64(imm);
3860     do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
3861     tcg_temp_free_i64(temp);
3862 }
3863
3864 #define DO_FP_IMM(NAME, name, const0, const1) \
3865 static bool trans_##NAME##_zpzi(DisasContext *s, arg_rpri_esz *a,         \
3866                                 uint32_t insn)                            \
3867 {                                                                         \
3868     static gen_helper_sve_fp2scalar * const fns[3] = {                    \
3869         gen_helper_sve_##name##_h,                                        \
3870         gen_helper_sve_##name##_s,                                        \
3871         gen_helper_sve_##name##_d                                         \
3872     };                                                                    \
3873     static uint64_t const val[3][2] = {                                   \
3874         { float16_##const0, float16_##const1 },                           \
3875         { float32_##const0, float32_##const1 },                           \
3876         { float64_##const0, float64_##const1 },                           \
3877     };                                                                    \
3878     if (a->esz == 0) {                                                    \
3879         return false;                                                     \
3880     }                                                                     \
3881     if (sve_access_check(s)) {                                            \
3882         do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]);        \
3883     }                                                                     \
3884     return true;                                                          \
3885 }
3886
3887 #define float16_two  make_float16(0x4000)
3888 #define float32_two  make_float32(0x40000000)
3889 #define float64_two  make_float64(0x4000000000000000ULL)
3890
3891 DO_FP_IMM(FADD, fadds, half, one)
3892 DO_FP_IMM(FSUB, fsubs, half, one)
3893 DO_FP_IMM(FMUL, fmuls, half, two)
3894 DO_FP_IMM(FSUBR, fsubrs, half, one)
3895 DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3896 DO_FP_IMM(FMINNM, fminnms, zero, one)
3897 DO_FP_IMM(FMAX, fmaxs, zero, one)
3898 DO_FP_IMM(FMIN, fmins, zero, one)
3899
3900 #undef DO_FP_IMM
3901
3902 static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
3903                       gen_helper_gvec_4_ptr *fn)
3904 {
3905     if (fn == NULL) {
3906         return false;
3907     }
3908     if (sve_access_check(s)) {
3909         unsigned vsz = vec_full_reg_size(s);
3910         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3911         tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3912                            vec_full_reg_offset(s, a->rn),
3913                            vec_full_reg_offset(s, a->rm),
3914                            pred_full_reg_offset(s, a->pg),
3915                            status, vsz, vsz, 0, fn);
3916         tcg_temp_free_ptr(status);
3917     }
3918     return true;
3919 }
3920
3921 #define DO_FPCMP(NAME, name) \
3922 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a,     \
3923                                 uint32_t insn)                        \
3924 {                                                                     \
3925     static gen_helper_gvec_4_ptr * const fns[4] = {                   \
3926         NULL, gen_helper_sve_##name##_h,                              \
3927         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d          \
3928     };                                                                \
3929     return do_fp_cmp(s, a, fns[a->esz]);                              \
3930 }
3931
3932 DO_FPCMP(FCMGE, fcmge)
3933 DO_FPCMP(FCMGT, fcmgt)
3934 DO_FPCMP(FCMEQ, fcmeq)
3935 DO_FPCMP(FCMNE, fcmne)
3936 DO_FPCMP(FCMUO, fcmuo)
3937 DO_FPCMP(FACGE, facge)
3938 DO_FPCMP(FACGT, facgt)
3939
3940 #undef DO_FPCMP
3941
3942 static bool trans_FCADD(DisasContext *s, arg_FCADD *a, uint32_t insn)
3943 {
3944     static gen_helper_gvec_4_ptr * const fns[3] = {
3945         gen_helper_sve_fcadd_h,
3946         gen_helper_sve_fcadd_s,
3947         gen_helper_sve_fcadd_d
3948     };
3949
3950     if (a->esz == 0) {
3951         return false;
3952     }
3953     if (sve_access_check(s)) {
3954         unsigned vsz = vec_full_reg_size(s);
3955         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3956         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3957                            vec_full_reg_offset(s, a->rn),
3958                            vec_full_reg_offset(s, a->rm),
3959                            pred_full_reg_offset(s, a->pg),
3960                            status, vsz, vsz, a->rot, fns[a->esz - 1]);
3961         tcg_temp_free_ptr(status);
3962     }
3963     return true;
3964 }
3965
3966 typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);
3967
3968 static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)
3969 {
3970     if (fn == NULL) {
3971         return false;
3972     }
3973     if (!sve_access_check(s)) {
3974         return true;
3975     }
3976
3977     unsigned vsz = vec_full_reg_size(s);
3978     unsigned desc;
3979     TCGv_i32 t_desc;
3980     TCGv_ptr pg = tcg_temp_new_ptr();
3981
3982     /* We would need 7 operands to pass these arguments "properly".
3983      * So we encode all the register numbers into the descriptor.
3984      */
3985     desc = deposit32(a->rd, 5, 5, a->rn);
3986     desc = deposit32(desc, 10, 5, a->rm);
3987     desc = deposit32(desc, 15, 5, a->ra);
3988     desc = simd_desc(vsz, vsz, desc);
3989
3990     t_desc = tcg_const_i32(desc);
3991     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3992     fn(cpu_env, pg, t_desc);
3993     tcg_temp_free_i32(t_desc);
3994     tcg_temp_free_ptr(pg);
3995     return true;
3996 }
3997
3998 #define DO_FMLA(NAME, name) \
3999 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
4000 {                                                                    \
4001     static gen_helper_sve_fmla * const fns[4] = {                    \
4002         NULL, gen_helper_sve_##name##_h,                             \
4003         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d         \
4004     };                                                               \
4005     return do_fmla(s, a, fns[a->esz]);                               \
4006 }
4007
4008 DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
4009 DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
4010 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
4011 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
4012
4013 #undef DO_FMLA
4014
4015 static bool trans_FCMLA_zpzzz(DisasContext *s,
4016                               arg_FCMLA_zpzzz *a, uint32_t insn)
4017 {
4018     static gen_helper_sve_fmla * const fns[3] = {
4019         gen_helper_sve_fcmla_zpzzz_h,
4020         gen_helper_sve_fcmla_zpzzz_s,
4021         gen_helper_sve_fcmla_zpzzz_d,
4022     };
4023
4024     if (a->esz == 0) {
4025         return false;
4026     }
4027     if (sve_access_check(s)) {
4028         unsigned vsz = vec_full_reg_size(s);
4029         unsigned desc;
4030         TCGv_i32 t_desc;
4031         TCGv_ptr pg = tcg_temp_new_ptr();
4032
4033         /* We would need 7 operands to pass these arguments "properly".
4034          * So we encode all the register numbers into the descriptor.
4035          */
4036         desc = deposit32(a->rd, 5, 5, a->rn);
4037         desc = deposit32(desc, 10, 5, a->rm);
4038         desc = deposit32(desc, 15, 5, a->ra);
4039         desc = deposit32(desc, 20, 2, a->rot);
4040         desc = sextract32(desc, 0, 22);
4041         desc = simd_desc(vsz, vsz, desc);
4042
4043         t_desc = tcg_const_i32(desc);
4044         tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
4045         fns[a->esz - 1](cpu_env, pg, t_desc);
4046         tcg_temp_free_i32(t_desc);
4047         tcg_temp_free_ptr(pg);
4048     }
4049     return true;
4050 }
4051
4052 static bool trans_FCMLA_zzxz(DisasContext *s, arg_FCMLA_zzxz *a, uint32_t insn)
4053 {
4054     static gen_helper_gvec_3_ptr * const fns[2] = {
4055         gen_helper_gvec_fcmlah_idx,
4056         gen_helper_gvec_fcmlas_idx,
4057     };
4058
4059     tcg_debug_assert(a->esz == 1 || a->esz == 2);
4060     tcg_debug_assert(a->rd == a->ra);
4061     if (sve_access_check(s)) {
4062         unsigned vsz = vec_full_reg_size(s);
4063         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4064         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4065                            vec_full_reg_offset(s, a->rn),
4066                            vec_full_reg_offset(s, a->rm),
4067                            status, vsz, vsz,
4068                            a->index * 4 + a->rot,
4069                            fns[a->esz - 1]);
4070         tcg_temp_free_ptr(status);
4071     }
4072     return true;
4073 }
4074
4075 /*
4076  *** SVE Floating Point Unary Operations Predicated Group
4077  */
4078
4079 static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
4080                        bool is_fp16, gen_helper_gvec_3_ptr *fn)
4081 {
4082     if (sve_access_check(s)) {
4083         unsigned vsz = vec_full_reg_size(s);
4084         TCGv_ptr status = get_fpstatus_ptr(is_fp16);
4085         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4086                            vec_full_reg_offset(s, rn),
4087                            pred_full_reg_offset(s, pg),
4088                            status, vsz, vsz, 0, fn);
4089         tcg_temp_free_ptr(status);
4090     }
4091     return true;
4092 }
4093
4094 static bool trans_FCVT_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4095 {
4096     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
4097 }
4098
4099 static bool trans_FCVT_hs(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4100 {
4101     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
4102 }
4103
4104 static bool trans_FCVT_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4105 {
4106     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
4107 }
4108
4109 static bool trans_FCVT_hd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4110 {
4111     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
4112 }
4113
4114 static bool trans_FCVT_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4115 {
4116     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
4117 }
4118
4119 static bool trans_FCVT_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4120 {
4121     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
4122 }
4123
4124 static bool trans_FCVTZS_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4125 {
4126     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
4127 }
4128
4129 static bool trans_FCVTZU_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4130 {
4131     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
4132 }
4133
4134 static bool trans_FCVTZS_hs(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4135 {
4136     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
4137 }
4138
4139 static bool trans_FCVTZU_hs(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4140 {
4141     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
4142 }
4143
4144 static bool trans_FCVTZS_hd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4145 {
4146     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
4147 }
4148
4149 static bool trans_FCVTZU_hd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4150 {
4151     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
4152 }
4153
4154 static bool trans_FCVTZS_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4155 {
4156     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
4157 }
4158
4159 static bool trans_FCVTZU_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4160 {
4161     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
4162 }
4163
4164 static bool trans_FCVTZS_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4165 {
4166     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
4167 }
4168
4169 static bool trans_FCVTZU_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4170 {
4171     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
4172 }
4173
4174 static bool trans_FCVTZS_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4175 {
4176     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
4177 }
4178
4179 static bool trans_FCVTZU_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4180 {
4181     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
4182 }
4183
4184 static bool trans_FCVTZS_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4185 {
4186     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
4187 }
4188
4189 static bool trans_FCVTZU_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4190 {
4191     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
4192 }
4193
4194 static gen_helper_gvec_3_ptr * const frint_fns[3] = {
4195     gen_helper_sve_frint_h,
4196     gen_helper_sve_frint_s,
4197     gen_helper_sve_frint_d
4198 };
4199
4200 static bool trans_FRINTI(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4201 {
4202     if (a->esz == 0) {
4203         return false;
4204     }
4205     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4206                       frint_fns[a->esz - 1]);
4207 }
4208
4209 static bool trans_FRINTX(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4210 {
4211     static gen_helper_gvec_3_ptr * const fns[3] = {
4212         gen_helper_sve_frintx_h,
4213         gen_helper_sve_frintx_s,
4214         gen_helper_sve_frintx_d
4215     };
4216     if (a->esz == 0) {
4217         return false;
4218     }
4219     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4220 }
4221
4222 static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a, int mode)
4223 {
4224     if (a->esz == 0) {
4225         return false;
4226     }
4227     if (sve_access_check(s)) {
4228         unsigned vsz = vec_full_reg_size(s);
4229         TCGv_i32 tmode = tcg_const_i32(mode);
4230         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4231
4232         gen_helper_set_rmode(tmode, tmode, status);
4233
4234         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4235                            vec_full_reg_offset(s, a->rn),
4236                            pred_full_reg_offset(s, a->pg),
4237                            status, vsz, vsz, 0, frint_fns[a->esz - 1]);
4238
4239         gen_helper_set_rmode(tmode, tmode, status);
4240         tcg_temp_free_i32(tmode);
4241         tcg_temp_free_ptr(status);
4242     }
4243     return true;
4244 }
4245
4246 static bool trans_FRINTN(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4247 {
4248     return do_frint_mode(s, a, float_round_nearest_even);
4249 }
4250
4251 static bool trans_FRINTP(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4252 {
4253     return do_frint_mode(s, a, float_round_up);
4254 }
4255
4256 static bool trans_FRINTM(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4257 {
4258     return do_frint_mode(s, a, float_round_down);
4259 }
4260
4261 static bool trans_FRINTZ(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4262 {
4263     return do_frint_mode(s, a, float_round_to_zero);
4264 }
4265
4266 static bool trans_FRINTA(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4267 {
4268     return do_frint_mode(s, a, float_round_ties_away);
4269 }
4270
4271 static bool trans_FRECPX(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4272 {
4273     static gen_helper_gvec_3_ptr * const fns[3] = {
4274         gen_helper_sve_frecpx_h,
4275         gen_helper_sve_frecpx_s,
4276         gen_helper_sve_frecpx_d
4277     };
4278     if (a->esz == 0) {
4279         return false;
4280     }
4281     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4282 }
4283
4284 static bool trans_FSQRT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4285 {
4286     static gen_helper_gvec_3_ptr * const fns[3] = {
4287         gen_helper_sve_fsqrt_h,
4288         gen_helper_sve_fsqrt_s,
4289         gen_helper_sve_fsqrt_d
4290     };
4291     if (a->esz == 0) {
4292         return false;
4293     }
4294     return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - 1]);
4295 }
4296
4297 static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4298 {
4299     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4300 }
4301
4302 static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4303 {
4304     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4305 }
4306
4307 static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4308 {
4309     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
4310 }
4311
4312 static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4313 {
4314     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
4315 }
4316
4317 static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4318 {
4319     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
4320 }
4321
4322 static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4323 {
4324     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
4325 }
4326
4327 static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4328 {
4329     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
4330 }
4331
4332 static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4333 {
4334     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
4335 }
4336
4337 static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4338 {
4339     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
4340 }
4341
4342 static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4343 {
4344     return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
4345 }
4346
4347 static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4348 {
4349     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
4350 }
4351
4352 static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4353 {
4354     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
4355 }
4356
4357 static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4358 {
4359     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
4360 }
4361
4362 static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
4363 {
4364     return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
4365 }
4366
4367 /*
4368  *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4369  */
4370
4371 /* Subroutine loading a vector register at VOFS of LEN bytes.
4372  * The load should begin at the address Rn + IMM.
4373  */
4374
4375 static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
4376 {
4377     int len_align = QEMU_ALIGN_DOWN(len, 8);
4378     int len_remain = len % 8;
4379     int nparts = len / 8 + ctpop8(len_remain);
4380     int midx = get_mem_index(s);
4381     TCGv_i64 addr, t0, t1;
4382
4383     addr = tcg_temp_new_i64();
4384     t0 = tcg_temp_new_i64();
4385
4386     /* Note that unpredicated load/store of vector/predicate registers
4387      * are defined as a stream of bytes, which equates to little-endian
4388      * operations on larger quantities.  There is no nice way to force
4389      * a little-endian load for aarch64_be-linux-user out of line.
4390      *
4391      * Attempt to keep code expansion to a minimum by limiting the
4392      * amount of unrolling done.
4393      */
4394     if (nparts <= 4) {
4395         int i;
4396
4397         for (i = 0; i < len_align; i += 8) {
4398             tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
4399             tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
4400             tcg_gen_st_i64(t0, cpu_env, vofs + i);
4401         }
4402     } else {
4403         TCGLabel *loop = gen_new_label();
4404         TCGv_ptr tp, i = tcg_const_local_ptr(0);
4405
4406         gen_set_label(loop);
4407
4408         /* Minimize the number of local temps that must be re-read from
4409          * the stack each iteration.  Instead, re-compute values other
4410          * than the loop counter.
4411          */
4412         tp = tcg_temp_new_ptr();
4413         tcg_gen_addi_ptr(tp, i, imm);
4414         tcg_gen_extu_ptr_i64(addr, tp);
4415         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
4416
4417         tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
4418
4419         tcg_gen_add_ptr(tp, cpu_env, i);
4420         tcg_gen_addi_ptr(i, i, 8);
4421         tcg_gen_st_i64(t0, tp, vofs);
4422         tcg_temp_free_ptr(tp);
4423
4424         tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4425         tcg_temp_free_ptr(i);
4426     }
4427
4428     /* Predicate register loads can be any multiple of 2.
4429      * Note that we still store the entire 64-bit unit into cpu_env.
4430      */
4431     if (len_remain) {
4432         tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
4433
4434         switch (len_remain) {
4435         case 2:
4436         case 4:
4437         case 8:
4438             tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
4439             break;
4440
4441         case 6:
4442             t1 = tcg_temp_new_i64();
4443             tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
4444             tcg_gen_addi_i64(addr, addr, 4);
4445             tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
4446             tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
4447             tcg_temp_free_i64(t1);
4448             break;
4449
4450         default:
4451             g_assert_not_reached();
4452         }
4453         tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
4454     }
4455     tcg_temp_free_i64(addr);
4456     tcg_temp_free_i64(t0);
4457 }
4458
4459 /* Similarly for stores.  */
4460 static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
4461 {
4462     int len_align = QEMU_ALIGN_DOWN(len, 8);
4463     int len_remain = len % 8;
4464     int nparts = len / 8 + ctpop8(len_remain);
4465     int midx = get_mem_index(s);
4466     TCGv_i64 addr, t0;
4467
4468     addr = tcg_temp_new_i64();
4469     t0 = tcg_temp_new_i64();
4470
4471     /* Note that unpredicated load/store of vector/predicate registers
4472      * are defined as a stream of bytes, which equates to little-endian
4473      * operations on larger quantities.  There is no nice way to force
4474      * a little-endian store for aarch64_be-linux-user out of line.
4475      *
4476      * Attempt to keep code expansion to a minimum by limiting the
4477      * amount of unrolling done.
4478      */
4479     if (nparts <= 4) {
4480         int i;
4481
4482         for (i = 0; i < len_align; i += 8) {
4483             tcg_gen_ld_i64(t0, cpu_env, vofs + i);
4484             tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
4485             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
4486         }
4487     } else {
4488         TCGLabel *loop = gen_new_label();
4489         TCGv_ptr t2, i = tcg_const_local_ptr(0);
4490
4491         gen_set_label(loop);
4492
4493         t2 = tcg_temp_new_ptr();
4494         tcg_gen_add_ptr(t2, cpu_env, i);
4495         tcg_gen_ld_i64(t0, t2, vofs);
4496
4497         /* Minimize the number of local temps that must be re-read from
4498          * the stack each iteration.  Instead, re-compute values other
4499          * than the loop counter.
4500          */
4501         tcg_gen_addi_ptr(t2, i, imm);
4502         tcg_gen_extu_ptr_i64(addr, t2);
4503         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
4504         tcg_temp_free_ptr(t2);
4505
4506         tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
4507
4508         tcg_gen_addi_ptr(i, i, 8);
4509
4510         tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4511         tcg_temp_free_ptr(i);
4512     }
4513
4514     /* Predicate register stores can be any multiple of 2.  */
4515     if (len_remain) {
4516         tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
4517         tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
4518
4519         switch (len_remain) {
4520         case 2:
4521         case 4:
4522         case 8:
4523             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
4524             break;
4525
4526         case 6:
4527             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
4528             tcg_gen_addi_i64(addr, addr, 4);
4529             tcg_gen_shri_i64(t0, t0, 32);
4530             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
4531             break;
4532
4533         default:
4534             g_assert_not_reached();
4535         }
4536     }
4537     tcg_temp_free_i64(addr);
4538     tcg_temp_free_i64(t0);
4539 }
4540
4541 static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
4542 {
4543     if (sve_access_check(s)) {
4544         int size = vec_full_reg_size(s);
4545         int off = vec_full_reg_offset(s, a->rd);
4546         do_ldr(s, off, size, a->rn, a->imm * size);
4547     }
4548     return true;
4549 }
4550
4551 static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
4552 {
4553     if (sve_access_check(s)) {
4554         int size = pred_full_reg_size(s);
4555         int off = pred_full_reg_offset(s, a->rd);
4556         do_ldr(s, off, size, a->rn, a->imm * size);
4557     }
4558     return true;
4559 }
4560
4561 static bool trans_STR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
4562 {
4563     if (sve_access_check(s)) {
4564         int size = vec_full_reg_size(s);
4565         int off = vec_full_reg_offset(s, a->rd);
4566         do_str(s, off, size, a->rn, a->imm * size);
4567     }
4568     return true;
4569 }
4570
4571 static bool trans_STR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
4572 {
4573     if (sve_access_check(s)) {
4574         int size = pred_full_reg_size(s);
4575         int off = pred_full_reg_offset(s, a->rd);
4576         do_str(s, off, size, a->rn, a->imm * size);
4577     }
4578     return true;
4579 }
4580
4581 /*
4582  *** SVE Memory - Contiguous Load Group
4583  */
4584
4585 /* The memory mode of the dtype.  */
4586 static const TCGMemOp dtype_mop[16] = {
4587     MO_UB, MO_UB, MO_UB, MO_UB,
4588     MO_SL, MO_UW, MO_UW, MO_UW,
4589     MO_SW, MO_SW, MO_UL, MO_UL,
4590     MO_SB, MO_SB, MO_SB, MO_Q
4591 };
4592
4593 #define dtype_msz(x)  (dtype_mop[x] & MO_SIZE)
4594
4595 /* The vector element size of dtype.  */
4596 static const uint8_t dtype_esz[16] = {
4597     0, 1, 2, 3,
4598     3, 1, 2, 3,
4599     3, 2, 2, 3,
4600     3, 2, 1, 3
4601 };
4602
4603 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4604                        gen_helper_gvec_mem *fn)
4605 {
4606     unsigned vsz = vec_full_reg_size(s);
4607     TCGv_ptr t_pg;
4608     TCGv_i32 desc;
4609
4610     /* For e.g. LD4, there are not enough arguments to pass all 4
4611      * registers as pointers, so encode the regno into the data field.
4612      * For consistency, do this even for LD1.
4613      */
4614     desc = tcg_const_i32(simd_desc(vsz, vsz, zt));
4615     t_pg = tcg_temp_new_ptr();
4616
4617     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4618     fn(cpu_env, t_pg, addr, desc);
4619
4620     tcg_temp_free_ptr(t_pg);
4621     tcg_temp_free_i32(desc);
4622 }
4623
4624 static void do_ld_zpa(DisasContext *s, int zt, int pg,
4625                       TCGv_i64 addr, int dtype, int nreg)
4626 {
4627     static gen_helper_gvec_mem * const fns[16][4] = {
4628         { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4629           gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4630         { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4631         { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4632         { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4633
4634         { gen_helper_sve_ld1sds_r, NULL, NULL, NULL },
4635         { gen_helper_sve_ld1hh_r, gen_helper_sve_ld2hh_r,
4636           gen_helper_sve_ld3hh_r, gen_helper_sve_ld4hh_r },
4637         { gen_helper_sve_ld1hsu_r, NULL, NULL, NULL },
4638         { gen_helper_sve_ld1hdu_r, NULL, NULL, NULL },
4639
4640         { gen_helper_sve_ld1hds_r, NULL, NULL, NULL },
4641         { gen_helper_sve_ld1hss_r, NULL, NULL, NULL },
4642         { gen_helper_sve_ld1ss_r, gen_helper_sve_ld2ss_r,
4643           gen_helper_sve_ld3ss_r, gen_helper_sve_ld4ss_r },
4644         { gen_helper_sve_ld1sdu_r, NULL, NULL, NULL },
4645
4646         { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4647         { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4648         { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4649         { gen_helper_sve_ld1dd_r, gen_helper_sve_ld2dd_r,
4650           gen_helper_sve_ld3dd_r, gen_helper_sve_ld4dd_r },
4651     };
4652     gen_helper_gvec_mem *fn = fns[dtype][nreg];
4653
4654     /* While there are holes in the table, they are not
4655      * accessible via the instruction encoding.
4656      */
4657     assert(fn != NULL);
4658     do_mem_zpa(s, zt, pg, addr, fn);
4659 }
4660
4661 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4662 {
4663     if (a->rm == 31) {
4664         return false;
4665     }
4666     if (sve_access_check(s)) {
4667         TCGv_i64 addr = new_tmp_a64(s);
4668         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4669         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4670         do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4671     }
4672     return true;
4673 }
4674
4675 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4676 {
4677     if (sve_access_check(s)) {
4678         int vsz = vec_full_reg_size(s);
4679         int elements = vsz >> dtype_esz[a->dtype];
4680         TCGv_i64 addr = new_tmp_a64(s);
4681
4682         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4683                          (a->imm * elements * (a->nreg + 1))
4684                          << dtype_msz(a->dtype));
4685         do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4686     }
4687     return true;
4688 }
4689
4690 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4691 {
4692     static gen_helper_gvec_mem * const fns[16] = {
4693         gen_helper_sve_ldff1bb_r,
4694         gen_helper_sve_ldff1bhu_r,
4695         gen_helper_sve_ldff1bsu_r,
4696         gen_helper_sve_ldff1bdu_r,
4697
4698         gen_helper_sve_ldff1sds_r,
4699         gen_helper_sve_ldff1hh_r,
4700         gen_helper_sve_ldff1hsu_r,
4701         gen_helper_sve_ldff1hdu_r,
4702
4703         gen_helper_sve_ldff1hds_r,
4704         gen_helper_sve_ldff1hss_r,
4705         gen_helper_sve_ldff1ss_r,
4706         gen_helper_sve_ldff1sdu_r,
4707
4708         gen_helper_sve_ldff1bds_r,
4709         gen_helper_sve_ldff1bss_r,
4710         gen_helper_sve_ldff1bhs_r,
4711         gen_helper_sve_ldff1dd_r,
4712     };
4713
4714     if (sve_access_check(s)) {
4715         TCGv_i64 addr = new_tmp_a64(s);
4716         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4717         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4718         do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
4719     }
4720     return true;
4721 }
4722
4723 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4724 {
4725     static gen_helper_gvec_mem * const fns[16] = {
4726         gen_helper_sve_ldnf1bb_r,
4727         gen_helper_sve_ldnf1bhu_r,
4728         gen_helper_sve_ldnf1bsu_r,
4729         gen_helper_sve_ldnf1bdu_r,
4730
4731         gen_helper_sve_ldnf1sds_r,
4732         gen_helper_sve_ldnf1hh_r,
4733         gen_helper_sve_ldnf1hsu_r,
4734         gen_helper_sve_ldnf1hdu_r,
4735
4736         gen_helper_sve_ldnf1hds_r,
4737         gen_helper_sve_ldnf1hss_r,
4738         gen_helper_sve_ldnf1ss_r,
4739         gen_helper_sve_ldnf1sdu_r,
4740
4741         gen_helper_sve_ldnf1bds_r,
4742         gen_helper_sve_ldnf1bss_r,
4743         gen_helper_sve_ldnf1bhs_r,
4744         gen_helper_sve_ldnf1dd_r,
4745     };
4746
4747     if (sve_access_check(s)) {
4748         int vsz = vec_full_reg_size(s);
4749         int elements = vsz >> dtype_esz[a->dtype];
4750         int off = (a->imm * elements) << dtype_msz(a->dtype);
4751         TCGv_i64 addr = new_tmp_a64(s);
4752
4753         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
4754         do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
4755     }
4756     return true;
4757 }
4758
4759 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
4760 {
4761     static gen_helper_gvec_mem * const fns[4] = {
4762         gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_r,
4763         gen_helper_sve_ld1ss_r, gen_helper_sve_ld1dd_r,
4764     };
4765     unsigned vsz = vec_full_reg_size(s);
4766     TCGv_ptr t_pg;
4767     TCGv_i32 desc;
4768     int poff;
4769
4770     /* Load the first quadword using the normal predicated load helpers.  */
4771     desc = tcg_const_i32(simd_desc(16, 16, zt));
4772
4773     poff = pred_full_reg_offset(s, pg);
4774     if (vsz > 16) {
4775         /*
4776          * Zero-extend the first 16 bits of the predicate into a temporary.
4777          * This avoids triggering an assert making sure we don't have bits
4778          * set within a predicate beyond VQ, but we have lowered VQ to 1
4779          * for this load operation.
4780          */
4781         TCGv_i64 tmp = tcg_temp_new_i64();
4782 #ifdef HOST_WORDS_BIGENDIAN
4783         poff += 6;
4784 #endif
4785         tcg_gen_ld16u_i64(tmp, cpu_env, poff);
4786
4787         poff = offsetof(CPUARMState, vfp.preg_tmp);
4788         tcg_gen_st_i64(tmp, cpu_env, poff);
4789         tcg_temp_free_i64(tmp);
4790     }
4791
4792     t_pg = tcg_temp_new_ptr();
4793     tcg_gen_addi_ptr(t_pg, cpu_env, poff);
4794
4795     fns[msz](cpu_env, t_pg, addr, desc);
4796
4797     tcg_temp_free_ptr(t_pg);
4798     tcg_temp_free_i32(desc);
4799
4800     /* Replicate that first quadword.  */
4801     if (vsz > 16) {
4802         unsigned dofs = vec_full_reg_offset(s, zt);
4803         tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
4804     }
4805 }
4806
4807 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4808 {
4809     if (a->rm == 31) {
4810         return false;
4811     }
4812     if (sve_access_check(s)) {
4813         int msz = dtype_msz(a->dtype);
4814         TCGv_i64 addr = new_tmp_a64(s);
4815         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4816         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4817         do_ldrq(s, a->rd, a->pg, addr, msz);
4818     }
4819     return true;
4820 }
4821
4822 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4823 {
4824     if (sve_access_check(s)) {
4825         TCGv_i64 addr = new_tmp_a64(s);
4826         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4827         do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4828     }
4829     return true;
4830 }
4831
4832 /* Load and broadcast element.  */
4833 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4834 {
4835     if (!sve_access_check(s)) {
4836         return true;
4837     }
4838
4839     unsigned vsz = vec_full_reg_size(s);
4840     unsigned psz = pred_full_reg_size(s);
4841     unsigned esz = dtype_esz[a->dtype];
4842     unsigned msz = dtype_msz(a->dtype);
4843     TCGLabel *over = gen_new_label();
4844     TCGv_i64 temp;
4845
4846     /* If the guarding predicate has no bits set, no load occurs.  */
4847     if (psz <= 8) {
4848         /* Reduce the pred_esz_masks value simply to reduce the
4849          * size of the code generated here.
4850          */
4851         uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
4852         temp = tcg_temp_new_i64();
4853         tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
4854         tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
4855         tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
4856         tcg_temp_free_i64(temp);
4857     } else {
4858         TCGv_i32 t32 = tcg_temp_new_i32();
4859         find_last_active(s, t32, esz, a->pg);
4860         tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
4861         tcg_temp_free_i32(t32);
4862     }
4863
4864     /* Load the data.  */
4865     temp = tcg_temp_new_i64();
4866     tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
4867     tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
4868                         s->be_data | dtype_mop[a->dtype]);
4869
4870     /* Broadcast to *all* elements.  */
4871     tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
4872                          vsz, vsz, temp);
4873     tcg_temp_free_i64(temp);
4874
4875     /* Zero the inactive elements.  */
4876     gen_set_label(over);
4877     do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
4878     return true;
4879 }
4880
4881 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4882                       int msz, int esz, int nreg)
4883 {
4884     static gen_helper_gvec_mem * const fn_single[4][4] = {
4885         { gen_helper_sve_st1bb_r, gen_helper_sve_st1bh_r,
4886           gen_helper_sve_st1bs_r, gen_helper_sve_st1bd_r },
4887         { NULL,                   gen_helper_sve_st1hh_r,
4888           gen_helper_sve_st1hs_r, gen_helper_sve_st1hd_r },
4889         { NULL, NULL,
4890           gen_helper_sve_st1ss_r, gen_helper_sve_st1sd_r },
4891         { NULL, NULL, NULL, gen_helper_sve_st1dd_r },
4892     };
4893     static gen_helper_gvec_mem * const fn_multiple[3][4] = {
4894         { gen_helper_sve_st2bb_r, gen_helper_sve_st2hh_r,
4895           gen_helper_sve_st2ss_r, gen_helper_sve_st2dd_r },
4896         { gen_helper_sve_st3bb_r, gen_helper_sve_st3hh_r,
4897           gen_helper_sve_st3ss_r, gen_helper_sve_st3dd_r },
4898         { gen_helper_sve_st4bb_r, gen_helper_sve_st4hh_r,
4899           gen_helper_sve_st4ss_r, gen_helper_sve_st4dd_r },
4900     };
4901     gen_helper_gvec_mem *fn;
4902
4903     if (nreg == 0) {
4904         /* ST1 */
4905         fn = fn_single[msz][esz];
4906     } else {
4907         /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
4908         assert(msz == esz);
4909         fn = fn_multiple[nreg - 1][msz];
4910     }
4911     assert(fn != NULL);
4912     do_mem_zpa(s, zt, pg, addr, fn);
4913 }
4914
4915 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a, uint32_t insn)
4916 {
4917     if (a->rm == 31 || a->msz > a->esz) {
4918         return false;
4919     }
4920     if (sve_access_check(s)) {
4921         TCGv_i64 addr = new_tmp_a64(s);
4922         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
4923         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4924         do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
4925     }
4926     return true;
4927 }
4928
4929 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a, uint32_t insn)
4930 {
4931     if (a->msz > a->esz) {
4932         return false;
4933     }
4934     if (sve_access_check(s)) {
4935         int vsz = vec_full_reg_size(s);
4936         int elements = vsz >> a->esz;
4937         TCGv_i64 addr = new_tmp_a64(s);
4938
4939         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4940                          (a->imm * elements * (a->nreg + 1)) << a->msz);
4941         do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
4942     }
4943     return true;
4944 }
4945
4946 /*
4947  *** SVE gather loads / scatter stores
4948  */
4949
4950 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, int scale,
4951                        TCGv_i64 scalar, gen_helper_gvec_mem_scatter *fn)
4952 {
4953     unsigned vsz = vec_full_reg_size(s);
4954     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, scale));
4955     TCGv_ptr t_zm = tcg_temp_new_ptr();
4956     TCGv_ptr t_pg = tcg_temp_new_ptr();
4957     TCGv_ptr t_zt = tcg_temp_new_ptr();
4958
4959     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4960     tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
4961     tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
4962     fn(cpu_env, t_zt, t_pg, t_zm, scalar, desc);
4963
4964     tcg_temp_free_ptr(t_zt);
4965     tcg_temp_free_ptr(t_zm);
4966     tcg_temp_free_ptr(t_pg);
4967     tcg_temp_free_i32(desc);
4968 }
4969
4970 /* Indexed by [ff][xs][u][msz].  */
4971 static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][3] = {
4972     { { { gen_helper_sve_ldbss_zsu,
4973           gen_helper_sve_ldhss_zsu,
4974           NULL, },
4975         { gen_helper_sve_ldbsu_zsu,
4976           gen_helper_sve_ldhsu_zsu,
4977           gen_helper_sve_ldssu_zsu, } },
4978       { { gen_helper_sve_ldbss_zss,
4979           gen_helper_sve_ldhss_zss,
4980           NULL, },
4981         { gen_helper_sve_ldbsu_zss,
4982           gen_helper_sve_ldhsu_zss,
4983           gen_helper_sve_ldssu_zss, } } },
4984
4985     { { { gen_helper_sve_ldffbss_zsu,
4986           gen_helper_sve_ldffhss_zsu,
4987           NULL, },
4988         { gen_helper_sve_ldffbsu_zsu,
4989           gen_helper_sve_ldffhsu_zsu,
4990           gen_helper_sve_ldffssu_zsu, } },
4991       { { gen_helper_sve_ldffbss_zss,
4992           gen_helper_sve_ldffhss_zss,
4993           NULL, },
4994         { gen_helper_sve_ldffbsu_zss,
4995           gen_helper_sve_ldffhsu_zss,
4996           gen_helper_sve_ldffssu_zss, } } }
4997 };
4998
4999 /* Note that we overload xs=2 to indicate 64-bit offset.  */
5000 static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][3][2][4] = {
5001     { { { gen_helper_sve_ldbds_zsu,
5002           gen_helper_sve_ldhds_zsu,
5003           gen_helper_sve_ldsds_zsu,
5004           NULL, },
5005         { gen_helper_sve_ldbdu_zsu,
5006           gen_helper_sve_ldhdu_zsu,
5007           gen_helper_sve_ldsdu_zsu,
5008           gen_helper_sve_ldddu_zsu, } },
5009       { { gen_helper_sve_ldbds_zss,
5010           gen_helper_sve_ldhds_zss,
5011           gen_helper_sve_ldsds_zss,
5012           NULL, },
5013         { gen_helper_sve_ldbdu_zss,
5014           gen_helper_sve_ldhdu_zss,
5015           gen_helper_sve_ldsdu_zss,
5016           gen_helper_sve_ldddu_zss, } },
5017       { { gen_helper_sve_ldbds_zd,
5018           gen_helper_sve_ldhds_zd,
5019           gen_helper_sve_ldsds_zd,
5020           NULL, },
5021         { gen_helper_sve_ldbdu_zd,
5022           gen_helper_sve_ldhdu_zd,
5023           gen_helper_sve_ldsdu_zd,
5024           gen_helper_sve_ldddu_zd, } } },
5025
5026     { { { gen_helper_sve_ldffbds_zsu,
5027           gen_helper_sve_ldffhds_zsu,
5028           gen_helper_sve_ldffsds_zsu,
5029           NULL, },
5030         { gen_helper_sve_ldffbdu_zsu,
5031           gen_helper_sve_ldffhdu_zsu,
5032           gen_helper_sve_ldffsdu_zsu,
5033           gen_helper_sve_ldffddu_zsu, } },
5034       { { gen_helper_sve_ldffbds_zss,
5035           gen_helper_sve_ldffhds_zss,
5036           gen_helper_sve_ldffsds_zss,
5037           NULL, },
5038         { gen_helper_sve_ldffbdu_zss,
5039           gen_helper_sve_ldffhdu_zss,
5040           gen_helper_sve_ldffsdu_zss,
5041           gen_helper_sve_ldffddu_zss, } },
5042       { { gen_helper_sve_ldffbds_zd,
5043           gen_helper_sve_ldffhds_zd,
5044           gen_helper_sve_ldffsds_zd,
5045           NULL, },
5046         { gen_helper_sve_ldffbdu_zd,
5047           gen_helper_sve_ldffhdu_zd,
5048           gen_helper_sve_ldffsdu_zd,
5049           gen_helper_sve_ldffddu_zd, } } }
5050 };
5051
5052 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn)
5053 {
5054     gen_helper_gvec_mem_scatter *fn = NULL;
5055
5056     if (!sve_access_check(s)) {
5057         return true;
5058     }
5059
5060     switch (a->esz) {
5061     case MO_32:
5062         fn = gather_load_fn32[a->ff][a->xs][a->u][a->msz];
5063         break;
5064     case MO_64:
5065         fn = gather_load_fn64[a->ff][a->xs][a->u][a->msz];
5066         break;
5067     }
5068     assert(fn != NULL);
5069
5070     do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5071                cpu_reg_sp(s, a->rn), fn);
5072     return true;
5073 }
5074
5075 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn)
5076 {
5077     gen_helper_gvec_mem_scatter *fn = NULL;
5078     TCGv_i64 imm;
5079
5080     if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
5081         return false;
5082     }
5083     if (!sve_access_check(s)) {
5084         return true;
5085     }
5086
5087     switch (a->esz) {
5088     case MO_32:
5089         fn = gather_load_fn32[a->ff][0][a->u][a->msz];
5090         break;
5091     case MO_64:
5092         fn = gather_load_fn64[a->ff][2][a->u][a->msz];
5093         break;
5094     }
5095     assert(fn != NULL);
5096
5097     /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
5098      * by loading the immediate into the scalar parameter.
5099      */
5100     imm = tcg_const_i64(a->imm << a->msz);
5101     do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
5102     tcg_temp_free_i64(imm);
5103     return true;
5104 }
5105
5106 /* Indexed by [xs][msz].  */
5107 static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][3] = {
5108     { gen_helper_sve_stbs_zsu,
5109       gen_helper_sve_sths_zsu,
5110       gen_helper_sve_stss_zsu, },
5111     { gen_helper_sve_stbs_zss,
5112       gen_helper_sve_sths_zss,
5113       gen_helper_sve_stss_zss, },
5114 };
5115
5116 /* Note that we overload xs=2 to indicate 64-bit offset.  */
5117 static gen_helper_gvec_mem_scatter * const scatter_store_fn64[3][4] = {
5118     { gen_helper_sve_stbd_zsu,
5119       gen_helper_sve_sthd_zsu,
5120       gen_helper_sve_stsd_zsu,
5121       gen_helper_sve_stdd_zsu, },
5122     { gen_helper_sve_stbd_zss,
5123       gen_helper_sve_sthd_zss,
5124       gen_helper_sve_stsd_zss,
5125       gen_helper_sve_stdd_zss, },
5126     { gen_helper_sve_stbd_zd,
5127       gen_helper_sve_sthd_zd,
5128       gen_helper_sve_stsd_zd,
5129       gen_helper_sve_stdd_zd, },
5130 };
5131
5132 static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a, uint32_t insn)
5133 {
5134     gen_helper_gvec_mem_scatter *fn;
5135
5136     if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
5137         return false;
5138     }
5139     if (!sve_access_check(s)) {
5140         return true;
5141     }
5142     switch (a->esz) {
5143     case MO_32:
5144         fn = scatter_store_fn32[a->xs][a->msz];
5145         break;
5146     case MO_64:
5147         fn = scatter_store_fn64[a->xs][a->msz];
5148         break;
5149     default:
5150         g_assert_not_reached();
5151     }
5152     do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5153                cpu_reg_sp(s, a->rn), fn);
5154     return true;
5155 }
5156
5157 static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a, uint32_t insn)
5158 {
5159     gen_helper_gvec_mem_scatter *fn = NULL;
5160     TCGv_i64 imm;
5161
5162     if (a->esz < a->msz) {
5163         return false;
5164     }
5165     if (!sve_access_check(s)) {
5166         return true;
5167     }
5168
5169     switch (a->esz) {
5170     case MO_32:
5171         fn = scatter_store_fn32[0][a->msz];
5172         break;
5173     case MO_64:
5174         fn = scatter_store_fn64[2][a->msz];
5175         break;
5176     }
5177     assert(fn != NULL);
5178
5179     /* Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])
5180      * by loading the immediate into the scalar parameter.
5181      */
5182     imm = tcg_const_i64(a->imm << a->msz);
5183     do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
5184     tcg_temp_free_i64(imm);
5185     return true;
5186 }
5187
5188 /*
5189  * Prefetches
5190  */
5191
5192 static bool trans_PRF(DisasContext *s, arg_PRF *a, uint32_t insn)
5193 {
5194     /* Prefetch is a nop within QEMU.  */
5195     (void)sve_access_check(s);
5196     return true;
5197 }
5198
5199 static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a, uint32_t insn)
5200 {
5201     if (a->rm == 31) {
5202         return false;
5203     }
5204     /* Prefetch is a nop within QEMU.  */
5205     (void)sve_access_check(s);
5206     return true;
5207 }
5208
5209 /*
5210  * Move Prefix
5211  *
5212  * TODO: The implementation so far could handle predicated merging movprfx.
5213  * The helper functions as written take an extra source register to
5214  * use in the operation, but the result is only written when predication
5215  * succeeds.  For unpredicated movprfx, we need to rearrange the helpers
5216  * to allow the final write back to the destination to be unconditional.
5217  * For predicated zeroing movprfx, we need to rearrange the helpers to
5218  * allow the final write back to zero inactives.
5219  *
5220  * In the meantime, just emit the moves.
5221  */
5222
5223 static bool trans_MOVPRFX(DisasContext *s, arg_MOVPRFX *a, uint32_t insn)
5224 {
5225     return do_mov_z(s, a->rd, a->rn);
5226 }
5227
5228 static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
5229 {
5230     if (sve_access_check(s)) {
5231         do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
5232     }
5233     return true;
5234 }
5235
5236 static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
5237 {
5238     if (sve_access_check(s)) {
5239         do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz);
5240     }
5241     return true;
5242 }