target/arm/translate-sve.c

   1 /*
   2  * AArch64 SVE translation
   3  *
   4  * Copyright (c) 2018 Linaro, Ltd
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #include "qemu/osdep.h"
  21 #include "cpu.h"
  22 #include "exec/exec-all.h"
  23 #include "tcg-op.h"
  24 #include "tcg-op-gvec.h"
  25 #include "tcg-gvec-desc.h"
  26 #include "qemu/log.h"
  27 #include "arm_ldst.h"
  28 #include "translate.h"
  29 #include "internals.h"
  30 #include "exec/helper-proto.h"
  31 #include "exec/helper-gen.h"
  32 #include "exec/log.h"
  33 #include "trace-tcg.h"
  34 #include "translate-a64.h"
  35
  36
/* Prototype for gvec expanders that take one i64 scalar operand.
 * NOTE(review): the arguments are presumably (vece, dofs, aofs, scalar,
 * oprsz, maxsz) as for the other gvec expanders -- confirm against
 * tcg-op-gvec.h.
 */
typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
                         TCGv_i64, uint32_t, uint32_t);

/* Helper generators that produce an i32 result from three (resp. four)
 * pointers and an i32 descriptor.  NOTE(review): going by the name the
 * result carries predicate-test flags -- confirm at the users.
 */
typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_i32);
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
                                     TCGv_ptr, TCGv_ptr, TCGv_i32);

/* Helper generators that additionally receive the cpu env pointer.
 * NOTE(review): presumably used by the load/store and gather/scatter
 * translators later in the file -- confirm at the users.
 */
typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
                                         TCGv_ptr, TCGv_i64, TCGv_i32);
  48
  49 /*
  50  * Helpers for extracting complex instruction fields.
  51  */
  52
  53 /* See e.g. ASR (immediate, predicated).
  54  * Returns -1 for unallocated encoding; diagnose later.
  55  */
  56 static int tszimm_esz(int x)
  57 {
  58     x >>= 3;  /* discard imm3 */
  59     return 31 - clz32(x);
  60 }
  61
  62 static int tszimm_shr(int x)
  63 {
  64     return (16 << tszimm_esz(x)) - x;
  65 }
  66
  67 /* See e.g. LSL (immediate, predicated).  */
  68 static int tszimm_shl(int x)
  69 {
  70     return x - (8 << tszimm_esz(x));
  71 }
  72
  73 static inline int plus1(int x)
  74 {
  75     return x + 1;
  76 }
  77
  78 /* The SH bit is in bit 8.  Extract the low 8 and shift.  */
  79 static inline int expand_imm_sh8s(int x)
  80 {
  81     return (int8_t)x << (x & 0x100 ? 8 : 0);
  82 }
  83
  84 static inline int expand_imm_sh8u(int x)
  85 {
  86     return (uint8_t)x << (x & 0x100 ? 8 : 0);
  87 }
  88
  89 /* Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)
  90  * with unsigned data.  C.f. SVE Memory Contiguous Load Group.
  91  */
  92 static inline int msz_dtype(int msz)
  93 {
  94     static const uint8_t dtype[4] = { 0, 5, 10, 15 };
  95     return dtype[msz];
  96 }
  97
  98 /*
  99  * Include the generated decoder.
 100  */
 101
 102 #include "decode-sve.inc.c"
 103
 104 /*
 105  * Implement all of the translator functions referenced by the decoder.
 106  */
 107
 108 /* Return the offset info CPUARMState of the predicate vector register Pn.
 109  * Note for this purpose, FFR is P16.
 110  */
 111 static inline int pred_full_reg_offset(DisasContext *s, int regno)
 112 {
 113     return offsetof(CPUARMState, vfp.pregs[regno]);
 114 }
 115
 116 /* Return the byte size of the whole predicate register, VL / 64.  */
 117 static inline int pred_full_reg_size(DisasContext *s)
 118 {
 119     return s->sve_len >> 3;
 120 }
 121
 122 /* Round up the size of a register to a size allowed by
 123  * the tcg vector infrastructure.  Any operation which uses this
 124  * size may assume that the bits above pred_full_reg_size are zero,
 125  * and must leave them the same way.
 126  *
 127  * Note that this is not needed for the vector registers as they
 128  * are always properly sized for tcg vectors.
 129  */
 130 static int size_for_gvec(int size)
 131 {
 132     if (size <= 8) {
 133         return 8;
 134     } else {
 135         return QEMU_ALIGN_UP(size, 16);
 136     }
 137 }
 138
 139 static int pred_gvec_reg_size(DisasContext *s)
 140 {
 141     return size_for_gvec(pred_full_reg_size(s));
 142 }
 143
 144 /* Invoke a vector expander on two Zregs.  */
 145 static bool do_vector2_z(DisasContext *s, GVecGen2Fn *gvec_fn,
 146                          int esz, int rd, int rn)
 147 {
 148     if (sve_access_check(s)) {
 149         unsigned vsz = vec_full_reg_size(s);
 150         gvec_fn(esz, vec_full_reg_offset(s, rd),
 151                 vec_full_reg_offset(s, rn), vsz, vsz);
 152     }
 153     return true;
 154 }
 155
 156 /* Invoke a vector expander on three Zregs.  */
 157 static bool do_vector3_z(DisasContext *s, GVecGen3Fn *gvec_fn,
 158                          int esz, int rd, int rn, int rm)
 159 {
 160     if (sve_access_check(s)) {
 161         unsigned vsz = vec_full_reg_size(s);
 162         gvec_fn(esz, vec_full_reg_offset(s, rd),
 163                 vec_full_reg_offset(s, rn),
 164                 vec_full_reg_offset(s, rm), vsz, vsz);
 165     }
 166     return true;
 167 }
 168
 169 /* Invoke a vector move on two Zregs.  */
 170 static bool do_mov_z(DisasContext *s, int rd, int rn)
 171 {
 172     return do_vector2_z(s, tcg_gen_gvec_mov, 0, rd, rn);
 173 }
 174
 175 /* Initialize a Zreg with replications of a 64-bit immediate.  */
 176 static void do_dupi_z(DisasContext *s, int rd, uint64_t word)
 177 {
 178     unsigned vsz = vec_full_reg_size(s);
 179     tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
 180 }
 181
 182 /* Invoke a vector expander on two Pregs.  */
 183 static bool do_vector2_p(DisasContext *s, GVecGen2Fn *gvec_fn,
 184                          int esz, int rd, int rn)
 185 {
 186     if (sve_access_check(s)) {
 187         unsigned psz = pred_gvec_reg_size(s);
 188         gvec_fn(esz, pred_full_reg_offset(s, rd),
 189                 pred_full_reg_offset(s, rn), psz, psz);
 190     }
 191     return true;
 192 }
 193
 194 /* Invoke a vector expander on three Pregs.  */
 195 static bool do_vector3_p(DisasContext *s, GVecGen3Fn *gvec_fn,
 196                          int esz, int rd, int rn, int rm)
 197 {
 198     if (sve_access_check(s)) {
 199         unsigned psz = pred_gvec_reg_size(s);
 200         gvec_fn(esz, pred_full_reg_offset(s, rd),
 201                 pred_full_reg_offset(s, rn),
 202                 pred_full_reg_offset(s, rm), psz, psz);
 203     }
 204     return true;
 205 }
 206
 207 /* Invoke a vector operation on four Pregs.  */
 208 static bool do_vecop4_p(DisasContext *s, const GVecGen4 *gvec_op,
 209                         int rd, int rn, int rm, int rg)
 210 {
 211     if (sve_access_check(s)) {
 212         unsigned psz = pred_gvec_reg_size(s);
 213         tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
 214                        pred_full_reg_offset(s, rn),
 215                        pred_full_reg_offset(s, rm),
 216                        pred_full_reg_offset(s, rg),
 217                        psz, psz, gvec_op);
 218     }
 219     return true;
 220 }
 221
 222 /* Invoke a vector move on two Pregs.  */
 223 static bool do_mov_p(DisasContext *s, int rd, int rn)
 224 {
 225     return do_vector2_p(s, tcg_gen_gvec_mov, 0, rd, rn);
 226 }
 227
/* Set the cpu flags as per a return from an SVE helper.
 *
 * The helper result T is copied whole into cpu_NF, bit 1 of T is kept
 * for cpu_ZF, bit 0 of T is kept for cpu_CF, and cpu_VF is cleared.
 * NOTE(review): this presumably relies on the usual flag representation
 * (N read from the sign bit of cpu_NF, Z set when cpu_ZF is zero) --
 * confirm against the predtest helpers.
 */
static void do_pred_flags(TCGv_i32 t)
{
    tcg_gen_mov_i32(cpu_NF, t);
    tcg_gen_andi_i32(cpu_ZF, t, 2);
    tcg_gen_andi_i32(cpu_CF, t, 1);
    tcg_gen_movi_i32(cpu_VF, 0);
}
 236
 237 /* Subroutines computing the ARM PredTest psuedofunction.  */
 238 static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
 239 {
 240     TCGv_i32 t = tcg_temp_new_i32();
 241
 242     gen_helper_sve_predtest1(t, d, g);
 243     do_pred_flags(t);
 244     tcg_temp_free_i32(t);
 245 }
 246
 247 static void do_predtest(DisasContext *s, int dofs, int gofs, int words)
 248 {
 249     TCGv_ptr dptr = tcg_temp_new_ptr();
 250     TCGv_ptr gptr = tcg_temp_new_ptr();
 251     TCGv_i32 t;
 252
 253     tcg_gen_addi_ptr(dptr, cpu_env, dofs);
 254     tcg_gen_addi_ptr(gptr, cpu_env, gofs);
 255     t = tcg_const_i32(words);
 256
 257     gen_helper_sve_predtest(t, dptr, gptr, t);
 258     tcg_temp_free_ptr(dptr);
 259     tcg_temp_free_ptr(gptr);
 260
 261     do_pred_flags(t);
 262     tcg_temp_free_i32(t);
 263 }
 264
 265 /* For each element size, the bits within a predicate word that are active.  */
 266 const uint64_t pred_esz_masks[4] = {
 267     0xffffffffffffffffull, 0x5555555555555555ull,
 268     0x1111111111111111ull, 0x0101010101010101ull
 269 };
 270
 271 /*
 272  *** SVE Logical - Unpredicated Group
 273  */
 274
 275 static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 276 {
 277     return do_vector3_z(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
 278 }
 279
 280 static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 281 {
 282     if (a->rn == a->rm) { /* MOV */
 283         return do_mov_z(s, a->rd, a->rn);
 284     } else {
 285         return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
 286     }
 287 }
 288
 289 static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 290 {
 291     return do_vector3_z(s, tcg_gen_gvec_xor, 0, a->rd, a->rn, a->rm);
 292 }
 293
 294 static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 295 {
 296     return do_vector3_z(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
 297 }
 298
 299 /*
 300  *** SVE Integer Arithmetic - Unpredicated Group
 301  */
 302
 303 static bool trans_ADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 304 {
 305     return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
 306 }
 307
 308 static bool trans_SUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 309 {
 310     return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
 311 }
 312
 313 static bool trans_SQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 314 {
 315     return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
 316 }
 317
 318 static bool trans_SQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 319 {
 320     return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
 321 }
 322
 323 static bool trans_UQADD_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 324 {
 325     return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
 326 }
 327
 328 static bool trans_UQSUB_zzz(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
 329 {
 330     return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
 331 }
 332
 333 /*
 334  *** SVE Integer Arithmetic - Binary Predicated Group
 335  */
 336
 337 static bool do_zpzz_ool(DisasContext *s, arg_rprr_esz *a, gen_helper_gvec_4 *fn)
 338 {
 339     unsigned vsz = vec_full_reg_size(s);
 340     if (fn == NULL) {
 341         return false;
 342     }
 343     if (sve_access_check(s)) {
 344         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
 345                            vec_full_reg_offset(s, a->rn),
 346                            vec_full_reg_offset(s, a->rm),
 347                            pred_full_reg_offset(s, a->pg),
 348                            vsz, vsz, 0, fn);
 349     }
 350     return true;
 351 }
 352
 353 #define DO_ZPZZ(NAME, name) \
 354 static bool trans_##NAME##_zpzz(DisasContext *s, arg_rprr_esz *a,         \
 355                                 uint32_t insn)                            \
 356 {                                                                         \
 357     static gen_helper_gvec_4 * const fns[4] = {                           \
 358         gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h,   \
 359         gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d,   \
 360     };                                                                    \
 361     return do_zpzz_ool(s, a, fns[a->esz]);                                \
 362 }
 363
 364 DO_ZPZZ(AND, and)
 365 DO_ZPZZ(EOR, eor)
 366 DO_ZPZZ(ORR, orr)
 367 DO_ZPZZ(BIC, bic)
 368
 369 DO_ZPZZ(ADD, add)
 370 DO_ZPZZ(SUB, sub)
 371
 372 DO_ZPZZ(SMAX, smax)
 373 DO_ZPZZ(UMAX, umax)
 374 DO_ZPZZ(SMIN, smin)
 375 DO_ZPZZ(UMIN, umin)
 376 DO_ZPZZ(SABD, sabd)
 377 DO_ZPZZ(UABD, uabd)
 378
 379 DO_ZPZZ(MUL, mul)
 380 DO_ZPZZ(SMULH, smulh)
 381 DO_ZPZZ(UMULH, umulh)
 382
 383 DO_ZPZZ(ASR, asr)
 384 DO_ZPZZ(LSR, lsr)
 385 DO_ZPZZ(LSL, lsl)
 386
 387 static bool trans_SDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
 388 {
 389     static gen_helper_gvec_4 * const fns[4] = {
 390         NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
 391     };
 392     return do_zpzz_ool(s, a, fns[a->esz]);
 393 }
 394
 395 static bool trans_UDIV_zpzz(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
 396 {
 397     static gen_helper_gvec_4 * const fns[4] = {
 398         NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
 399     };
 400     return do_zpzz_ool(s, a, fns[a->esz]);
 401 }
 402
/* SEL is generated from the same per-element-size template as the
 * predicated binary operations.
 */
DO_ZPZZ(SEL, sel)

/* The generator macro is retired here.  */
#undef DO_ZPZZ
 406
 407 /*
 408  *** SVE Integer Arithmetic - Unary Predicated Group
 409  */
 410
 411 static bool do_zpz_ool(DisasContext *s, arg_rpr_esz *a, gen_helper_gvec_3 *fn)
 412 {
 413     if (fn == NULL) {
 414         return false;
 415     }
 416     if (sve_access_check(s)) {
 417         unsigned vsz = vec_full_reg_size(s);
 418         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 419                            vec_full_reg_offset(s, a->rn),
 420                            pred_full_reg_offset(s, a->pg),
 421                            vsz, vsz, 0, fn);
 422     }
 423     return true;
 424 }
 425
 426 #define DO_ZPZ(NAME, name) \
 427 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
 428 {                                                                   \
 429     static gen_helper_gvec_3 * const fns[4] = {                     \
 430         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,       \
 431         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,       \
 432     };                                                              \
 433     return do_zpz_ool(s, a, fns[a->esz]);                           \
 434 }
 435
 436 DO_ZPZ(CLS, cls)
 437 DO_ZPZ(CLZ, clz)
 438 DO_ZPZ(CNT_zpz, cnt_zpz)
 439 DO_ZPZ(CNOT, cnot)
 440 DO_ZPZ(NOT_zpz, not_zpz)
 441 DO_ZPZ(ABS, abs)
 442 DO_ZPZ(NEG, neg)
 443
 444 static bool trans_FABS(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 445 {
 446     static gen_helper_gvec_3 * const fns[4] = {
 447         NULL,
 448         gen_helper_sve_fabs_h,
 449         gen_helper_sve_fabs_s,
 450         gen_helper_sve_fabs_d
 451     };
 452     return do_zpz_ool(s, a, fns[a->esz]);
 453 }
 454
 455 static bool trans_FNEG(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 456 {
 457     static gen_helper_gvec_3 * const fns[4] = {
 458         NULL,
 459         gen_helper_sve_fneg_h,
 460         gen_helper_sve_fneg_s,
 461         gen_helper_sve_fneg_d
 462     };
 463     return do_zpz_ool(s, a, fns[a->esz]);
 464 }
 465
 466 static bool trans_SXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 467 {
 468     static gen_helper_gvec_3 * const fns[4] = {
 469         NULL,
 470         gen_helper_sve_sxtb_h,
 471         gen_helper_sve_sxtb_s,
 472         gen_helper_sve_sxtb_d
 473     };
 474     return do_zpz_ool(s, a, fns[a->esz]);
 475 }
 476
 477 static bool trans_UXTB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 478 {
 479     static gen_helper_gvec_3 * const fns[4] = {
 480         NULL,
 481         gen_helper_sve_uxtb_h,
 482         gen_helper_sve_uxtb_s,
 483         gen_helper_sve_uxtb_d
 484     };
 485     return do_zpz_ool(s, a, fns[a->esz]);
 486 }
 487
 488 static bool trans_SXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 489 {
 490     static gen_helper_gvec_3 * const fns[4] = {
 491         NULL, NULL,
 492         gen_helper_sve_sxth_s,
 493         gen_helper_sve_sxth_d
 494     };
 495     return do_zpz_ool(s, a, fns[a->esz]);
 496 }
 497
 498 static bool trans_UXTH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 499 {
 500     static gen_helper_gvec_3 * const fns[4] = {
 501         NULL, NULL,
 502         gen_helper_sve_uxth_s,
 503         gen_helper_sve_uxth_d
 504     };
 505     return do_zpz_ool(s, a, fns[a->esz]);
 506 }
 507
 508 static bool trans_SXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 509 {
 510     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_sxtw_d : NULL);
 511 }
 512
 513 static bool trans_UXTW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 514 {
 515     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_uxtw_d : NULL);
 516 }
 517
 518 #undef DO_ZPZ
 519
 520 /*
 521  *** SVE Integer Reduction Group
 522  */
 523
 524 typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
 525 static bool do_vpz_ool(DisasContext *s, arg_rpr_esz *a,
 526                        gen_helper_gvec_reduc *fn)
 527 {
 528     unsigned vsz = vec_full_reg_size(s);
 529     TCGv_ptr t_zn, t_pg;
 530     TCGv_i32 desc;
 531     TCGv_i64 temp;
 532
 533     if (fn == NULL) {
 534         return false;
 535     }
 536     if (!sve_access_check(s)) {
 537         return true;
 538     }
 539
 540     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
 541     temp = tcg_temp_new_i64();
 542     t_zn = tcg_temp_new_ptr();
 543     t_pg = tcg_temp_new_ptr();
 544
 545     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
 546     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
 547     fn(temp, t_zn, t_pg, desc);
 548     tcg_temp_free_ptr(t_zn);
 549     tcg_temp_free_ptr(t_pg);
 550     tcg_temp_free_i32(desc);
 551
 552     write_fp_dreg(s, a->rd, temp);
 553     tcg_temp_free_i64(temp);
 554     return true;
 555 }
 556
 557 #define DO_VPZ(NAME, name) \
 558 static bool trans_##NAME(DisasContext *s, arg_rpr_esz *a, uint32_t insn) \
 559 {                                                                        \
 560     static gen_helper_gvec_reduc * const fns[4] = {                      \
 561         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,            \
 562         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,            \
 563     };                                                                   \
 564     return do_vpz_ool(s, a, fns[a->esz]);                                \
 565 }
 566
 567 DO_VPZ(ORV, orv)
 568 DO_VPZ(ANDV, andv)
 569 DO_VPZ(EORV, eorv)
 570
 571 DO_VPZ(UADDV, uaddv)
 572 DO_VPZ(SMAXV, smaxv)
 573 DO_VPZ(UMAXV, umaxv)
 574 DO_VPZ(SMINV, sminv)
 575 DO_VPZ(UMINV, uminv)
 576
 577 static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
 578 {
 579     static gen_helper_gvec_reduc * const fns[4] = {
 580         gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
 581         gen_helper_sve_saddv_s, NULL
 582     };
 583     return do_vpz_ool(s, a, fns[a->esz]);
 584 }
 585
 586 #undef DO_VPZ
 587
 588 /*
 589  *** SVE Shift by Immediate - Predicated Group
 590  */
 591
 592 /* Store zero into every active element of Zd.  We will use this for two
 593  * and three-operand predicated instructions for which logic dictates a
 594  * zero result.
 595  */
 596 static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
 597 {
 598     static gen_helper_gvec_2 * const fns[4] = {
 599         gen_helper_sve_clr_b, gen_helper_sve_clr_h,
 600         gen_helper_sve_clr_s, gen_helper_sve_clr_d,
 601     };
 602     if (sve_access_check(s)) {
 603         unsigned vsz = vec_full_reg_size(s);
 604         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
 605                            pred_full_reg_offset(s, pg),
 606                            vsz, vsz, 0, fns[esz]);
 607     }
 608     return true;
 609 }
 610
 611 /* Copy Zn into Zd, storing zeros into inactive elements.  */
 612 static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
 613 {
 614     static gen_helper_gvec_3 * const fns[4] = {
 615         gen_helper_sve_movz_b, gen_helper_sve_movz_h,
 616         gen_helper_sve_movz_s, gen_helper_sve_movz_d,
 617     };
 618     unsigned vsz = vec_full_reg_size(s);
 619     tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
 620                        vec_full_reg_offset(s, rn),
 621                        pred_full_reg_offset(s, pg),
 622                        vsz, vsz, 0, fns[esz]);
 623 }
 624
 625 static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
 626                         gen_helper_gvec_3 *fn)
 627 {
 628     if (sve_access_check(s)) {
 629         unsigned vsz = vec_full_reg_size(s);
 630         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 631                            vec_full_reg_offset(s, a->rn),
 632                            pred_full_reg_offset(s, a->pg),
 633                            vsz, vsz, a->imm, fn);
 634     }
 635     return true;
 636 }
 637
 638 static bool trans_ASR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
 639 {
 640     static gen_helper_gvec_3 * const fns[4] = {
 641         gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
 642         gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
 643     };
 644     if (a->esz < 0) {
 645         /* Invalid tsz encoding -- see tszimm_esz. */
 646         return false;
 647     }
 648     /* Shift by element size is architecturally valid.  For
 649        arithmetic right-shift, it's the same as by one less. */
 650     a->imm = MIN(a->imm, (8 << a->esz) - 1);
 651     return do_zpzi_ool(s, a, fns[a->esz]);
 652 }
 653
 654 static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
 655 {
 656     static gen_helper_gvec_3 * const fns[4] = {
 657         gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
 658         gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
 659     };
 660     if (a->esz < 0) {
 661         return false;
 662     }
 663     /* Shift by element size is architecturally valid.
 664        For logical shifts, it is a zeroing operation.  */
 665     if (a->imm >= (8 << a->esz)) {
 666         return do_clr_zp(s, a->rd, a->pg, a->esz);
 667     } else {
 668         return do_zpzi_ool(s, a, fns[a->esz]);
 669     }
 670 }
 671
 672 static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
 673 {
 674     static gen_helper_gvec_3 * const fns[4] = {
 675         gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
 676         gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
 677     };
 678     if (a->esz < 0) {
 679         return false;
 680     }
 681     /* Shift by element size is architecturally valid.
 682        For logical shifts, it is a zeroing operation.  */
 683     if (a->imm >= (8 << a->esz)) {
 684         return do_clr_zp(s, a->rd, a->pg, a->esz);
 685     } else {
 686         return do_zpzi_ool(s, a, fns[a->esz]);
 687     }
 688 }
 689
 690 static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
 691 {
 692     static gen_helper_gvec_3 * const fns[4] = {
 693         gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
 694         gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
 695     };
 696     if (a->esz < 0) {
 697         return false;
 698     }
 699     /* Shift by element size is architecturally valid.  For arithmetic
 700        right shift for division, it is a zeroing operation.  */
 701     if (a->imm >= (8 << a->esz)) {
 702         return do_clr_zp(s, a->rd, a->pg, a->esz);
 703     } else {
 704         return do_zpzi_ool(s, a, fns[a->esz]);
 705     }
 706 }
 707
 708 /*
 709  *** SVE Bitwise Shift - Predicated Group
 710  */
 711
 712 #define DO_ZPZW(NAME, name) \
 713 static bool trans_##NAME##_zpzw(DisasContext *s, arg_rprr_esz *a,         \
 714                                 uint32_t insn)                            \
 715 {                                                                         \
 716     static gen_helper_gvec_4 * const fns[3] = {                           \
 717         gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h,   \
 718         gen_helper_sve_##name##_zpzw_s,                                   \
 719     };                                                                    \
 720     if (a->esz < 0 || a->esz >= 3) {                                      \
 721         return false;                                                     \
 722     }                                                                     \
 723     return do_zpzz_ool(s, a, fns[a->esz]);                                \
 724 }
 725
 726 DO_ZPZW(ASR, asr)
 727 DO_ZPZW(LSR, lsr)
 728 DO_ZPZW(LSL, lsl)
 729
 730 #undef DO_ZPZW
 731
 732 /*
 733  *** SVE Bitwise Shift - Unpredicated Group
 734  */
 735
 736 static bool do_shift_imm(DisasContext *s, arg_rri_esz *a, bool asr,
 737                          void (*gvec_fn)(unsigned, uint32_t, uint32_t,
 738                                          int64_t, uint32_t, uint32_t))
 739 {
 740     if (a->esz < 0) {
 741         /* Invalid tsz encoding -- see tszimm_esz. */
 742         return false;
 743     }
 744     if (sve_access_check(s)) {
 745         unsigned vsz = vec_full_reg_size(s);
 746         /* Shift by element size is architecturally valid.  For
 747            arithmetic right-shift, it's the same as by one less.
 748            Otherwise it is a zeroing operation.  */
 749         if (a->imm >= 8 << a->esz) {
 750             if (asr) {
 751                 a->imm = (8 << a->esz) - 1;
 752             } else {
 753                 do_dupi_z(s, a->rd, 0);
 754                 return true;
 755             }
 756         }
 757         gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
 758                 vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
 759     }
 760     return true;
 761 }
 762
 763 static bool trans_ASR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
 764 {
 765     return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
 766 }
 767
 768 static bool trans_LSR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
 769 {
 770     return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
 771 }
 772
 773 static bool trans_LSL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
 774 {
 775     return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
 776 }
 777
 778 static bool do_zzw_ool(DisasContext *s, arg_rrr_esz *a, gen_helper_gvec_3 *fn)
 779 {
 780     if (fn == NULL) {
 781         return false;
 782     }
 783     if (sve_access_check(s)) {
 784         unsigned vsz = vec_full_reg_size(s);
 785         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 786                            vec_full_reg_offset(s, a->rn),
 787                            vec_full_reg_offset(s, a->rm),
 788                            vsz, vsz, 0, fn);
 789     }
 790     return true;
 791 }
 792
 793 #define DO_ZZW(NAME, name) \
 794 static bool trans_##NAME##_zzw(DisasContext *s, arg_rrr_esz *a,           \
 795                                uint32_t insn)                             \
 796 {                                                                         \
 797     static gen_helper_gvec_3 * const fns[4] = {                           \
 798         gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h,     \
 799         gen_helper_sve_##name##_zzw_s, NULL                               \
 800     };                                                                    \
 801     return do_zzw_ool(s, a, fns[a->esz]);                                 \
 802 }
 803
 804 DO_ZZW(ASR, asr)
 805 DO_ZZW(LSR, lsr)
 806 DO_ZZW(LSL, lsl)
 807
 808 #undef DO_ZZW
 809
 810 /*
 811  *** SVE Integer Multiply-Add Group
 812  */
 813
 814 static bool do_zpzzz_ool(DisasContext *s, arg_rprrr_esz *a,
 815                          gen_helper_gvec_5 *fn)
 816 {
 817     if (sve_access_check(s)) {
 818         unsigned vsz = vec_full_reg_size(s);
 819         tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
 820                            vec_full_reg_offset(s, a->ra),
 821                            vec_full_reg_offset(s, a->rn),
 822                            vec_full_reg_offset(s, a->rm),
 823                            pred_full_reg_offset(s, a->pg),
 824                            vsz, vsz, 0, fn);
 825     }
 826     return true;
 827 }
 828
 829 #define DO_ZPZZZ(NAME, name) \
 830 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
 831 {                                                                    \
 832     static gen_helper_gvec_5 * const fns[4] = {                      \
 833         gen_helper_sve_##name##_b, gen_helper_sve_##name##_h,        \
 834         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d,        \
 835     };                                                               \
 836     return do_zpzzz_ool(s, a, fns[a->esz]);                          \
 837 }
 838
 839 DO_ZPZZZ(MLA, mla)
 840 DO_ZPZZZ(MLS, mls)
 841
 842 #undef DO_ZPZZZ
 843
 844 /*
 845  *** SVE Index Generation Group
 846  */
 847
 848 static void do_index(DisasContext *s, int esz, int rd,
 849                      TCGv_i64 start, TCGv_i64 incr)
 850 {
 851     unsigned vsz = vec_full_reg_size(s);
 852     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
 853     TCGv_ptr t_zd = tcg_temp_new_ptr();
 854
 855     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
 856     if (esz == 3) {
 857         gen_helper_sve_index_d(t_zd, start, incr, desc);
 858     } else {
 859         typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
 860         static index_fn * const fns[3] = {
 861             gen_helper_sve_index_b,
 862             gen_helper_sve_index_h,
 863             gen_helper_sve_index_s,
 864         };
 865         TCGv_i32 s32 = tcg_temp_new_i32();
 866         TCGv_i32 i32 = tcg_temp_new_i32();
 867
 868         tcg_gen_extrl_i64_i32(s32, start);
 869         tcg_gen_extrl_i64_i32(i32, incr);
 870         fns[esz](t_zd, s32, i32, desc);
 871
 872         tcg_temp_free_i32(s32);
 873         tcg_temp_free_i32(i32);
 874     }
 875     tcg_temp_free_ptr(t_zd);
 876     tcg_temp_free_i32(desc);
 877 }
 878
 879 static bool trans_INDEX_ii(DisasContext *s, arg_INDEX_ii *a, uint32_t insn)
 880 {
 881     if (sve_access_check(s)) {
 882         TCGv_i64 start = tcg_const_i64(a->imm1);
 883         TCGv_i64 incr = tcg_const_i64(a->imm2);
 884         do_index(s, a->esz, a->rd, start, incr);
 885         tcg_temp_free_i64(start);
 886         tcg_temp_free_i64(incr);
 887     }
 888     return true;
 889 }
 890
 891 static bool trans_INDEX_ir(DisasContext *s, arg_INDEX_ir *a, uint32_t insn)
 892 {
 893     if (sve_access_check(s)) {
 894         TCGv_i64 start = tcg_const_i64(a->imm);
 895         TCGv_i64 incr = cpu_reg(s, a->rm);
 896         do_index(s, a->esz, a->rd, start, incr);
 897         tcg_temp_free_i64(start);
 898     }
 899     return true;
 900 }
 901
 902 static bool trans_INDEX_ri(DisasContext *s, arg_INDEX_ri *a, uint32_t insn)
 903 {
 904     if (sve_access_check(s)) {
 905         TCGv_i64 start = cpu_reg(s, a->rn);
 906         TCGv_i64 incr = tcg_const_i64(a->imm);
 907         do_index(s, a->esz, a->rd, start, incr);
 908         tcg_temp_free_i64(incr);
 909     }
 910     return true;
 911 }
 912
 913 static bool trans_INDEX_rr(DisasContext *s, arg_INDEX_rr *a, uint32_t insn)
 914 {
 915     if (sve_access_check(s)) {
 916         TCGv_i64 start = cpu_reg(s, a->rn);
 917         TCGv_i64 incr = cpu_reg(s, a->rm);
 918         do_index(s, a->esz, a->rd, start, incr);
 919     }
 920     return true;
 921 }
 922
 923 /*
 924  *** SVE Stack Allocation Group
 925  */
 926
 927 static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a, uint32_t insn)
 928 {
 929     TCGv_i64 rd = cpu_reg_sp(s, a->rd);
 930     TCGv_i64 rn = cpu_reg_sp(s, a->rn);
 931     tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
 932     return true;
 933 }
 934
 935 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a, uint32_t insn)
 936 {
 937     TCGv_i64 rd = cpu_reg_sp(s, a->rd);
 938     TCGv_i64 rn = cpu_reg_sp(s, a->rn);
 939     tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
 940     return true;
 941 }
 942
 943 static bool trans_RDVL(DisasContext *s, arg_RDVL *a, uint32_t insn)
 944 {
 945     TCGv_i64 reg = cpu_reg(s, a->rd);
 946     tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
 947     return true;
 948 }
 949
 950 /*
 951  *** SVE Compute Vector Address Group
 952  */
 953
 954 static bool do_adr(DisasContext *s, arg_rrri *a, gen_helper_gvec_3 *fn)
 955 {
 956     if (sve_access_check(s)) {
 957         unsigned vsz = vec_full_reg_size(s);
 958         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
 959                            vec_full_reg_offset(s, a->rn),
 960                            vec_full_reg_offset(s, a->rm),
 961                            vsz, vsz, a->imm, fn);
 962     }
 963     return true;
 964 }
 965
 966 static bool trans_ADR_p32(DisasContext *s, arg_rrri *a, uint32_t insn)
 967 {
 968     return do_adr(s, a, gen_helper_sve_adr_p32);
 969 }
 970
 971 static bool trans_ADR_p64(DisasContext *s, arg_rrri *a, uint32_t insn)
 972 {
 973     return do_adr(s, a, gen_helper_sve_adr_p64);
 974 }
 975
 976 static bool trans_ADR_s32(DisasContext *s, arg_rrri *a, uint32_t insn)
 977 {
 978     return do_adr(s, a, gen_helper_sve_adr_s32);
 979 }
 980
 981 static bool trans_ADR_u32(DisasContext *s, arg_rrri *a, uint32_t insn)
 982 {
 983     return do_adr(s, a, gen_helper_sve_adr_u32);
 984 }
 985
 986 /*
 987  *** SVE Integer Misc - Unpredicated Group
 988  */
 989
 990 static bool trans_FEXPA(DisasContext *s, arg_rr_esz *a, uint32_t insn)
 991 {
 992     static gen_helper_gvec_2 * const fns[4] = {
 993         NULL,
 994         gen_helper_sve_fexpa_h,
 995         gen_helper_sve_fexpa_s,
 996         gen_helper_sve_fexpa_d,
 997     };
 998     if (a->esz == 0) {
 999         return false;
1000     }
1001     if (sve_access_check(s)) {
1002         unsigned vsz = vec_full_reg_size(s);
1003         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
1004                            vec_full_reg_offset(s, a->rn),
1005                            vsz, vsz, 0, fns[a->esz]);
1006     }
1007     return true;
1008 }
1009
1010 static bool trans_FTSSEL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
1011 {
1012     static gen_helper_gvec_3 * const fns[4] = {
1013         NULL,
1014         gen_helper_sve_ftssel_h,
1015         gen_helper_sve_ftssel_s,
1016         gen_helper_sve_ftssel_d,
1017     };
1018     if (a->esz == 0) {
1019         return false;
1020     }
1021     if (sve_access_check(s)) {
1022         unsigned vsz = vec_full_reg_size(s);
1023         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
1024                            vec_full_reg_offset(s, a->rn),
1025                            vec_full_reg_offset(s, a->rm),
1026                            vsz, vsz, 0, fns[a->esz]);
1027     }
1028     return true;
1029 }
1030
1031 /*
1032  *** SVE Predicate Logical Operations Group
1033  */
1034
/*
 * Expand a four-operand predicate operation (rd = op(rn, rm, pg)) and
 * then run the predicate test on the result against the governing
 * predicate.
 *
 * GVEC_OP supplies the expansion: its fni8 hook is called directly when
 * the predicate fits in one 64-bit word, otherwise the whole descriptor
 * is passed to tcg_gen_gvec_4.  Always returns true; nothing is emitted
 * when sve_access_check fails.
 */
static bool do_pppp_flags(DisasContext *s, arg_rprr_s *a,
                          const GVecGen4 *gvec_op)
{
    if (!sve_access_check(s)) {
        return true;
    }

    unsigned psz = pred_gvec_reg_size(s);
    int dofs = pred_full_reg_offset(s, a->rd);
    int nofs = pred_full_reg_offset(s, a->rn);
    int mofs = pred_full_reg_offset(s, a->rm);
    int gofs = pred_full_reg_offset(s, a->pg);

    if (psz == 8) {
        /* Do the operation and the flags generation in temps.  */
        TCGv_i64 pd = tcg_temp_new_i64();
        TCGv_i64 pn = tcg_temp_new_i64();
        TCGv_i64 pm = tcg_temp_new_i64();
        TCGv_i64 pg = tcg_temp_new_i64();

        tcg_gen_ld_i64(pn, cpu_env, nofs);
        tcg_gen_ld_i64(pm, cpu_env, mofs);
        tcg_gen_ld_i64(pg, cpu_env, gofs);

        gvec_op->fni8(pd, pn, pm, pg);
        tcg_gen_st_i64(pd, cpu_env, dofs);

        /* pg still holds the value loaded before the store to rd.  */
        do_predtest1(pd, pg);

        tcg_temp_free_i64(pd);
        tcg_temp_free_i64(pn);
        tcg_temp_free_i64(pm);
        tcg_temp_free_i64(pg);
    } else {
        /* The operation and flags generation is large.  The computation
         * of the flags depends on the original contents of the guarding
         * predicate.  If the destination overwrites the guarding predicate,
         * then the easiest way to get this right is to save a copy.
         */
        int tofs = gofs;
        if (a->rd == a->pg) {
            tofs = offsetof(CPUARMState, vfp.preg_tmp);
            tcg_gen_gvec_mov(0, tofs, gofs, psz, psz);
        }

        tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
        /* Test against the saved copy (or pg itself); size in 64-bit words. */
        do_predtest(s, dofs, tofs, psz / 8);
    }
    return true;
}
1085
/* 64-bit expansion of predicated AND: pd = (pn & pm) & pg.  */
static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1091
/* Host-vector expansion of predicated AND: pd = (pn & pm) & pg.  */
static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1098
1099 static bool trans_AND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1100 {
1101     static const GVecGen4 op = {
1102         .fni8 = gen_and_pg_i64,
1103         .fniv = gen_and_pg_vec,
1104         .fno = gen_helper_sve_and_pppp,
1105         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1106     };
1107     if (a->s) {
1108         return do_pppp_flags(s, a, &op);
1109     } else if (a->rn == a->rm) {
1110         if (a->pg == a->rn) {
1111             return do_mov_p(s, a->rd, a->rn);
1112         } else {
1113             return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->pg);
1114         }
1115     } else if (a->pg == a->rn || a->pg == a->rm) {
1116         return do_vector3_p(s, tcg_gen_gvec_and, 0, a->rd, a->rn, a->rm);
1117     } else {
1118         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1119     }
1120 }
1121
/* 64-bit expansion of predicated BIC: pd = (pn and-with-complement pm) & pg. */
static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_andc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1127
/* Host-vector expansion of predicated BIC: andc of pn with pm, then AND pg. */
static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_andc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1134
1135 static bool trans_BIC_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1136 {
1137     static const GVecGen4 op = {
1138         .fni8 = gen_bic_pg_i64,
1139         .fniv = gen_bic_pg_vec,
1140         .fno = gen_helper_sve_bic_pppp,
1141         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1142     };
1143     if (a->s) {
1144         return do_pppp_flags(s, a, &op);
1145     } else if (a->pg == a->rn) {
1146         return do_vector3_p(s, tcg_gen_gvec_andc, 0, a->rd, a->rn, a->rm);
1147     } else {
1148         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1149     }
1150 }
1151
/* 64-bit expansion of predicated EOR: pd = (pn ^ pm) & pg.  */
static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_xor_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1157
/* Host-vector expansion of predicated EOR: pd = (pn ^ pm) & pg.  */
static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_xor_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1164
1165 static bool trans_EOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1166 {
1167     static const GVecGen4 op = {
1168         .fni8 = gen_eor_pg_i64,
1169         .fniv = gen_eor_pg_vec,
1170         .fno = gen_helper_sve_eor_pppp,
1171         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1172     };
1173     if (a->s) {
1174         return do_pppp_flags(s, a, &op);
1175     } else {
1176         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1177     }
1178 }
1179
/*
 * 64-bit expansion of predicate SEL: bits of pn where pg is set combined
 * with bits of pm masked by andc with pg.
 * Note that pn and pm are overwritten with the masked intermediates.
 */
static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pn, pn, pg);
    tcg_gen_andc_i64(pm, pm, pg);
    tcg_gen_or_i64(pd, pn, pm);
}
1186
/*
 * Host-vector expansion of predicate SEL; same three-op sequence as the
 * 64-bit variant.  pn and pm are overwritten with masked intermediates.
 */
static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pn, pn, pg);
    tcg_gen_andc_vec(vece, pm, pm, pg);
    tcg_gen_or_vec(vece, pd, pn, pm);
}
1194
1195 static bool trans_SEL_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1196 {
1197     static const GVecGen4 op = {
1198         .fni8 = gen_sel_pg_i64,
1199         .fniv = gen_sel_pg_vec,
1200         .fno = gen_helper_sve_sel_pppp,
1201         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1202     };
1203     if (a->s) {
1204         return false;
1205     } else {
1206         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1207     }
1208 }
1209
/* 64-bit expansion of predicated ORR: pd = (pn | pm) & pg.  */
static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1215
/* Host-vector expansion of predicated ORR: pd = (pn | pm) & pg.  */
static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1222
1223 static bool trans_ORR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1224 {
1225     static const GVecGen4 op = {
1226         .fni8 = gen_orr_pg_i64,
1227         .fniv = gen_orr_pg_vec,
1228         .fno = gen_helper_sve_orr_pppp,
1229         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1230     };
1231     if (a->s) {
1232         return do_pppp_flags(s, a, &op);
1233     } else if (a->pg == a->rn && a->rn == a->rm) {
1234         return do_mov_p(s, a->rd, a->rn);
1235     } else {
1236         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1237     }
1238 }
1239
/* 64-bit expansion of predicated ORN: orc of pn with pm, then AND with pg. */
static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_orc_i64(pd, pn, pm);
    tcg_gen_and_i64(pd, pd, pg);
}
1245
/* Host-vector expansion of predicated ORN: orc of pn with pm, then AND pg. */
static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_orc_vec(vece, pd, pn, pm);
    tcg_gen_and_vec(vece, pd, pd, pg);
}
1252
1253 static bool trans_ORN_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1254 {
1255     static const GVecGen4 op = {
1256         .fni8 = gen_orn_pg_i64,
1257         .fniv = gen_orn_pg_vec,
1258         .fno = gen_helper_sve_orn_pppp,
1259         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1260     };
1261     if (a->s) {
1262         return do_pppp_flags(s, a, &op);
1263     } else {
1264         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1265     }
1266 }
1267
/*
 * 64-bit expansion of predicated NOR: compute pn | pm, then andc pg with
 * that result (pg with the OR-ed bits cleared).
 */
static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_or_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}
1273
/* Host-vector expansion of predicated NOR: pn | pm, then andc pg with it. */
static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_or_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}
1280
1281 static bool trans_NOR_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1282 {
1283     static const GVecGen4 op = {
1284         .fni8 = gen_nor_pg_i64,
1285         .fniv = gen_nor_pg_vec,
1286         .fno = gen_helper_sve_nor_pppp,
1287         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1288     };
1289     if (a->s) {
1290         return do_pppp_flags(s, a, &op);
1291     } else {
1292         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1293     }
1294 }
1295
/*
 * 64-bit expansion of predicated NAND: compute pn & pm, then andc pg with
 * that result (pg with the AND-ed bits cleared).
 */
static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
{
    tcg_gen_and_i64(pd, pn, pm);
    tcg_gen_andc_i64(pd, pg, pd);
}
1301
/* Host-vector expansion of predicated NAND: pn & pm, then andc pg with it. */
static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
                           TCGv_vec pm, TCGv_vec pg)
{
    tcg_gen_and_vec(vece, pd, pn, pm);
    tcg_gen_andc_vec(vece, pd, pg, pd);
}
1308
1309 static bool trans_NAND_pppp(DisasContext *s, arg_rprr_s *a, uint32_t insn)
1310 {
1311     static const GVecGen4 op = {
1312         .fni8 = gen_nand_pg_i64,
1313         .fniv = gen_nand_pg_vec,
1314         .fno = gen_helper_sve_nand_pppp,
1315         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
1316     };
1317     if (a->s) {
1318         return do_pppp_flags(s, a, &op);
1319     } else {
1320         return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1321     }
1322 }
1323
1324 /*
1325  *** SVE Predicate Misc Group
1326  */
1327
1328 static bool trans_PTEST(DisasContext *s, arg_PTEST *a, uint32_t insn)
1329 {
1330     if (sve_access_check(s)) {
1331         int nofs = pred_full_reg_offset(s, a->rn);
1332         int gofs = pred_full_reg_offset(s, a->pg);
1333         int words = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1334
1335         if (words == 1) {
1336             TCGv_i64 pn = tcg_temp_new_i64();
1337             TCGv_i64 pg = tcg_temp_new_i64();
1338
1339             tcg_gen_ld_i64(pn, cpu_env, nofs);
1340             tcg_gen_ld_i64(pg, cpu_env, gofs);
1341             do_predtest1(pn, pg);
1342
1343             tcg_temp_free_i64(pn);
1344             tcg_temp_free_i64(pg);
1345         } else {
1346             do_predtest(s, nofs, gofs, words);
1347         }
1348     }
1349     return true;
1350 }
1351
/*
 * Number of elements selected by a predicate-constraint pattern; see
 * the ARM pseudocode DecodePredCount.
 *
 * FULLSZ >> ESZ gives the number of elements available.  Fixed-count
 * patterns (VL1..VL8, VL16..VL256) yield their count only when that many
 * elements are available, else 0.  Patterns not listed yield 0.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    const unsigned avail = fullsz >> esz;
    unsigned limit;

    if (pattern == 0x0) {                       /* POW2 */
        return pow2floor(avail);
    }
    if (pattern == 0x1d) {                      /* MUL4 */
        return avail - avail % 4;
    }
    if (pattern == 0x1e) {                      /* MUL3 */
        return avail - avail % 3;
    }
    if (pattern == 0x1f) {                      /* ALL */
        return avail;
    }

    if (pattern >= 0x1 && pattern <= 0x8) {     /* VL1 .. VL8 */
        limit = pattern;
    } else if (pattern >= 0x9 && pattern <= 0xd) {
        /* VL16, VL32, VL64, VL128, VL256 */
        limit = 16 << (pattern - 9);
    } else {                                    /* #uimm5 */
        return 0;
    }
    return avail >= limit ? limit : 0;
}
1389
1390 /* This handles all of the predicate initialization instructions,
1391  * PTRUE, PFALSE, SETFFR.  For PFALSE, we will have set PAT == 32
1392  * so that decode_pred_count returns 0.  For SETFFR, we will have
1393  * set RD == 16 == FFR.
1394  */
1395 static bool do_predset(DisasContext *s, int esz, int rd, int pat, bool setflag)
1396 {
1397     if (!sve_access_check(s)) {
1398         return true;
1399     }
1400
1401     unsigned fullsz = vec_full_reg_size(s);
1402     unsigned ofs = pred_full_reg_offset(s, rd);
1403     unsigned numelem, setsz, i;
1404     uint64_t word, lastword;
1405     TCGv_i64 t;
1406
1407     numelem = decode_pred_count(fullsz, pat, esz);
1408
1409     /* Determine what we must store into each bit, and how many.  */
1410     if (numelem == 0) {
1411         lastword = word = 0;
1412         setsz = fullsz;
1413     } else {
1414         setsz = numelem << esz;
1415         lastword = word = pred_esz_masks[esz];
1416         if (setsz % 64) {
1417             lastword &= ~(-1ull << (setsz % 64));
1418         }
1419     }
1420
1421     t = tcg_temp_new_i64();
1422     if (fullsz <= 64) {
1423         tcg_gen_movi_i64(t, lastword);
1424         tcg_gen_st_i64(t, cpu_env, ofs);
1425         goto done;
1426     }
1427
1428     if (word == lastword) {
1429         unsigned maxsz = size_for_gvec(fullsz / 8);
1430         unsigned oprsz = size_for_gvec(setsz / 8);
1431
1432         if (oprsz * 8 == setsz) {
1433             tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1434             goto done;
1435         }
1436         if (oprsz * 8 == setsz + 8) {
1437             tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1438             tcg_gen_movi_i64(t, 0);
1439             tcg_gen_st_i64(t, cpu_env, ofs + oprsz - 8);
1440             goto done;
1441         }
1442     }
1443
1444     setsz /= 8;
1445     fullsz /= 8;
1446
1447     tcg_gen_movi_i64(t, word);
1448     for (i = 0; i < setsz; i += 8) {
1449         tcg_gen_st_i64(t, cpu_env, ofs + i);
1450     }
1451     if (lastword != word) {
1452         tcg_gen_movi_i64(t, lastword);
1453         tcg_gen_st_i64(t, cpu_env, ofs + i);
1454         i += 8;
1455     }
1456     if (i < fullsz) {
1457         tcg_gen_movi_i64(t, 0);
1458         for (; i < fullsz; i += 8) {
1459             tcg_gen_st_i64(t, cpu_env, ofs + i);
1460         }
1461     }
1462
1463  done:
1464     tcg_temp_free_i64(t);
1465
1466     /* PTRUES */
1467     if (setflag) {
1468         tcg_gen_movi_i32(cpu_NF, -(word != 0));
1469         tcg_gen_movi_i32(cpu_CF, word == 0);
1470         tcg_gen_movi_i32(cpu_VF, 0);
1471         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1472     }
1473     return true;
1474 }
1475
1476 static bool trans_PTRUE(DisasContext *s, arg_PTRUE *a, uint32_t insn)
1477 {
1478     return do_predset(s, a->esz, a->rd, a->pat, a->s);
1479 }
1480
1481 static bool trans_SETFFR(DisasContext *s, arg_SETFFR *a, uint32_t insn)
1482 {
1483     /* Note pat == 31 is #all, to set all elements.  */
1484     return do_predset(s, 0, FFR_PRED_NUM, 31, false);
1485 }
1486
1487 static bool trans_PFALSE(DisasContext *s, arg_PFALSE *a, uint32_t insn)
1488 {
1489     /* Note pat == 32 is #unimp, to set no elements.  */
1490     return do_predset(s, 0, a->rd, 32, false);
1491 }
1492
1493 static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p *a, uint32_t insn)
1494 {
1495     /* The path through do_pppp_flags is complicated enough to want to avoid
1496      * duplication.  Frob the arguments into the form of a predicated AND.
1497      */
1498     arg_rprr_s alt_a = {
1499         .rd = a->rd, .pg = a->pg, .s = a->s,
1500         .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1501     };
1502     return trans_AND_pppp(s, &alt_a, insn);
1503 }
1504
1505 static bool trans_RDFFR(DisasContext *s, arg_RDFFR *a, uint32_t insn)
1506 {
1507     return do_mov_p(s, a->rd, FFR_PRED_NUM);
1508 }
1509
1510 static bool trans_WRFFR(DisasContext *s, arg_WRFFR *a, uint32_t insn)
1511 {
1512     return do_mov_p(s, FFR_PRED_NUM, a->rn);
1513 }
1514
1515 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
1516                             void (*gen_fn)(TCGv_i32, TCGv_ptr,
1517                                            TCGv_ptr, TCGv_i32))
1518 {
1519     if (!sve_access_check(s)) {
1520         return true;
1521     }
1522
1523     TCGv_ptr t_pd = tcg_temp_new_ptr();
1524     TCGv_ptr t_pg = tcg_temp_new_ptr();
1525     TCGv_i32 t;
1526     unsigned desc;
1527
1528     desc = DIV_ROUND_UP(pred_full_reg_size(s), 8);
1529     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
1530
1531     tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1532     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1533     t = tcg_const_i32(desc);
1534
1535     gen_fn(t, t_pd, t_pg, t);
1536     tcg_temp_free_ptr(t_pd);
1537     tcg_temp_free_ptr(t_pg);
1538
1539     do_pred_flags(t);
1540     tcg_temp_free_i32(t);
1541     return true;
1542 }
1543
1544 static bool trans_PFIRST(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1545 {
1546     return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1547 }
1548
1549 static bool trans_PNEXT(DisasContext *s, arg_rr_esz *a, uint32_t insn)
1550 {
1551     return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1552 }
1553
1554 /*
1555  *** SVE Element Count Group
1556  */
1557
1558 /* Perform an inline saturating addition of a 32-bit value within
1559  * a 64-bit register.  The second operand is known to be positive,
1560  * which halves the comparisions we must perform to bound the result.
1561  */
1562 static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1563 {
1564     int64_t ibound;
1565     TCGv_i64 bound;
1566     TCGCond cond;
1567
1568     /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1569     if (u) {
1570         tcg_gen_ext32u_i64(reg, reg);
1571     } else {
1572         tcg_gen_ext32s_i64(reg, reg);
1573     }
1574     if (d) {
1575         tcg_gen_sub_i64(reg, reg, val);
1576         ibound = (u ? 0 : INT32_MIN);
1577         cond = TCG_COND_LT;
1578     } else {
1579         tcg_gen_add_i64(reg, reg, val);
1580         ibound = (u ? UINT32_MAX : INT32_MAX);
1581         cond = TCG_COND_GT;
1582     }
1583     bound = tcg_const_i64(ibound);
1584     tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1585     tcg_temp_free_i64(bound);
1586 }
1587
1588 /* Similarly with 64-bit values.  */
1589 static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1590 {
1591     TCGv_i64 t0 = tcg_temp_new_i64();
1592     TCGv_i64 t1 = tcg_temp_new_i64();
1593     TCGv_i64 t2;
1594
1595     if (u) {
1596         if (d) {
1597             tcg_gen_sub_i64(t0, reg, val);
1598             tcg_gen_movi_i64(t1, 0);
1599             tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1600         } else {
1601             tcg_gen_add_i64(t0, reg, val);
1602             tcg_gen_movi_i64(t1, -1);
1603             tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1604         }
1605     } else {
1606         if (d) {
1607             /* Detect signed overflow for subtraction.  */
1608             tcg_gen_xor_i64(t0, reg, val);
1609             tcg_gen_sub_i64(t1, reg, val);
1610             tcg_gen_xor_i64(reg, reg, t0);
1611             tcg_gen_and_i64(t0, t0, reg);
1612
1613             /* Bound the result.  */
1614             tcg_gen_movi_i64(reg, INT64_MIN);
1615             t2 = tcg_const_i64(0);
1616             tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1617         } else {
1618             /* Detect signed overflow for addition.  */
1619             tcg_gen_xor_i64(t0, reg, val);
1620             tcg_gen_add_i64(reg, reg, val);
1621             tcg_gen_xor_i64(t1, reg, val);
1622             tcg_gen_andc_i64(t0, t1, t0);
1623
1624             /* Bound the result.  */
1625             tcg_gen_movi_i64(t1, INT64_MAX);
1626             t2 = tcg_const_i64(0);
1627             tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1628         }
1629         tcg_temp_free_i64(t2);
1630     }
1631     tcg_temp_free_i64(t0);
1632     tcg_temp_free_i64(t1);
1633 }
1634
1635 /* Similarly with a vector and a scalar operand.  */
1636 static void do_sat_addsub_vec(DisasContext *s, int esz, int rd, int rn,
1637                               TCGv_i64 val, bool u, bool d)
1638 {
1639     unsigned vsz = vec_full_reg_size(s);
1640     TCGv_ptr dptr, nptr;
1641     TCGv_i32 t32, desc;
1642     TCGv_i64 t64;
1643
1644     dptr = tcg_temp_new_ptr();
1645     nptr = tcg_temp_new_ptr();
1646     tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1647     tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1648     desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1649
1650     switch (esz) {
1651     case MO_8:
1652         t32 = tcg_temp_new_i32();
1653         tcg_gen_extrl_i64_i32(t32, val);
1654         if (d) {
1655             tcg_gen_neg_i32(t32, t32);
1656         }
1657         if (u) {
1658             gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1659         } else {
1660             gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1661         }
1662         tcg_temp_free_i32(t32);
1663         break;
1664
1665     case MO_16:
1666         t32 = tcg_temp_new_i32();
1667         tcg_gen_extrl_i64_i32(t32, val);
1668         if (d) {
1669             tcg_gen_neg_i32(t32, t32);
1670         }
1671         if (u) {
1672             gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1673         } else {
1674             gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1675         }
1676         tcg_temp_free_i32(t32);
1677         break;
1678
1679     case MO_32:
1680         t64 = tcg_temp_new_i64();
1681         if (d) {
1682             tcg_gen_neg_i64(t64, val);
1683         } else {
1684             tcg_gen_mov_i64(t64, val);
1685         }
1686         if (u) {
1687             gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1688         } else {
1689             gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1690         }
1691         tcg_temp_free_i64(t64);
1692         break;
1693
1694     case MO_64:
1695         if (u) {
1696             if (d) {
1697                 gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1698             } else {
1699                 gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1700             }
1701         } else if (d) {
1702             t64 = tcg_temp_new_i64();
1703             tcg_gen_neg_i64(t64, val);
1704             gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1705             tcg_temp_free_i64(t64);
1706         } else {
1707             gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1708         }
1709         break;
1710
1711     default:
1712         g_assert_not_reached();
1713     }
1714
1715     tcg_temp_free_ptr(dptr);
1716     tcg_temp_free_ptr(nptr);
1717     tcg_temp_free_i32(desc);
1718 }
1719
1720 static bool trans_CNT_r(DisasContext *s, arg_CNT_r *a, uint32_t insn)
1721 {
1722     if (sve_access_check(s)) {
1723         unsigned fullsz = vec_full_reg_size(s);
1724         unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1725         tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1726     }
1727     return true;
1728 }
1729
1730 static bool trans_INCDEC_r(DisasContext *s, arg_incdec_cnt *a, uint32_t insn)
1731 {
1732     if (sve_access_check(s)) {
1733         unsigned fullsz = vec_full_reg_size(s);
1734         unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1735         int inc = numelem * a->imm * (a->d ? -1 : 1);
1736         TCGv_i64 reg = cpu_reg(s, a->rd);
1737
1738         tcg_gen_addi_i64(reg, reg, inc);
1739     }
1740     return true;
1741 }
1742
1743 static bool trans_SINCDEC_r_32(DisasContext *s, arg_incdec_cnt *a,
1744                                uint32_t insn)
1745 {
1746     if (!sve_access_check(s)) {
1747         return true;
1748     }
1749
1750     unsigned fullsz = vec_full_reg_size(s);
1751     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1752     int inc = numelem * a->imm;
1753     TCGv_i64 reg = cpu_reg(s, a->rd);
1754
1755     /* Use normal 64-bit arithmetic to detect 32-bit overflow.  */
1756     if (inc == 0) {
1757         if (a->u) {
1758             tcg_gen_ext32u_i64(reg, reg);
1759         } else {
1760             tcg_gen_ext32s_i64(reg, reg);
1761         }
1762     } else {
1763         TCGv_i64 t = tcg_const_i64(inc);
1764         do_sat_addsub_32(reg, t, a->u, a->d);
1765         tcg_temp_free_i64(t);
1766     }
1767     return true;
1768 }
1769
1770 static bool trans_SINCDEC_r_64(DisasContext *s, arg_incdec_cnt *a,
1771                                uint32_t insn)
1772 {
1773     if (!sve_access_check(s)) {
1774         return true;
1775     }
1776
1777     unsigned fullsz = vec_full_reg_size(s);
1778     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1779     int inc = numelem * a->imm;
1780     TCGv_i64 reg = cpu_reg(s, a->rd);
1781
1782     if (inc != 0) {
1783         TCGv_i64 t = tcg_const_i64(inc);
1784         do_sat_addsub_64(reg, t, a->u, a->d);
1785         tcg_temp_free_i64(t);
1786     }
1787     return true;
1788 }
1789
1790 static bool trans_INCDEC_v(DisasContext *s, arg_incdec2_cnt *a, uint32_t insn)
1791 {
1792     if (a->esz == 0) {
1793         return false;
1794     }
1795
1796     unsigned fullsz = vec_full_reg_size(s);
1797     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1798     int inc = numelem * a->imm;
1799
1800     if (inc != 0) {
1801         if (sve_access_check(s)) {
1802             TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1803             tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1804                               vec_full_reg_offset(s, a->rn),
1805                               t, fullsz, fullsz);
1806             tcg_temp_free_i64(t);
1807         }
1808     } else {
1809         do_mov_z(s, a->rd, a->rn);
1810     }
1811     return true;
1812 }
1813
1814 static bool trans_SINCDEC_v(DisasContext *s, arg_incdec2_cnt *a,
1815                             uint32_t insn)
1816 {
1817     if (a->esz == 0) {
1818         return false;
1819     }
1820
1821     unsigned fullsz = vec_full_reg_size(s);
1822     unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1823     int inc = numelem * a->imm;
1824
1825     if (inc != 0) {
1826         if (sve_access_check(s)) {
1827             TCGv_i64 t = tcg_const_i64(inc);
1828             do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1829             tcg_temp_free_i64(t);
1830         }
1831     } else {
1832         do_mov_z(s, a->rd, a->rn);
1833     }
1834     return true;
1835 }
1836
1837 /*
1838  *** SVE Bitwise Immediate Group
1839  */
1840
1841 static bool do_zz_dbm(DisasContext *s, arg_rr_dbm *a, GVecGen2iFn *gvec_fn)
1842 {
1843     uint64_t imm;
1844     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1845                                 extract32(a->dbm, 0, 6),
1846                                 extract32(a->dbm, 6, 6))) {
1847         return false;
1848     }
1849     if (sve_access_check(s)) {
1850         unsigned vsz = vec_full_reg_size(s);
1851         gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1852                 vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1853     }
1854     return true;
1855 }
1856
1857 static bool trans_AND_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1858 {
1859     return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1860 }
1861
1862 static bool trans_ORR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1863 {
1864     return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1865 }
1866
1867 static bool trans_EOR_zzi(DisasContext *s, arg_rr_dbm *a, uint32_t insn)
1868 {
1869     return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1870 }
1871
1872 static bool trans_DUPM(DisasContext *s, arg_DUPM *a, uint32_t insn)
1873 {
1874     uint64_t imm;
1875     if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, 12, 1),
1876                                 extract32(a->dbm, 0, 6),
1877                                 extract32(a->dbm, 6, 6))) {
1878         return false;
1879     }
1880     if (sve_access_check(s)) {
1881         do_dupi_z(s, a->rd, imm);
1882     }
1883     return true;
1884 }
1885
1886 /*
1887  *** SVE Integer Wide Immediate - Predicated Group
1888  */
1889
1890 /* Implement all merging copies.  This is used for CPY (immediate),
1891  * FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1892  */
1893 static void do_cpy_m(DisasContext *s, int esz, int rd, int rn, int pg,
1894                      TCGv_i64 val)
1895 {
1896     typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1897     static gen_cpy * const fns[4] = {
1898         gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1899         gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1900     };
1901     unsigned vsz = vec_full_reg_size(s);
1902     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
1903     TCGv_ptr t_zd = tcg_temp_new_ptr();
1904     TCGv_ptr t_zn = tcg_temp_new_ptr();
1905     TCGv_ptr t_pg = tcg_temp_new_ptr();
1906
1907     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1908     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1909     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1910
1911     fns[esz](t_zd, t_zn, t_pg, val, desc);
1912
1913     tcg_temp_free_ptr(t_zd);
1914     tcg_temp_free_ptr(t_zn);
1915     tcg_temp_free_ptr(t_pg);
1916     tcg_temp_free_i32(desc);
1917 }
1918
1919 static bool trans_FCPY(DisasContext *s, arg_FCPY *a, uint32_t insn)
1920 {
1921     if (a->esz == 0) {
1922         return false;
1923     }
1924     if (sve_access_check(s)) {
1925         /* Decode the VFP immediate.  */
1926         uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1927         TCGv_i64 t_imm = tcg_const_i64(imm);
1928         do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1929         tcg_temp_free_i64(t_imm);
1930     }
1931     return true;
1932 }
1933
1934 static bool trans_CPY_m_i(DisasContext *s, arg_rpri_esz *a, uint32_t insn)
1935 {
1936     if (a->esz == 0 && extract32(insn, 13, 1)) {
1937         return false;
1938     }
1939     if (sve_access_check(s)) {
1940         TCGv_i64 t_imm = tcg_const_i64(a->imm);
1941         do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1942         tcg_temp_free_i64(t_imm);
1943     }
1944     return true;
1945 }
1946
1947 static bool trans_CPY_z_i(DisasContext *s, arg_CPY_z_i *a, uint32_t insn)
1948 {
1949     static gen_helper_gvec_2i * const fns[4] = {
1950         gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1951         gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1952     };
1953
1954     if (a->esz == 0 && extract32(insn, 13, 1)) {
1955         return false;
1956     }
1957     if (sve_access_check(s)) {
1958         unsigned vsz = vec_full_reg_size(s);
1959         TCGv_i64 t_imm = tcg_const_i64(a->imm);
1960         tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1961                             pred_full_reg_offset(s, a->pg),
1962                             t_imm, vsz, vsz, 0, fns[a->esz]);
1963         tcg_temp_free_i64(t_imm);
1964     }
1965     return true;
1966 }
1967
1968 /*
1969  *** SVE Permute Extract Group
1970  */
1971
1972 static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
1973 {
1974     if (!sve_access_check(s)) {
1975         return true;
1976     }
1977
1978     unsigned vsz = vec_full_reg_size(s);
1979     unsigned n_ofs = a->imm >= vsz ? 0 : a->imm;
1980     unsigned n_siz = vsz - n_ofs;
1981     unsigned d = vec_full_reg_offset(s, a->rd);
1982     unsigned n = vec_full_reg_offset(s, a->rn);
1983     unsigned m = vec_full_reg_offset(s, a->rm);
1984
1985     /* Use host vector move insns if we have appropriate sizes
1986      * and no unfortunate overlap.
1987      */
1988     if (m != d
1989         && n_ofs == size_for_gvec(n_ofs)
1990         && n_siz == size_for_gvec(n_siz)
1991         && (d != n || n_siz <= n_ofs)) {
1992         tcg_gen_gvec_mov(0, d, n + n_ofs, n_siz, n_siz);
1993         if (n_ofs != 0) {
1994             tcg_gen_gvec_mov(0, d + n_siz, m, n_ofs, n_ofs);
1995         }
1996     } else {
1997         tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
1998     }
1999     return true;
2000 }
2001
2002 /*
2003  *** SVE Permute - Unpredicated Group
2004  */
2005
2006 static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
2007 {
2008     if (sve_access_check(s)) {
2009         unsigned vsz = vec_full_reg_size(s);
2010         tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2011                              vsz, vsz, cpu_reg_sp(s, a->rn));
2012     }
2013     return true;
2014 }
2015
2016 static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
2017 {
2018     if ((a->imm & 0x1f) == 0) {
2019         return false;
2020     }
2021     if (sve_access_check(s)) {
2022         unsigned vsz = vec_full_reg_size(s);
2023         unsigned dofs = vec_full_reg_offset(s, a->rd);
2024         unsigned esz, index;
2025
2026         esz = ctz32(a->imm);
2027         index = a->imm >> (esz + 1);
2028
2029         if ((index << esz) < vsz) {
2030             unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2031             tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2032         } else {
2033             tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
2034         }
2035     }
2036     return true;
2037 }
2038
2039 static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
2040 {
2041     typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2042     static gen_insr * const fns[4] = {
2043         gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2044         gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2045     };
2046     unsigned vsz = vec_full_reg_size(s);
2047     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
2048     TCGv_ptr t_zd = tcg_temp_new_ptr();
2049     TCGv_ptr t_zn = tcg_temp_new_ptr();
2050
2051     tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2052     tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2053
2054     fns[a->esz](t_zd, t_zn, val, desc);
2055
2056     tcg_temp_free_ptr(t_zd);
2057     tcg_temp_free_ptr(t_zn);
2058     tcg_temp_free_i32(desc);
2059 }
2060
2061 static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2062 {
2063     if (sve_access_check(s)) {
2064         TCGv_i64 t = tcg_temp_new_i64();
2065         tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
2066         do_insr_i64(s, a, t);
2067         tcg_temp_free_i64(t);
2068     }
2069     return true;
2070 }
2071
2072 static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2073 {
2074     if (sve_access_check(s)) {
2075         do_insr_i64(s, a, cpu_reg(s, a->rm));
2076     }
2077     return true;
2078 }
2079
2080 static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2081 {
2082     static gen_helper_gvec_2 * const fns[4] = {
2083         gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2084         gen_helper_sve_rev_s, gen_helper_sve_rev_d
2085     };
2086
2087     if (sve_access_check(s)) {
2088         unsigned vsz = vec_full_reg_size(s);
2089         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2090                            vec_full_reg_offset(s, a->rn),
2091                            vsz, vsz, 0, fns[a->esz]);
2092     }
2093     return true;
2094 }
2095
2096 static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2097 {
2098     static gen_helper_gvec_3 * const fns[4] = {
2099         gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2100         gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2101     };
2102
2103     if (sve_access_check(s)) {
2104         unsigned vsz = vec_full_reg_size(s);
2105         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2106                            vec_full_reg_offset(s, a->rn),
2107                            vec_full_reg_offset(s, a->rm),
2108                            vsz, vsz, 0, fns[a->esz]);
2109     }
2110     return true;
2111 }
2112
2113 static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
2114 {
2115     static gen_helper_gvec_2 * const fns[4][2] = {
2116         { NULL, NULL },
2117         { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2118         { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2119         { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2120     };
2121
2122     if (a->esz == 0) {
2123         return false;
2124     }
2125     if (sve_access_check(s)) {
2126         unsigned vsz = vec_full_reg_size(s);
2127         tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2128                            vec_full_reg_offset(s, a->rn)
2129                            + (a->h ? vsz / 2 : 0),
2130                            vsz, vsz, 0, fns[a->esz][a->u]);
2131     }
2132     return true;
2133 }
2134
2135 /*
2136  *** SVE Permute - Predicates Group
2137  */
2138
2139 static bool do_perm_pred3(DisasContext *s, arg_rrr_esz *a, bool high_odd,
2140                           gen_helper_gvec_3 *fn)
2141 {
2142     if (!sve_access_check(s)) {
2143         return true;
2144     }
2145
2146     unsigned vsz = pred_full_reg_size(s);
2147
2148     /* Predicate sizes may be smaller and cannot use simd_desc.
2149        We cannot round up, as we do elsewhere, because we need
2150        the exact size for ZIP2 and REV.  We retain the style for
2151        the other helpers for consistency.  */
2152     TCGv_ptr t_d = tcg_temp_new_ptr();
2153     TCGv_ptr t_n = tcg_temp_new_ptr();
2154     TCGv_ptr t_m = tcg_temp_new_ptr();
2155     TCGv_i32 t_desc;
2156     int desc;
2157
2158     desc = vsz - 2;
2159     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2160     desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2161
2162     tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2163     tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2164     tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2165     t_desc = tcg_const_i32(desc);
2166
2167     fn(t_d, t_n, t_m, t_desc);
2168
2169     tcg_temp_free_ptr(t_d);
2170     tcg_temp_free_ptr(t_n);
2171     tcg_temp_free_ptr(t_m);
2172     tcg_temp_free_i32(t_desc);
2173     return true;
2174 }
2175
2176 static bool do_perm_pred2(DisasContext *s, arg_rr_esz *a, bool high_odd,
2177                           gen_helper_gvec_2 *fn)
2178 {
2179     if (!sve_access_check(s)) {
2180         return true;
2181     }
2182
2183     unsigned vsz = pred_full_reg_size(s);
2184     TCGv_ptr t_d = tcg_temp_new_ptr();
2185     TCGv_ptr t_n = tcg_temp_new_ptr();
2186     TCGv_i32 t_desc;
2187     int desc;
2188
2189     tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2190     tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2191
2192     /* Predicate sizes may be smaller and cannot use simd_desc.
2193        We cannot round up, as we do elsewhere, because we need
2194        the exact size for ZIP2 and REV.  We retain the style for
2195        the other helpers for consistency.  */
2196
2197     desc = vsz - 2;
2198     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
2199     desc = deposit32(desc, SIMD_DATA_SHIFT + 2, 2, high_odd);
2200     t_desc = tcg_const_i32(desc);
2201
2202     fn(t_d, t_n, t_desc);
2203
2204     tcg_temp_free_i32(t_desc);
2205     tcg_temp_free_ptr(t_d);
2206     tcg_temp_free_ptr(t_n);
2207     return true;
2208 }
2209
2210 static bool trans_ZIP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2211 {
2212     return do_perm_pred3(s, a, 0, gen_helper_sve_zip_p);
2213 }
2214
2215 static bool trans_ZIP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2216 {
2217     return do_perm_pred3(s, a, 1, gen_helper_sve_zip_p);
2218 }
2219
2220 static bool trans_UZP1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2221 {
2222     return do_perm_pred3(s, a, 0, gen_helper_sve_uzp_p);
2223 }
2224
2225 static bool trans_UZP2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2226 {
2227     return do_perm_pred3(s, a, 1, gen_helper_sve_uzp_p);
2228 }
2229
2230 static bool trans_TRN1_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2231 {
2232     return do_perm_pred3(s, a, 0, gen_helper_sve_trn_p);
2233 }
2234
2235 static bool trans_TRN2_p(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2236 {
2237     return do_perm_pred3(s, a, 1, gen_helper_sve_trn_p);
2238 }
2239
2240 static bool trans_REV_p(DisasContext *s, arg_rr_esz *a, uint32_t insn)
2241 {
2242     return do_perm_pred2(s, a, 0, gen_helper_sve_rev_p);
2243 }
2244
2245 static bool trans_PUNPKLO(DisasContext *s, arg_PUNPKLO *a, uint32_t insn)
2246 {
2247     return do_perm_pred2(s, a, 0, gen_helper_sve_punpk_p);
2248 }
2249
2250 static bool trans_PUNPKHI(DisasContext *s, arg_PUNPKHI *a, uint32_t insn)
2251 {
2252     return do_perm_pred2(s, a, 1, gen_helper_sve_punpk_p);
2253 }
2254
2255 /*
2256  *** SVE Permute - Interleaving Group
2257  */
2258
2259 static bool do_zip(DisasContext *s, arg_rrr_esz *a, bool high)
2260 {
2261     static gen_helper_gvec_3 * const fns[4] = {
2262         gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2263         gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2264     };
2265
2266     if (sve_access_check(s)) {
2267         unsigned vsz = vec_full_reg_size(s);
2268         unsigned high_ofs = high ? vsz / 2 : 0;
2269         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2270                            vec_full_reg_offset(s, a->rn) + high_ofs,
2271                            vec_full_reg_offset(s, a->rm) + high_ofs,
2272                            vsz, vsz, 0, fns[a->esz]);
2273     }
2274     return true;
2275 }
2276
2277 static bool do_zzz_data_ool(DisasContext *s, arg_rrr_esz *a, int data,
2278                             gen_helper_gvec_3 *fn)
2279 {
2280     if (sve_access_check(s)) {
2281         unsigned vsz = vec_full_reg_size(s);
2282         tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2283                            vec_full_reg_offset(s, a->rn),
2284                            vec_full_reg_offset(s, a->rm),
2285                            vsz, vsz, data, fn);
2286     }
2287     return true;
2288 }
2289
2290 static bool trans_ZIP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2291 {
2292     return do_zip(s, a, false);
2293 }
2294
2295 static bool trans_ZIP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2296 {
2297     return do_zip(s, a, true);
2298 }
2299
2300 static gen_helper_gvec_3 * const uzp_fns[4] = {
2301     gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2302     gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2303 };
2304
2305 static bool trans_UZP1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2306 {
2307     return do_zzz_data_ool(s, a, 0, uzp_fns[a->esz]);
2308 }
2309
2310 static bool trans_UZP2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2311 {
2312     return do_zzz_data_ool(s, a, 1 << a->esz, uzp_fns[a->esz]);
2313 }
2314
2315 static gen_helper_gvec_3 * const trn_fns[4] = {
2316     gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2317     gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2318 };
2319
2320 static bool trans_TRN1_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2321 {
2322     return do_zzz_data_ool(s, a, 0, trn_fns[a->esz]);
2323 }
2324
2325 static bool trans_TRN2_z(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
2326 {
2327     return do_zzz_data_ool(s, a, 1 << a->esz, trn_fns[a->esz]);
2328 }
2329
2330 /*
2331  *** SVE Permute Vector - Predicated Group
2332  */
2333
2334 static bool trans_COMPACT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2335 {
2336     static gen_helper_gvec_3 * const fns[4] = {
2337         NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2338     };
2339     return do_zpz_ool(s, a, fns[a->esz]);
2340 }
2341
2342 /* Call the helper that computes the ARM LastActiveElement pseudocode
2343  * function, scaled by the element size.  This includes the not found
2344  * indication; e.g. not found for esz=3 is -8.
2345  */
2346 static void find_last_active(DisasContext *s, TCGv_i32 ret, int esz, int pg)
2347 {
2348     /* Predicate sizes may be smaller and cannot use simd_desc.  We cannot
2349      * round up, as we do elsewhere, because we need the exact size.
2350      */
2351     TCGv_ptr t_p = tcg_temp_new_ptr();
2352     TCGv_i32 t_desc;
2353     unsigned vsz = pred_full_reg_size(s);
2354     unsigned desc;
2355
2356     desc = vsz - 2;
2357     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
2358
2359     tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2360     t_desc = tcg_const_i32(desc);
2361
2362     gen_helper_sve_last_active_element(ret, t_p, t_desc);
2363
2364     tcg_temp_free_i32(t_desc);
2365     tcg_temp_free_ptr(t_p);
2366 }
2367
2368 /* Increment LAST to the offset of the next element in the vector,
2369  * wrapping around to 0.
2370  */
2371 static void incr_last_active(DisasContext *s, TCGv_i32 last, int esz)
2372 {
2373     unsigned vsz = vec_full_reg_size(s);
2374
2375     tcg_gen_addi_i32(last, last, 1 << esz);
2376     if (is_power_of_2(vsz)) {
2377         tcg_gen_andi_i32(last, last, vsz - 1);
2378     } else {
2379         TCGv_i32 max = tcg_const_i32(vsz);
2380         TCGv_i32 zero = tcg_const_i32(0);
2381         tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2382         tcg_temp_free_i32(max);
2383         tcg_temp_free_i32(zero);
2384     }
2385 }
2386
2387 /* If LAST < 0, set LAST to the offset of the last element in the vector.  */
2388 static void wrap_last_active(DisasContext *s, TCGv_i32 last, int esz)
2389 {
2390     unsigned vsz = vec_full_reg_size(s);
2391
2392     if (is_power_of_2(vsz)) {
2393         tcg_gen_andi_i32(last, last, vsz - 1);
2394     } else {
2395         TCGv_i32 max = tcg_const_i32(vsz - (1 << esz));
2396         TCGv_i32 zero = tcg_const_i32(0);
2397         tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2398         tcg_temp_free_i32(max);
2399         tcg_temp_free_i32(zero);
2400     }
2401 }
2402
2403 /* Load an unsigned element of ESZ from BASE+OFS.  */
2404 static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2405 {
2406     TCGv_i64 r = tcg_temp_new_i64();
2407
2408     switch (esz) {
2409     case 0:
2410         tcg_gen_ld8u_i64(r, base, ofs);
2411         break;
2412     case 1:
2413         tcg_gen_ld16u_i64(r, base, ofs);
2414         break;
2415     case 2:
2416         tcg_gen_ld32u_i64(r, base, ofs);
2417         break;
2418     case 3:
2419         tcg_gen_ld_i64(r, base, ofs);
2420         break;
2421     default:
2422         g_assert_not_reached();
2423     }
2424     return r;
2425 }
2426
2427 /* Load an unsigned element of ESZ from RM[LAST].  */
2428 static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2429                                  int rm, int esz)
2430 {
2431     TCGv_ptr p = tcg_temp_new_ptr();
2432     TCGv_i64 r;
2433
2434     /* Convert offset into vector into offset into ENV.
2435      * The final adjustment for the vector register base
2436      * is added via constant offset to the load.
2437      */
2438 #ifdef HOST_WORDS_BIGENDIAN
2439     /* Adjust for element ordering.  See vec_reg_offset.  */
2440     if (esz < 3) {
2441         tcg_gen_xori_i32(last, last, 8 - (1 << esz));
2442     }
2443 #endif
2444     tcg_gen_ext_i32_ptr(p, last);
2445     tcg_gen_add_ptr(p, p, cpu_env);
2446
2447     r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2448     tcg_temp_free_ptr(p);
2449
2450     return r;
2451 }
2452
2453 /* Compute CLAST for a Zreg.  */
2454 static bool do_clast_vector(DisasContext *s, arg_rprr_esz *a, bool before)
2455 {
2456     TCGv_i32 last;
2457     TCGLabel *over;
2458     TCGv_i64 ele;
2459     unsigned vsz, esz = a->esz;
2460
2461     if (!sve_access_check(s)) {
2462         return true;
2463     }
2464
2465     last = tcg_temp_local_new_i32();
2466     over = gen_new_label();
2467
2468     find_last_active(s, last, esz, a->pg);
2469
2470     /* There is of course no movcond for a 2048-bit vector,
2471      * so we must branch over the actual store.
2472      */
2473     tcg_gen_brcondi_i32(TCG_COND_LT, last, 0, over);
2474
2475     if (!before) {
2476         incr_last_active(s, last, esz);
2477     }
2478
2479     ele = load_last_active(s, last, a->rm, esz);
2480     tcg_temp_free_i32(last);
2481
2482     vsz = vec_full_reg_size(s);
2483     tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2484     tcg_temp_free_i64(ele);
2485
2486     /* If this insn used MOVPRFX, we may need a second move.  */
2487     if (a->rd != a->rn) {
2488         TCGLabel *done = gen_new_label();
2489         tcg_gen_br(done);
2490
2491         gen_set_label(over);
2492         do_mov_z(s, a->rd, a->rn);
2493
2494         gen_set_label(done);
2495     } else {
2496         gen_set_label(over);
2497     }
2498     return true;
2499 }
2500
2501 static bool trans_CLASTA_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2502 {
2503     return do_clast_vector(s, a, false);
2504 }
2505
2506 static bool trans_CLASTB_z(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2507 {
2508     return do_clast_vector(s, a, true);
2509 }
2510
2511 /* Compute CLAST for a scalar.  */
2512 static void do_clast_scalar(DisasContext *s, int esz, int pg, int rm,
2513                             bool before, TCGv_i64 reg_val)
2514 {
2515     TCGv_i32 last = tcg_temp_new_i32();
2516     TCGv_i64 ele, cmp, zero;
2517
2518     find_last_active(s, last, esz, pg);
2519
2520     /* Extend the original value of last prior to incrementing.  */
2521     cmp = tcg_temp_new_i64();
2522     tcg_gen_ext_i32_i64(cmp, last);
2523
2524     if (!before) {
2525         incr_last_active(s, last, esz);
2526     }
2527
2528     /* The conceit here is that while last < 0 indicates not found, after
2529      * adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2530      * from which we can load garbage.  We then discard the garbage with
2531      * a conditional move.
2532      */
2533     ele = load_last_active(s, last, rm, esz);
2534     tcg_temp_free_i32(last);
2535
2536     zero = tcg_const_i64(0);
2537     tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2538
2539     tcg_temp_free_i64(zero);
2540     tcg_temp_free_i64(cmp);
2541     tcg_temp_free_i64(ele);
2542 }
2543
2544 /* Compute CLAST for a Vreg.  */
2545 static bool do_clast_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2546 {
2547     if (sve_access_check(s)) {
2548         int esz = a->esz;
2549         int ofs = vec_reg_offset(s, a->rd, 0, esz);
2550         TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2551
2552         do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2553         write_fp_dreg(s, a->rd, reg);
2554         tcg_temp_free_i64(reg);
2555     }
2556     return true;
2557 }
2558
2559 static bool trans_CLASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2560 {
2561     return do_clast_fp(s, a, false);
2562 }
2563
2564 static bool trans_CLASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2565 {
2566     return do_clast_fp(s, a, true);
2567 }
2568
2569 /* Compute CLAST for a Xreg.  */
2570 static bool do_clast_general(DisasContext *s, arg_rpr_esz *a, bool before)
2571 {
2572     TCGv_i64 reg;
2573
2574     if (!sve_access_check(s)) {
2575         return true;
2576     }
2577
2578     reg = cpu_reg(s, a->rd);
2579     switch (a->esz) {
2580     case 0:
2581         tcg_gen_ext8u_i64(reg, reg);
2582         break;
2583     case 1:
2584         tcg_gen_ext16u_i64(reg, reg);
2585         break;
2586     case 2:
2587         tcg_gen_ext32u_i64(reg, reg);
2588         break;
2589     case 3:
2590         break;
2591     default:
2592         g_assert_not_reached();
2593     }
2594
2595     do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2596     return true;
2597 }
2598
2599 static bool trans_CLASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2600 {
2601     return do_clast_general(s, a, false);
2602 }
2603
2604 static bool trans_CLASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2605 {
2606     return do_clast_general(s, a, true);
2607 }
2608
2609 /* Compute LAST for a scalar.  */
2610 static TCGv_i64 do_last_scalar(DisasContext *s, int esz,
2611                                int pg, int rm, bool before)
2612 {
2613     TCGv_i32 last = tcg_temp_new_i32();
2614     TCGv_i64 ret;
2615
2616     find_last_active(s, last, esz, pg);
2617     if (before) {
2618         wrap_last_active(s, last, esz);
2619     } else {
2620         incr_last_active(s, last, esz);
2621     }
2622
2623     ret = load_last_active(s, last, rm, esz);
2624     tcg_temp_free_i32(last);
2625     return ret;
2626 }
2627
2628 /* Compute LAST for a Vreg.  */
2629 static bool do_last_fp(DisasContext *s, arg_rpr_esz *a, bool before)
2630 {
2631     if (sve_access_check(s)) {
2632         TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2633         write_fp_dreg(s, a->rd, val);
2634         tcg_temp_free_i64(val);
2635     }
2636     return true;
2637 }
2638
2639 static bool trans_LASTA_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2640 {
2641     return do_last_fp(s, a, false);
2642 }
2643
2644 static bool trans_LASTB_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2645 {
2646     return do_last_fp(s, a, true);
2647 }
2648
2649 /* Compute LAST for a Xreg.  */
2650 static bool do_last_general(DisasContext *s, arg_rpr_esz *a, bool before)
2651 {
2652     if (sve_access_check(s)) {
2653         TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2654         tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2655         tcg_temp_free_i64(val);
2656     }
2657     return true;
2658 }
2659
2660 static bool trans_LASTA_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2661 {
2662     return do_last_general(s, a, false);
2663 }
2664
2665 static bool trans_LASTB_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2666 {
2667     return do_last_general(s, a, true);
2668 }
2669
2670 static bool trans_CPY_m_r(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2671 {
2672     if (sve_access_check(s)) {
2673         do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2674     }
2675     return true;
2676 }
2677
2678 static bool trans_CPY_m_v(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2679 {
2680     if (sve_access_check(s)) {
2681         int ofs = vec_reg_offset(s, a->rn, 0, a->esz);
2682         TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2683         do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2684         tcg_temp_free_i64(t);
2685     }
2686     return true;
2687 }
2688
2689 static bool trans_REVB(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2690 {
2691     static gen_helper_gvec_3 * const fns[4] = {
2692         NULL,
2693         gen_helper_sve_revb_h,
2694         gen_helper_sve_revb_s,
2695         gen_helper_sve_revb_d,
2696     };
2697     return do_zpz_ool(s, a, fns[a->esz]);
2698 }
2699
2700 static bool trans_REVH(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2701 {
2702     static gen_helper_gvec_3 * const fns[4] = {
2703         NULL,
2704         NULL,
2705         gen_helper_sve_revh_s,
2706         gen_helper_sve_revh_d,
2707     };
2708     return do_zpz_ool(s, a, fns[a->esz]);
2709 }
2710
2711 static bool trans_REVW(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2712 {
2713     return do_zpz_ool(s, a, a->esz == 3 ? gen_helper_sve_revw_d : NULL);
2714 }
2715
2716 static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
2717 {
2718     static gen_helper_gvec_3 * const fns[4] = {
2719         gen_helper_sve_rbit_b,
2720         gen_helper_sve_rbit_h,
2721         gen_helper_sve_rbit_s,
2722         gen_helper_sve_rbit_d,
2723     };
2724     return do_zpz_ool(s, a, fns[a->esz]);
2725 }
2726
2727 static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
2728 {
2729     if (sve_access_check(s)) {
2730         unsigned vsz = vec_full_reg_size(s);
2731         tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2732                            vec_full_reg_offset(s, a->rn),
2733                            vec_full_reg_offset(s, a->rm),
2734                            pred_full_reg_offset(s, a->pg),
2735                            vsz, vsz, a->esz, gen_helper_sve_splice);
2736     }
2737     return true;
2738 }
2739
2740 /*
2741  *** SVE Integer Compare - Vectors Group
2742  */
2743
2744 static bool do_ppzz_flags(DisasContext *s, arg_rprr_esz *a,
2745                           gen_helper_gvec_flags_4 *gen_fn)
2746 {
2747     TCGv_ptr pd, zn, zm, pg;
2748     unsigned vsz;
2749     TCGv_i32 t;
2750
2751     if (gen_fn == NULL) {
2752         return false;
2753     }
2754     if (!sve_access_check(s)) {
2755         return true;
2756     }
2757
2758     vsz = vec_full_reg_size(s);
2759     t = tcg_const_i32(simd_desc(vsz, vsz, 0));
2760     pd = tcg_temp_new_ptr();
2761     zn = tcg_temp_new_ptr();
2762     zm = tcg_temp_new_ptr();
2763     pg = tcg_temp_new_ptr();
2764
2765     tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2766     tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2767     tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2768     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2769
2770     gen_fn(t, pd, zn, zm, pg, t);
2771
2772     tcg_temp_free_ptr(pd);
2773     tcg_temp_free_ptr(zn);
2774     tcg_temp_free_ptr(zm);
2775     tcg_temp_free_ptr(pg);
2776
2777     do_pred_flags(t);
2778
2779     tcg_temp_free_i32(t);
2780     return true;
2781 }
2782
2783 #define DO_PPZZ(NAME, name) \
2784 static bool trans_##NAME##_ppzz(DisasContext *s, arg_rprr_esz *a,         \
2785                                 uint32_t insn)                            \
2786 {                                                                         \
2787     static gen_helper_gvec_flags_4 * const fns[4] = {                     \
2788         gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h,   \
2789         gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d,   \
2790     };                                                                    \
2791     return do_ppzz_flags(s, a, fns[a->esz]);                              \
2792 }
2793
2794 DO_PPZZ(CMPEQ, cmpeq)
2795 DO_PPZZ(CMPNE, cmpne)
2796 DO_PPZZ(CMPGT, cmpgt)
2797 DO_PPZZ(CMPGE, cmpge)
2798 DO_PPZZ(CMPHI, cmphi)
2799 DO_PPZZ(CMPHS, cmphs)
2800
2801 #undef DO_PPZZ
2802
2803 #define DO_PPZW(NAME, name) \
2804 static bool trans_##NAME##_ppzw(DisasContext *s, arg_rprr_esz *a,         \
2805                                 uint32_t insn)                            \
2806 {                                                                         \
2807     static gen_helper_gvec_flags_4 * const fns[4] = {                     \
2808         gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h,   \
2809         gen_helper_sve_##name##_ppzw_s, NULL                              \
2810     };                                                                    \
2811     return do_ppzz_flags(s, a, fns[a->esz]);                              \
2812 }
2813
2814 DO_PPZW(CMPEQ, cmpeq)
2815 DO_PPZW(CMPNE, cmpne)
2816 DO_PPZW(CMPGT, cmpgt)
2817 DO_PPZW(CMPGE, cmpge)
2818 DO_PPZW(CMPHI, cmphi)
2819 DO_PPZW(CMPHS, cmphs)
2820 DO_PPZW(CMPLT, cmplt)
2821 DO_PPZW(CMPLE, cmple)
2822 DO_PPZW(CMPLO, cmplo)
2823 DO_PPZW(CMPLS, cmpls)
2824
2825 #undef DO_PPZW
2826
2827 /*
2828  *** SVE Integer Compare - Immediate Groups
2829  */
2830
2831 static bool do_ppzi_flags(DisasContext *s, arg_rpri_esz *a,
2832                           gen_helper_gvec_flags_3 *gen_fn)
2833 {
2834     TCGv_ptr pd, zn, pg;
2835     unsigned vsz;
2836     TCGv_i32 t;
2837
2838     if (gen_fn == NULL) {
2839         return false;
2840     }
2841     if (!sve_access_check(s)) {
2842         return true;
2843     }
2844
2845     vsz = vec_full_reg_size(s);
2846     t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2847     pd = tcg_temp_new_ptr();
2848     zn = tcg_temp_new_ptr();
2849     pg = tcg_temp_new_ptr();
2850
2851     tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2852     tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2853     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2854
2855     gen_fn(t, pd, zn, pg, t);
2856
2857     tcg_temp_free_ptr(pd);
2858     tcg_temp_free_ptr(zn);
2859     tcg_temp_free_ptr(pg);
2860
2861     do_pred_flags(t);
2862
2863     tcg_temp_free_i32(t);
2864     return true;
2865 }
2866
2867 #define DO_PPZI(NAME, name) \
2868 static bool trans_##NAME##_ppzi(DisasContext *s, arg_rpri_esz *a,         \
2869                                 uint32_t insn)                            \
2870 {                                                                         \
2871     static gen_helper_gvec_flags_3 * const fns[4] = {                     \
2872         gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h,   \
2873         gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d,   \
2874     };                                                                    \
2875     return do_ppzi_flags(s, a, fns[a->esz]);                              \
2876 }
2877
2878 DO_PPZI(CMPEQ, cmpeq)
2879 DO_PPZI(CMPNE, cmpne)
2880 DO_PPZI(CMPGT, cmpgt)
2881 DO_PPZI(CMPGE, cmpge)
2882 DO_PPZI(CMPHI, cmphi)
2883 DO_PPZI(CMPHS, cmphs)
2884 DO_PPZI(CMPLT, cmplt)
2885 DO_PPZI(CMPLE, cmple)
2886 DO_PPZI(CMPLO, cmplo)
2887 DO_PPZI(CMPLS, cmpls)
2888
2889 #undef DO_PPZI
2890
2891 /*
2892  *** SVE Partition Break Group
2893  */
2894
2895 static bool do_brk3(DisasContext *s, arg_rprr_s *a,
2896                     gen_helper_gvec_4 *fn, gen_helper_gvec_flags_4 *fn_s)
2897 {
2898     if (!sve_access_check(s)) {
2899         return true;
2900     }
2901
2902     unsigned vsz = pred_full_reg_size(s);
2903
2904     /* Predicate sizes may be smaller and cannot use simd_desc.  */
2905     TCGv_ptr d = tcg_temp_new_ptr();
2906     TCGv_ptr n = tcg_temp_new_ptr();
2907     TCGv_ptr m = tcg_temp_new_ptr();
2908     TCGv_ptr g = tcg_temp_new_ptr();
2909     TCGv_i32 t = tcg_const_i32(vsz - 2);
2910
2911     tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2912     tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2913     tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2914     tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2915
2916     if (a->s) {
2917         fn_s(t, d, n, m, g, t);
2918         do_pred_flags(t);
2919     } else {
2920         fn(d, n, m, g, t);
2921     }
2922     tcg_temp_free_ptr(d);
2923     tcg_temp_free_ptr(n);
2924     tcg_temp_free_ptr(m);
2925     tcg_temp_free_ptr(g);
2926     tcg_temp_free_i32(t);
2927     return true;
2928 }
2929
2930 static bool do_brk2(DisasContext *s, arg_rpr_s *a,
2931                     gen_helper_gvec_3 *fn, gen_helper_gvec_flags_3 *fn_s)
2932 {
2933     if (!sve_access_check(s)) {
2934         return true;
2935     }
2936
2937     unsigned vsz = pred_full_reg_size(s);
2938
2939     /* Predicate sizes may be smaller and cannot use simd_desc.  */
2940     TCGv_ptr d = tcg_temp_new_ptr();
2941     TCGv_ptr n = tcg_temp_new_ptr();
2942     TCGv_ptr g = tcg_temp_new_ptr();
2943     TCGv_i32 t = tcg_const_i32(vsz - 2);
2944
2945     tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2946     tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2947     tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2948
2949     if (a->s) {
2950         fn_s(t, d, n, g, t);
2951         do_pred_flags(t);
2952     } else {
2953         fn(d, n, g, t);
2954     }
2955     tcg_temp_free_ptr(d);
2956     tcg_temp_free_ptr(n);
2957     tcg_temp_free_ptr(g);
2958     tcg_temp_free_i32(t);
2959     return true;
2960 }
2961
/*
 * Translators for the partition-break instructions.  Each one only binds
 * a pair of helpers -- the plain one and the one whose name carries an
 * extra 's', which do_brk3/do_brk2 call when a->s is set -- and defers
 * everything else to do_brk3 (two source predicates) or do_brk2 (one).
 */

/* BRKPA: three-predicate form, helpers brkpa / brkpas.  */
static bool trans_BRKPA(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
}

/* BRKPB: three-predicate form, helpers brkpb / brkpbs.  */
static bool trans_BRKPB(DisasContext *s, arg_rprr_s *a, uint32_t insn)
{
    return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
}

/* BRKA, "_m" variant: helpers brka_m / brkas_m.  */
static bool trans_BRKA_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
{
    return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
}

/* BRKB, "_m" variant: helpers brkb_m / brkbs_m.  */
static bool trans_BRKB_m(DisasContext *s, arg_rpr_s *a, uint32_t insn)
{
    return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
}

/* BRKA, "_z" variant: helpers brka_z / brkas_z.  */
static bool trans_BRKA_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
{
    return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
}

/* BRKB, "_z" variant: helpers brkb_z / brkbs_z.  */
static bool trans_BRKB_z(DisasContext *s, arg_rpr_s *a, uint32_t insn)
{
    return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
}

/* BRKN: helpers brkn / brkns.  */
static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
{
    return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
}
2996
2997 /*
2998  *** SVE Predicate Count Group
2999  */
3000
3001 static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
3002 {
3003     unsigned psz = pred_full_reg_size(s);
3004
3005     if (psz <= 8) {
3006         uint64_t psz_mask;
3007
3008         tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3009         if (pn != pg) {
3010             TCGv_i64 g = tcg_temp_new_i64();
3011             tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3012             tcg_gen_and_i64(val, val, g);
3013             tcg_temp_free_i64(g);
3014         }
3015
3016         /* Reduce the pred_esz_masks value simply to reduce the
3017          * size of the code generated here.
3018          */
3019         psz_mask = MAKE_64BIT_MASK(0, psz * 8);
3020         tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3021
3022         tcg_gen_ctpop_i64(val, val);
3023     } else {
3024         TCGv_ptr t_pn = tcg_temp_new_ptr();
3025         TCGv_ptr t_pg = tcg_temp_new_ptr();
3026         unsigned desc;
3027         TCGv_i32 t_desc;
3028
3029         desc = psz - 2;
3030         desc = deposit32(desc, SIMD_DATA_SHIFT, 2, esz);
3031
3032         tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3033         tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3034         t_desc = tcg_const_i32(desc);
3035
3036         gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3037         tcg_temp_free_ptr(t_pn);
3038         tcg_temp_free_ptr(t_pg);
3039         tcg_temp_free_i32(t_desc);
3040     }
3041 }
3042
3043 static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
3044 {
3045     if (sve_access_check(s)) {
3046         do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3047     }
3048     return true;
3049 }
3050
3051 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
3052                             uint32_t insn)
3053 {
3054     if (sve_access_check(s)) {
3055         TCGv_i64 reg = cpu_reg(s, a->rd);
3056         TCGv_i64 val = tcg_temp_new_i64();
3057
3058         do_cntp(s, val, a->esz, a->pg, a->pg);
3059         if (a->d) {
3060             tcg_gen_sub_i64(reg, reg, val);
3061         } else {
3062             tcg_gen_add_i64(reg, reg, val);
3063         }
3064         tcg_temp_free_i64(val);
3065     }
3066     return true;
3067 }
3068
3069 static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3070                             uint32_t insn)
3071 {
3072     if (a->esz == 0) {
3073         return false;
3074     }
3075     if (sve_access_check(s)) {
3076         unsigned vsz = vec_full_reg_size(s);
3077         TCGv_i64 val = tcg_temp_new_i64();
3078         GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3079
3080         do_cntp(s, val, a->esz, a->pg, a->pg);
3081         gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3082                 vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3083     }
3084     return true;
3085 }
3086
3087 static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
3088                                 uint32_t insn)
3089 {
3090     if (sve_access_check(s)) {
3091         TCGv_i64 reg = cpu_reg(s, a->rd);
3092         TCGv_i64 val = tcg_temp_new_i64();
3093
3094         do_cntp(s, val, a->esz, a->pg, a->pg);
3095         do_sat_addsub_32(reg, val, a->u, a->d);
3096     }
3097     return true;
3098 }
3099
3100 static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
3101                                 uint32_t insn)
3102 {
3103     if (sve_access_check(s)) {
3104         TCGv_i64 reg = cpu_reg(s, a->rd);
3105         TCGv_i64 val = tcg_temp_new_i64();
3106
3107         do_cntp(s, val, a->esz, a->pg, a->pg);
3108         do_sat_addsub_64(reg, val, a->u, a->d);
3109     }
3110     return true;
3111 }
3112
3113 static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
3114                              uint32_t insn)
3115 {
3116     if (a->esz == 0) {
3117         return false;
3118     }
3119     if (sve_access_check(s)) {
3120         TCGv_i64 val = tcg_temp_new_i64();
3121         do_cntp(s, val, a->esz, a->pg, a->pg);
3122         do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3123     }
3124     return true;
3125 }
3126
3127 /*
3128  *** SVE Integer Compare Scalars Group
3129  */
3130
3131 static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
3132 {
3133     if (!sve_access_check(s)) {
3134         return true;
3135     }
3136
3137     TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3138     TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3139     TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3140     TCGv_i64 cmp = tcg_temp_new_i64();
3141
3142     tcg_gen_setcond_i64(cond, cmp, rn, rm);
3143     tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3144     tcg_temp_free_i64(cmp);
3145
3146     /* VF = !NF & !CF.  */
3147     tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
3148     tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3149
3150     /* Both NF and VF actually look at bit 31.  */
3151     tcg_gen_neg_i32(cpu_NF, cpu_NF);
3152     tcg_gen_neg_i32(cpu_VF, cpu_VF);
3153     return true;
3154 }
3155
3156 static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
3157 {
3158     if (!sve_access_check(s)) {
3159         return true;
3160     }
3161
3162     TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
3163     TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
3164     TCGv_i64 t0 = tcg_temp_new_i64();
3165     TCGv_i64 t1 = tcg_temp_new_i64();
3166     TCGv_i32 t2, t3;
3167     TCGv_ptr ptr;
3168     unsigned desc, vsz = vec_full_reg_size(s);
3169     TCGCond cond;
3170
3171     if (!a->sf) {
3172         if (a->u) {
3173             tcg_gen_ext32u_i64(op0, op0);
3174             tcg_gen_ext32u_i64(op1, op1);
3175         } else {
3176             tcg_gen_ext32s_i64(op0, op0);
3177             tcg_gen_ext32s_i64(op1, op1);
3178         }
3179     }
3180
3181     /* For the helper, compress the different conditions into a computation
3182      * of how many iterations for which the condition is true.
3183      *
3184      * This is slightly complicated by 0 <= UINT64_MAX, which is nominally
3185      * 2**64 iterations, overflowing to 0.  Of course, predicate registers
3186      * aren't that large, so any value >= predicate size is sufficient.
3187      */
3188     tcg_gen_sub_i64(t0, op1, op0);
3189
3190     /* t0 = MIN(op1 - op0, vsz).  */
3191     tcg_gen_movi_i64(t1, vsz);
3192     tcg_gen_umin_i64(t0, t0, t1);
3193     if (a->eq) {
3194         /* Equality means one more iteration.  */
3195         tcg_gen_addi_i64(t0, t0, 1);
3196     }
3197
3198     /* t0 = (condition true ? t0 : 0).  */
3199     cond = (a->u
3200             ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3201             : (a->eq ? TCG_COND_LE : TCG_COND_LT));
3202     tcg_gen_movi_i64(t1, 0);
3203     tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3204
3205     t2 = tcg_temp_new_i32();
3206     tcg_gen_extrl_i64_i32(t2, t0);
3207     tcg_temp_free_i64(t0);
3208     tcg_temp_free_i64(t1);
3209
3210     desc = (vsz / 8) - 2;
3211     desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
3212     t3 = tcg_const_i32(desc);
3213
3214     ptr = tcg_temp_new_ptr();
3215     tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3216
3217     gen_helper_sve_while(t2, ptr, t2, t3);
3218     do_pred_flags(t2);
3219
3220     tcg_temp_free_ptr(ptr);
3221     tcg_temp_free_i32(t2);
3222     tcg_temp_free_i32(t3);
3223     return true;
3224 }
3225
3226 /*
3227  *** SVE Integer Wide Immediate - Unpredicated Group
3228  */
3229
3230 static bool trans_FDUP(DisasContext *s, arg_FDUP *a, uint32_t insn)
3231 {
3232     if (a->esz == 0) {
3233         return false;
3234     }
3235     if (sve_access_check(s)) {
3236         unsigned vsz = vec_full_reg_size(s);
3237         int dofs = vec_full_reg_offset(s, a->rd);
3238         uint64_t imm;
3239
3240         /* Decode the VFP immediate.  */
3241         imm = vfp_expand_imm(a->esz, a->imm);
3242         imm = dup_const(a->esz, imm);
3243
3244         tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
3245     }
3246     return true;
3247 }
3248
3249 static bool trans_DUP_i(DisasContext *s, arg_DUP_i *a, uint32_t insn)
3250 {
3251     if (a->esz == 0 && extract32(insn, 13, 1)) {
3252         return false;
3253     }
3254     if (sve_access_check(s)) {
3255         unsigned vsz = vec_full_reg_size(s);
3256         int dofs = vec_full_reg_offset(s, a->rd);
3257
3258         tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
3259     }
3260     return true;
3261 }
3262
3263 static bool trans_ADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3264 {
3265     if (a->esz == 0 && extract32(insn, 13, 1)) {
3266         return false;
3267     }
3268     if (sve_access_check(s)) {
3269         unsigned vsz = vec_full_reg_size(s);
3270         tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3271                           vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3272     }
3273     return true;
3274 }
3275
/*
 * SUB (vector, immediate): implemented as ADD of the negated immediate.
 * Note that the negation is written back into *a before the arguments
 * are handed to trans_ADD_zzi, whose result is returned unchanged.
 */
static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    a->imm = -a->imm;
    return trans_ADD_zzi(s, a, insn);
}
3281
3282 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3283 {
3284     static const GVecGen2s op[4] = {
3285         { .fni8 = tcg_gen_vec_sub8_i64,
3286           .fniv = tcg_gen_sub_vec,
3287           .fno = gen_helper_sve_subri_b,
3288           .opc = INDEX_op_sub_vec,
3289           .vece = MO_8,
3290           .scalar_first = true },
3291         { .fni8 = tcg_gen_vec_sub16_i64,
3292           .fniv = tcg_gen_sub_vec,
3293           .fno = gen_helper_sve_subri_h,
3294           .opc = INDEX_op_sub_vec,
3295           .vece = MO_16,
3296           .scalar_first = true },
3297         { .fni4 = tcg_gen_sub_i32,
3298           .fniv = tcg_gen_sub_vec,
3299           .fno = gen_helper_sve_subri_s,
3300           .opc = INDEX_op_sub_vec,
3301           .vece = MO_32,
3302           .scalar_first = true },
3303         { .fni8 = tcg_gen_sub_i64,
3304           .fniv = tcg_gen_sub_vec,
3305           .fno = gen_helper_sve_subri_d,
3306           .opc = INDEX_op_sub_vec,
3307           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3308           .vece = MO_64,
3309           .scalar_first = true }
3310     };
3311
3312     if (a->esz == 0 && extract32(insn, 13, 1)) {
3313         return false;
3314     }
3315     if (sve_access_check(s)) {
3316         unsigned vsz = vec_full_reg_size(s);
3317         TCGv_i64 c = tcg_const_i64(a->imm);
3318         tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3319                         vec_full_reg_offset(s, a->rn),
3320                         vsz, vsz, c, &op[a->esz]);
3321         tcg_temp_free_i64(c);
3322     }
3323     return true;
3324 }
3325
3326 static bool trans_MUL_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
3327 {
3328     if (sve_access_check(s)) {
3329         unsigned vsz = vec_full_reg_size(s);
3330         tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3331                           vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3332     }
3333     return true;
3334 }
3335
3336 static bool do_zzi_sat(DisasContext *s, arg_rri_esz *a, uint32_t insn,
3337                        bool u, bool d)
3338 {
3339     if (a->esz == 0 && extract32(insn, 13, 1)) {
3340         return false;
3341     }
3342     if (sve_access_check(s)) {
3343         TCGv_i64 val = tcg_const_i64(a->imm);
3344         do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3345         tcg_temp_free_i64(val);
3346     }
3347     return true;
3348 }
3349
/*
 * Saturating add/subtract immediate translators.  Each forwards to
 * do_zzi_sat and differs only in the (u, d) flag pair it passes:
 * u is true for the UQ* forms and d is true for the *SUB forms.
 */

/* SQADD: u = false, d = false.  */
static bool trans_SQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_zzi_sat(s, a, insn, false, false);
}

/* UQADD: u = true, d = false.  */
static bool trans_UQADD_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_zzi_sat(s, a, insn, true, false);
}

/* SQSUB: u = false, d = true.  */
static bool trans_SQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_zzi_sat(s, a, insn, false, true);
}

/* UQSUB: u = true, d = true.  */
static bool trans_UQSUB_zzi(DisasContext *s, arg_rri_esz *a, uint32_t insn)
{
    return do_zzi_sat(s, a, insn, true, true);
}
3369
3370 static bool do_zzi_ool(DisasContext *s, arg_rri_esz *a, gen_helper_gvec_2i *fn)
3371 {
3372     if (sve_access_check(s)) {
3373         unsigned vsz = vec_full_reg_size(s);
3374         TCGv_i64 c = tcg_const_i64(a->imm);
3375
3376         tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3377                             vec_full_reg_offset(s, a->rn),
3378                             c, vsz, vsz, 0, fn);
3379         tcg_temp_free_i64(c);
3380     }
3381     return true;
3382 }
3383
3384 #define DO_ZZI(NAME, name) \
3385 static bool trans_##NAME##_zzi(DisasContext *s, arg_rri_esz *a,         \
3386                                uint32_t insn)                           \
3387 {                                                                       \
3388     static gen_helper_gvec_2i * const fns[4] = {                        \
3389         gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h,         \
3390         gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d,         \
3391     };                                                                  \
3392     return do_zzi_ool(s, a, fns[a->esz]);                               \
3393 }
3394
3395 DO_ZZI(SMAX, smax)
3396 DO_ZZI(UMAX, umax)
3397 DO_ZZI(SMIN, smin)
3398 DO_ZZI(UMIN, umin)
3399
3400 #undef DO_ZZI
3401
3402 /*
3403  *** SVE Floating Point Accumulating Reduction Group
3404  */
3405
3406 static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a, uint32_t insn)
3407 {
3408     typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3409                           TCGv_ptr, TCGv_ptr, TCGv_i32);
3410     static fadda_fn * const fns[3] = {
3411         gen_helper_sve_fadda_h,
3412         gen_helper_sve_fadda_s,
3413         gen_helper_sve_fadda_d,
3414     };
3415     unsigned vsz = vec_full_reg_size(s);
3416     TCGv_ptr t_rm, t_pg, t_fpst;
3417     TCGv_i64 t_val;
3418     TCGv_i32 t_desc;
3419
3420     if (a->esz == 0) {
3421         return false;
3422     }
3423     if (!sve_access_check(s)) {
3424         return true;
3425     }
3426
3427     t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, 0, a->esz), a->esz);
3428     t_rm = tcg_temp_new_ptr();
3429     t_pg = tcg_temp_new_ptr();
3430     tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3431     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3432     t_fpst = get_fpstatus_ptr(a->esz == MO_16);
3433     t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
3434
3435     fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3436
3437     tcg_temp_free_i32(t_desc);
3438     tcg_temp_free_ptr(t_fpst);
3439     tcg_temp_free_ptr(t_pg);
3440     tcg_temp_free_ptr(t_rm);
3441
3442     write_fp_dreg(s, a->rd, t_val);
3443     tcg_temp_free_i64(t_val);
3444     return true;
3445 }
3446
3447 /*
3448  *** SVE Floating Point Arithmetic - Unpredicated Group
3449  */
3450
3451 static bool do_zzz_fp(DisasContext *s, arg_rrr_esz *a,
3452                       gen_helper_gvec_3_ptr *fn)
3453 {
3454     if (fn == NULL) {
3455         return false;
3456     }
3457     if (sve_access_check(s)) {
3458         unsigned vsz = vec_full_reg_size(s);
3459         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3460         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3461                            vec_full_reg_offset(s, a->rn),
3462                            vec_full_reg_offset(s, a->rm),
3463                            status, vsz, vsz, 0, fn);
3464         tcg_temp_free_ptr(status);
3465     }
3466     return true;
3467 }
3468
3469
3470 #define DO_FP3(NAME, name) \
3471 static bool trans_##NAME(DisasContext *s, arg_rrr_esz *a, uint32_t insn) \
3472 {                                                                   \
3473     static gen_helper_gvec_3_ptr * const fns[4] = {                 \
3474         NULL, gen_helper_gvec_##name##_h,                           \
3475         gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d      \
3476     };                                                              \
3477     return do_zzz_fp(s, a, fns[a->esz]);                            \
3478 }
3479
3480 DO_FP3(FADD_zzz, fadd)
3481 DO_FP3(FSUB_zzz, fsub)
3482 DO_FP3(FMUL_zzz, fmul)
3483 DO_FP3(FTSMUL, ftsmul)
3484 DO_FP3(FRECPS, recps)
3485 DO_FP3(FRSQRTS, rsqrts)
3486
3487 #undef DO_FP3
3488
3489 /*
3490  *** SVE Floating Point Arithmetic - Predicated Group
3491  */
3492
3493 static bool do_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
3494                        gen_helper_gvec_4_ptr *fn)
3495 {
3496     if (fn == NULL) {
3497         return false;
3498     }
3499     if (sve_access_check(s)) {
3500         unsigned vsz = vec_full_reg_size(s);
3501         TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3502         tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3503                            vec_full_reg_offset(s, a->rn),
3504                            vec_full_reg_offset(s, a->rm),
3505                            pred_full_reg_offset(s, a->pg),
3506                            status, vsz, vsz, 0, fn);
3507         tcg_temp_free_ptr(status);
3508     }
3509     return true;
3510 }
3511
3512 #define DO_FP3(NAME, name) \
3513 static bool trans_##NAME(DisasContext *s, arg_rprr_esz *a, uint32_t insn) \
3514 {                                                                   \
3515     static gen_helper_gvec_4_ptr * const fns[4] = {                 \
3516         NULL, gen_helper_sve_##name##_h,                            \
3517         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d        \
3518     };                                                              \
3519     return do_zpzz_fp(s, a, fns[a->esz]);                           \
3520 }
3521
3522 DO_FP3(FADD_zpzz, fadd)
3523 DO_FP3(FSUB_zpzz, fsub)
3524 DO_FP3(FMUL_zpzz, fmul)
3525 DO_FP3(FMIN_zpzz, fmin)
3526 DO_FP3(FMAX_zpzz, fmax)
3527 DO_FP3(FMINNM_zpzz, fminnum)
3528 DO_FP3(FMAXNM_zpzz, fmaxnum)
3529 DO_FP3(FABD, fabd)
3530 DO_FP3(FSCALE, fscalbn)
3531 DO_FP3(FDIV, fdiv)
3532 DO_FP3(FMULX, fmulx)
3533
3534 #undef DO_FP3
3535
3536 typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);
3537
3538 static bool do_fmla(DisasContext *s, arg_rprrr_esz *a, gen_helper_sve_fmla *fn)
3539 {
3540     if (fn == NULL) {
3541         return false;
3542     }
3543     if (!sve_access_check(s)) {
3544         return true;
3545     }
3546
3547     unsigned vsz = vec_full_reg_size(s);
3548     unsigned desc;
3549     TCGv_i32 t_desc;
3550     TCGv_ptr pg = tcg_temp_new_ptr();
3551
3552     /* We would need 7 operands to pass these arguments "properly".
3553      * So we encode all the register numbers into the descriptor.
3554      */
3555     desc = deposit32(a->rd, 5, 5, a->rn);
3556     desc = deposit32(desc, 10, 5, a->rm);
3557     desc = deposit32(desc, 15, 5, a->ra);
3558     desc = simd_desc(vsz, vsz, desc);
3559
3560     t_desc = tcg_const_i32(desc);
3561     tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3562     fn(cpu_env, pg, t_desc);
3563     tcg_temp_free_i32(t_desc);
3564     tcg_temp_free_ptr(pg);
3565     return true;
3566 }
3567
3568 #define DO_FMLA(NAME, name) \
3569 static bool trans_##NAME(DisasContext *s, arg_rprrr_esz *a, uint32_t insn) \
3570 {                                                                    \
3571     static gen_helper_sve_fmla * const fns[4] = {                    \
3572         NULL, gen_helper_sve_##name##_h,                             \
3573         gen_helper_sve_##name##_s, gen_helper_sve_##name##_d         \
3574     };                                                               \
3575     return do_fmla(s, a, fns[a->esz]);                               \
3576 }
3577
3578 DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
3579 DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
3580 DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
3581 DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
3582
3583 #undef DO_FMLA
3584
3585 /*
3586  *** SVE Floating Point Unary Operations Predicated Group
3587  */
3588
3589 static bool do_zpz_ptr(DisasContext *s, int rd, int rn, int pg,
3590                        bool is_fp16, gen_helper_gvec_3_ptr *fn)
3591 {
3592     if (sve_access_check(s)) {
3593         unsigned vsz = vec_full_reg_size(s);
3594         TCGv_ptr status = get_fpstatus_ptr(is_fp16);
3595         tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
3596                            vec_full_reg_offset(s, rn),
3597                            pred_full_reg_offset(s, pg),
3598                            status, vsz, vsz, 0, fn);
3599         tcg_temp_free_ptr(status);
3600     }
3601     return true;
3602 }
3603
/*
 * SCVTF translators.  Each forwards to do_zpz_ptr with the matching
 * gen_helper_sve_scvt_<xy> helper.  The is_fp16 argument is true for the
 * variants whose suffix ends in 'h' and false for all the others.
 */

/* SCVTF, hh variant: fp16 status.  */
static bool trans_SCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
}

/* SCVTF, sh variant: fp16 status.  */
static bool trans_SCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
}

/* SCVTF, dh variant: fp16 status.  */
static bool trans_SCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
}

/* SCVTF, ss variant: non-fp16 status.  */
static bool trans_SCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
}

/* SCVTF, ds variant: non-fp16 status.  */
static bool trans_SCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
}

/* SCVTF, sd variant: non-fp16 status.  */
static bool trans_SCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
}

/* SCVTF, dd variant: non-fp16 status.  */
static bool trans_SCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
}
3638
/*
 * UCVTF translators.  Each forwards to do_zpz_ptr with the matching
 * gen_helper_sve_ucvt_<xy> helper.  The is_fp16 argument is true for the
 * variants whose suffix ends in 'h' and false for all the others.
 */

/* UCVTF, hh variant: fp16 status.  */
static bool trans_UCVTF_hh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
}

/* UCVTF, sh variant: fp16 status.  */
static bool trans_UCVTF_sh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
}

/* UCVTF, dh variant: fp16 status.  */
static bool trans_UCVTF_dh(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
}

/* UCVTF, ss variant: non-fp16 status.  */
static bool trans_UCVTF_ss(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
}

/* UCVTF, ds variant: non-fp16 status.  */
static bool trans_UCVTF_ds(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
}

/* UCVTF, sd variant: non-fp16 status.  */
static bool trans_UCVTF_sd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
}

/* UCVTF, dd variant: non-fp16 status.  */
static bool trans_UCVTF_dd(DisasContext *s, arg_rpr_esz *a, uint32_t insn)
{
    return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
}
3673
3674 /*
3675  *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
3676  */
3677
/* Subroutine loading a vector register at VOFS of LEN bytes.
 * The load should begin at the address Rn + IMM.
 *
 * VOFS is used as an offset from cpu_env for the stores into the
 * register file.  LEN is handled as whole 8-byte units followed by a
 * remainder of 2, 4 or 6 bytes; any other non-zero remainder reaches
 * g_assert_not_reached().
 */

static void do_ldr(DisasContext *s, uint32_t vofs, uint32_t len,
                   int rn, int imm)
{
    uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
    uint32_t len_remain = len % 8;
    /* Number of memory accesses: one per 8-byte unit, plus one per set
     * bit of the remainder (so a 6-byte tail counts as two).
     */
    uint32_t nparts = len / 8 + ctpop8(len_remain);
    int midx = get_mem_index(s);
    TCGv_i64 addr, t0, t1;

    addr = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();

    /* Note that unpredicated load/store of vector/predicate registers
     * are defined as a stream of bytes, which equates to little-endian
     * operations on larger quantities.  There is no nice way to force
     * a little-endian load for aarch64_be-linux-user out of line.
     *
     * Attempt to keep code expansion to a minimum by limiting the
     * amount of unrolling done.
     */
    if (nparts <= 4) {
        int i;

        /* Few enough parts: emit each 8-byte load and store inline.  */
        for (i = 0; i < len_align; i += 8) {
            tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
            tcg_gen_st_i64(t0, cpu_env, vofs + i);
        }
    } else {
        /* Otherwise emit a run-time loop over the aligned part, with
         * i as the byte offset, stepping by 8 until it reaches len_align.
         */
        TCGLabel *loop = gen_new_label();
        TCGv_ptr tp, i = tcg_const_local_ptr(0);

        gen_set_label(loop);

        /* Minimize the number of local temps that must be re-read from
         * the stack each iteration.  Instead, re-compute values other
         * than the loop counter.
         */
        /* tp first holds i + IMM, from which the guest address is built. */
        tp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tp, i, imm);
        tcg_gen_extu_ptr_i64(addr, tp);
        tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));

        tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);

        /* tp is then reused as cpu_env + i; the counter is advanced
         * before the store, which still uses the old offset held in tp.
         */
        tcg_gen_add_ptr(tp, cpu_env, i);
        tcg_gen_addi_ptr(i, i, 8);
        tcg_gen_st_i64(t0, tp, vofs);
        tcg_temp_free_ptr(tp);

        tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
        tcg_temp_free_ptr(i);
    }

    /* Predicate register loads can be any multiple of 2.
     * Note that we still store the entire 64-bit unit into cpu_env.
     */
    if (len_remain) {
        tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);

        switch (len_remain) {
        case 2:
        case 4:
        case 8:
            /* A single access; ctz32 gives log2 of the byte count, which
             * is combined with MO_LE as the access size.  Since
             * len_remain is len % 8, the value 8 cannot actually occur.
             */
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
            break;

        case 6:
            /* Two accesses: 4 bytes, then 2 more from addr + 4, merged
             * into t0 starting at bit 32.
             */
            t1 = tcg_temp_new_i64();
            tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
            tcg_gen_addi_i64(addr, addr, 4);
            tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
            tcg_gen_deposit_i64(t0, t0, t1, 32, 32);
            tcg_temp_free_i64(t1);
            break;

        default:
            g_assert_not_reached();
        }
        tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
    }
    tcg_temp_free_i64(addr);
    tcg_temp_free_i64(t0);
}
3766
3767 /* Similarly for stores.  */
3768 static void do_str(DisasContext *s, uint32_t vofs, uint32_t len,
3769                    int rn, int imm)
3770 {
3771     uint32_t len_align = QEMU_ALIGN_DOWN(len, 8);
3772     uint32_t len_remain = len % 8;
3773     uint32_t nparts = len / 8 + ctpop8(len_remain);
3774     int midx = get_mem_index(s);
3775     TCGv_i64 addr, t0;
3776
3777     addr = tcg_temp_new_i64();
3778     t0 = tcg_temp_new_i64();
3779
3780     /* Note that unpredicated load/store of vector/predicate registers
3781      * are defined as a stream of bytes, which equates to little-endian
3782      * operations on larger quantities.  There is no nice way to force
3783      * a little-endian store for aarch64_be-linux-user out of line.
3784      *
3785      * Attempt to keep code expansion to a minimum by limiting the
3786      * amount of unrolling done.
3787      */
3788     if (nparts <= 4) {
3789         int i;
3790
3791         for (i = 0; i < len_align; i += 8) {
3792             tcg_gen_ld_i64(t0, cpu_env, vofs + i);
3793             tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
3794             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
3795         }
3796     } else {
3797         TCGLabel *loop = gen_new_label();
3798         TCGv_ptr t2, i = tcg_const_local_ptr(0);
3799
3800         gen_set_label(loop);
3801
3802         t2 = tcg_temp_new_ptr();
3803         tcg_gen_add_ptr(t2, cpu_env, i);
3804         tcg_gen_ld_i64(t0, t2, vofs);
3805
3806         /* Minimize the number of local temps that must be re-read from
3807          * the stack each iteration.  Instead, re-compute values other
3808          * than the loop counter.
3809          */
3810         tcg_gen_addi_ptr(t2, i, imm);
3811         tcg_gen_extu_ptr_i64(addr, t2);
3812         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
3813         tcg_temp_free_ptr(t2);
3814
3815         tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
3816
3817         tcg_gen_addi_ptr(i, i, 8);
3818
3819         tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
3820         tcg_temp_free_ptr(i);
3821     }
3822
3823     /* Predicate register stores can be any multiple of 2.  */
3824     if (len_remain) {
3825         tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
3826         tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
3827
3828         switch (len_remain) {
3829         case 2:
3830         case 4:
3831         case 8:
3832             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE | ctz32(len_remain));
3833             break;
3834
3835         case 6:
3836             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
3837             tcg_gen_addi_i64(addr, addr, 4);
3838             tcg_gen_shri_i64(t0, t0, 32);
3839             tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
3840             break;
3841
3842         default:
3843             g_assert_not_reached();
3844         }
3845     }
3846     tcg_temp_free_i64(addr);
3847     tcg_temp_free_i64(t0);
3848 }
3849
3850 static bool trans_LDR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
3851 {
3852     if (sve_access_check(s)) {
3853         int size = vec_full_reg_size(s);
3854         int off = vec_full_reg_offset(s, a->rd);
3855         do_ldr(s, off, size, a->rn, a->imm * size);
3856     }
3857     return true;
3858 }
3859
3860 static bool trans_LDR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
3861 {
3862     if (sve_access_check(s)) {
3863         int size = pred_full_reg_size(s);
3864         int off = pred_full_reg_offset(s, a->rd);
3865         do_ldr(s, off, size, a->rn, a->imm * size);
3866     }
3867     return true;
3868 }
3869
3870 static bool trans_STR_zri(DisasContext *s, arg_rri *a, uint32_t insn)
3871 {
3872     if (sve_access_check(s)) {
3873         int size = vec_full_reg_size(s);
3874         int off = vec_full_reg_offset(s, a->rd);
3875         do_str(s, off, size, a->rn, a->imm * size);
3876     }
3877     return true;
3878 }
3879
3880 static bool trans_STR_pri(DisasContext *s, arg_rri *a, uint32_t insn)
3881 {
3882     if (sve_access_check(s)) {
3883         int size = pred_full_reg_size(s);
3884         int off = pred_full_reg_offset(s, a->rd);
3885         do_str(s, off, size, a->rn, a->imm * size);
3886     }
3887     return true;
3888 }
3889
3890 /*
3891  *** SVE Memory - Contiguous Load Group
3892  */
3893
3894 /* The memory mode of the dtype.  */
3895 static const TCGMemOp dtype_mop[16] = {
3896     MO_UB, MO_UB, MO_UB, MO_UB,
3897     MO_SL, MO_UW, MO_UW, MO_UW,
3898     MO_SW, MO_SW, MO_UL, MO_UL,
3899     MO_SB, MO_SB, MO_SB, MO_Q
3900 };
3901
3902 #define dtype_msz(x)  (dtype_mop[x] & MO_SIZE)
3903
/*
 * The vector element size of dtype, as a log2 byte count,
 * indexed by the 4-bit dtype field.
 */
static const uint8_t dtype_esz[16] = {
    [0x0] = 0,
    [0x1] = 1,
    [0x2] = 2,
    [0x3] = 3,

    [0x4] = 3,
    [0x5] = 1,
    [0x6] = 2,
    [0x7] = 3,

    [0x8] = 3,
    [0x9] = 2,
    [0xa] = 2,
    [0xb] = 3,

    [0xc] = 3,
    [0xd] = 2,
    [0xe] = 1,
    [0xf] = 3,
};
3911
3912 static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
3913                        gen_helper_gvec_mem *fn)
3914 {
3915     unsigned vsz = vec_full_reg_size(s);
3916     TCGv_ptr t_pg;
3917     TCGv_i32 desc;
3918
3919     /* For e.g. LD4, there are not enough arguments to pass all 4
3920      * registers as pointers, so encode the regno into the data field.
3921      * For consistency, do this even for LD1.
3922      */
3923     desc = tcg_const_i32(simd_desc(vsz, vsz, zt));
3924     t_pg = tcg_temp_new_ptr();
3925
3926     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3927     fn(cpu_env, t_pg, addr, desc);
3928
3929     tcg_temp_free_ptr(t_pg);
3930     tcg_temp_free_i32(desc);
3931 }
3932
3933 static void do_ld_zpa(DisasContext *s, int zt, int pg,
3934                       TCGv_i64 addr, int dtype, int nreg)
3935 {
3936     static gen_helper_gvec_mem * const fns[16][4] = {
3937         { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
3938           gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
3939         { gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
3940         { gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
3941         { gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
3942
3943         { gen_helper_sve_ld1sds_r, NULL, NULL, NULL },
3944         { gen_helper_sve_ld1hh_r, gen_helper_sve_ld2hh_r,
3945           gen_helper_sve_ld3hh_r, gen_helper_sve_ld4hh_r },
3946         { gen_helper_sve_ld1hsu_r, NULL, NULL, NULL },
3947         { gen_helper_sve_ld1hdu_r, NULL, NULL, NULL },
3948
3949         { gen_helper_sve_ld1hds_r, NULL, NULL, NULL },
3950         { gen_helper_sve_ld1hss_r, NULL, NULL, NULL },
3951         { gen_helper_sve_ld1ss_r, gen_helper_sve_ld2ss_r,
3952           gen_helper_sve_ld3ss_r, gen_helper_sve_ld4ss_r },
3953         { gen_helper_sve_ld1sdu_r, NULL, NULL, NULL },
3954
3955         { gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
3956         { gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
3957         { gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
3958         { gen_helper_sve_ld1dd_r, gen_helper_sve_ld2dd_r,
3959           gen_helper_sve_ld3dd_r, gen_helper_sve_ld4dd_r },
3960     };
3961     gen_helper_gvec_mem *fn = fns[dtype][nreg];
3962
3963     /* While there are holes in the table, they are not
3964      * accessible via the instruction encoding.
3965      */
3966     assert(fn != NULL);
3967     do_mem_zpa(s, zt, pg, addr, fn);
3968 }
3969
3970 static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
3971 {
3972     if (a->rm == 31) {
3973         return false;
3974     }
3975     if (sve_access_check(s)) {
3976         TCGv_i64 addr = new_tmp_a64(s);
3977         tcg_gen_muli_i64(addr, cpu_reg(s, a->rm),
3978                          (a->nreg + 1) << dtype_msz(a->dtype));
3979         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
3980         do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
3981     }
3982     return true;
3983 }
3984
3985 static bool trans_LD_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
3986 {
3987     if (sve_access_check(s)) {
3988         int vsz = vec_full_reg_size(s);
3989         int elements = vsz >> dtype_esz[a->dtype];
3990         TCGv_i64 addr = new_tmp_a64(s);
3991
3992         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
3993                          (a->imm * elements * (a->nreg + 1))
3994                          << dtype_msz(a->dtype));
3995         do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
3996     }
3997     return true;
3998 }
3999
4000 static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4001 {
4002     static gen_helper_gvec_mem * const fns[16] = {
4003         gen_helper_sve_ldff1bb_r,
4004         gen_helper_sve_ldff1bhu_r,
4005         gen_helper_sve_ldff1bsu_r,
4006         gen_helper_sve_ldff1bdu_r,
4007
4008         gen_helper_sve_ldff1sds_r,
4009         gen_helper_sve_ldff1hh_r,
4010         gen_helper_sve_ldff1hsu_r,
4011         gen_helper_sve_ldff1hdu_r,
4012
4013         gen_helper_sve_ldff1hds_r,
4014         gen_helper_sve_ldff1hss_r,
4015         gen_helper_sve_ldff1ss_r,
4016         gen_helper_sve_ldff1sdu_r,
4017
4018         gen_helper_sve_ldff1bds_r,
4019         gen_helper_sve_ldff1bss_r,
4020         gen_helper_sve_ldff1bhs_r,
4021         gen_helper_sve_ldff1dd_r,
4022     };
4023
4024     if (sve_access_check(s)) {
4025         TCGv_i64 addr = new_tmp_a64(s);
4026         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4027         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4028         do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
4029     }
4030     return true;
4031 }
4032
4033 static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4034 {
4035     static gen_helper_gvec_mem * const fns[16] = {
4036         gen_helper_sve_ldnf1bb_r,
4037         gen_helper_sve_ldnf1bhu_r,
4038         gen_helper_sve_ldnf1bsu_r,
4039         gen_helper_sve_ldnf1bdu_r,
4040
4041         gen_helper_sve_ldnf1sds_r,
4042         gen_helper_sve_ldnf1hh_r,
4043         gen_helper_sve_ldnf1hsu_r,
4044         gen_helper_sve_ldnf1hdu_r,
4045
4046         gen_helper_sve_ldnf1hds_r,
4047         gen_helper_sve_ldnf1hss_r,
4048         gen_helper_sve_ldnf1ss_r,
4049         gen_helper_sve_ldnf1sdu_r,
4050
4051         gen_helper_sve_ldnf1bds_r,
4052         gen_helper_sve_ldnf1bss_r,
4053         gen_helper_sve_ldnf1bhs_r,
4054         gen_helper_sve_ldnf1dd_r,
4055     };
4056
4057     if (sve_access_check(s)) {
4058         int vsz = vec_full_reg_size(s);
4059         int elements = vsz >> dtype_esz[a->dtype];
4060         int off = (a->imm * elements) << dtype_msz(a->dtype);
4061         TCGv_i64 addr = new_tmp_a64(s);
4062
4063         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
4064         do_mem_zpa(s, a->rd, a->pg, addr, fns[a->dtype]);
4065     }
4066     return true;
4067 }
4068
4069 static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
4070 {
4071     static gen_helper_gvec_mem * const fns[4] = {
4072         gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_r,
4073         gen_helper_sve_ld1ss_r, gen_helper_sve_ld1dd_r,
4074     };
4075     unsigned vsz = vec_full_reg_size(s);
4076     TCGv_ptr t_pg;
4077     TCGv_i32 desc;
4078
4079     /* Load the first quadword using the normal predicated load helpers.  */
4080     desc = tcg_const_i32(simd_desc(16, 16, zt));
4081     t_pg = tcg_temp_new_ptr();
4082
4083     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4084     fns[msz](cpu_env, t_pg, addr, desc);
4085
4086     tcg_temp_free_ptr(t_pg);
4087     tcg_temp_free_i32(desc);
4088
4089     /* Replicate that first quadword.  */
4090     if (vsz > 16) {
4091         unsigned dofs = vec_full_reg_offset(s, zt);
4092         tcg_gen_gvec_dup_mem(4, dofs + 16, dofs, vsz - 16, vsz - 16);
4093     }
4094 }
4095
4096 static bool trans_LD1RQ_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
4097 {
4098     if (a->rm == 31) {
4099         return false;
4100     }
4101     if (sve_access_check(s)) {
4102         int msz = dtype_msz(a->dtype);
4103         TCGv_i64 addr = new_tmp_a64(s);
4104         tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4105         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4106         do_ldrq(s, a->rd, a->pg, addr, msz);
4107     }
4108     return true;
4109 }
4110
4111 static bool trans_LD1RQ_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4112 {
4113     if (sve_access_check(s)) {
4114         TCGv_i64 addr = new_tmp_a64(s);
4115         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 16);
4116         do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4117     }
4118     return true;
4119 }
4120
4121 /* Load and broadcast element.  */
4122 static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
4123 {
4124     if (!sve_access_check(s)) {
4125         return true;
4126     }
4127
4128     unsigned vsz = vec_full_reg_size(s);
4129     unsigned psz = pred_full_reg_size(s);
4130     unsigned esz = dtype_esz[a->dtype];
4131     TCGLabel *over = gen_new_label();
4132     TCGv_i64 temp;
4133
4134     /* If the guarding predicate has no bits set, no load occurs.  */
4135     if (psz <= 8) {
4136         /* Reduce the pred_esz_masks value simply to reduce the
4137          * size of the code generated here.
4138          */
4139         uint64_t psz_mask = MAKE_64BIT_MASK(0, psz * 8);
4140         temp = tcg_temp_new_i64();
4141         tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
4142         tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
4143         tcg_gen_brcondi_i64(TCG_COND_EQ, temp, 0, over);
4144         tcg_temp_free_i64(temp);
4145     } else {
4146         TCGv_i32 t32 = tcg_temp_new_i32();
4147         find_last_active(s, t32, esz, a->pg);
4148         tcg_gen_brcondi_i32(TCG_COND_LT, t32, 0, over);
4149         tcg_temp_free_i32(t32);
4150     }
4151
4152     /* Load the data.  */
4153     temp = tcg_temp_new_i64();
4154     tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << esz);
4155     tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
4156                         s->be_data | dtype_mop[a->dtype]);
4157
4158     /* Broadcast to *all* elements.  */
4159     tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
4160                          vsz, vsz, temp);
4161     tcg_temp_free_i64(temp);
4162
4163     /* Zero the inactive elements.  */
4164     gen_set_label(over);
4165     do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
4166     return true;
4167 }
4168
4169 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
4170                       int msz, int esz, int nreg)
4171 {
4172     static gen_helper_gvec_mem * const fn_single[4][4] = {
4173         { gen_helper_sve_st1bb_r, gen_helper_sve_st1bh_r,
4174           gen_helper_sve_st1bs_r, gen_helper_sve_st1bd_r },
4175         { NULL,                   gen_helper_sve_st1hh_r,
4176           gen_helper_sve_st1hs_r, gen_helper_sve_st1hd_r },
4177         { NULL, NULL,
4178           gen_helper_sve_st1ss_r, gen_helper_sve_st1sd_r },
4179         { NULL, NULL, NULL, gen_helper_sve_st1dd_r },
4180     };
4181     static gen_helper_gvec_mem * const fn_multiple[3][4] = {
4182         { gen_helper_sve_st2bb_r, gen_helper_sve_st2hh_r,
4183           gen_helper_sve_st2ss_r, gen_helper_sve_st2dd_r },
4184         { gen_helper_sve_st3bb_r, gen_helper_sve_st3hh_r,
4185           gen_helper_sve_st3ss_r, gen_helper_sve_st3dd_r },
4186         { gen_helper_sve_st4bb_r, gen_helper_sve_st4hh_r,
4187           gen_helper_sve_st4ss_r, gen_helper_sve_st4dd_r },
4188     };
4189     gen_helper_gvec_mem *fn;
4190
4191     if (nreg == 0) {
4192         /* ST1 */
4193         fn = fn_single[msz][esz];
4194     } else {
4195         /* ST2, ST3, ST4 -- msz == esz, enforced by encoding */
4196         assert(msz == esz);
4197         fn = fn_multiple[nreg - 1][msz];
4198     }
4199     assert(fn != NULL);
4200     do_mem_zpa(s, zt, pg, addr, fn);
4201 }
4202
4203 static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a, uint32_t insn)
4204 {
4205     if (a->rm == 31 || a->msz > a->esz) {
4206         return false;
4207     }
4208     if (sve_access_check(s)) {
4209         TCGv_i64 addr = new_tmp_a64(s);
4210         tcg_gen_muli_i64(addr, cpu_reg(s, a->rm), (a->nreg + 1) << a->msz);
4211         tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4212         do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
4213     }
4214     return true;
4215 }
4216
4217 static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a, uint32_t insn)
4218 {
4219     if (a->msz > a->esz) {
4220         return false;
4221     }
4222     if (sve_access_check(s)) {
4223         int vsz = vec_full_reg_size(s);
4224         int elements = vsz >> a->esz;
4225         TCGv_i64 addr = new_tmp_a64(s);
4226
4227         tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4228                          (a->imm * elements * (a->nreg + 1)) << a->msz);
4229         do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
4230     }
4231     return true;
4232 }
4233
4234 /*
4235  *** SVE gather loads / scatter stores
4236  */
4237
4238 static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, int scale,
4239                        TCGv_i64 scalar, gen_helper_gvec_mem_scatter *fn)
4240 {
4241     unsigned vsz = vec_full_reg_size(s);
4242     TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, scale));
4243     TCGv_ptr t_zm = tcg_temp_new_ptr();
4244     TCGv_ptr t_pg = tcg_temp_new_ptr();
4245     TCGv_ptr t_zt = tcg_temp_new_ptr();
4246
4247     tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4248     tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
4249     tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
4250     fn(cpu_env, t_zt, t_pg, t_zm, scalar, desc);
4251
4252     tcg_temp_free_ptr(t_zt);
4253     tcg_temp_free_ptr(t_zm);
4254     tcg_temp_free_ptr(t_pg);
4255     tcg_temp_free_i32(desc);
4256 }
4257
4258 /* Indexed by [ff][xs][u][msz].  */
4259 static gen_helper_gvec_mem_scatter * const gather_load_fn32[2][2][2][3] = {
4260     { { { gen_helper_sve_ldbss_zsu,
4261           gen_helper_sve_ldhss_zsu,
4262           NULL, },
4263         { gen_helper_sve_ldbsu_zsu,
4264           gen_helper_sve_ldhsu_zsu,
4265           gen_helper_sve_ldssu_zsu, } },
4266       { { gen_helper_sve_ldbss_zss,
4267           gen_helper_sve_ldhss_zss,
4268           NULL, },
4269         { gen_helper_sve_ldbsu_zss,
4270           gen_helper_sve_ldhsu_zss,
4271           gen_helper_sve_ldssu_zss, } } },
4272     /* TODO fill in first-fault handlers */
4273 };
4274
4275 /* Note that we overload xs=2 to indicate 64-bit offset.  */
4276 static gen_helper_gvec_mem_scatter * const gather_load_fn64[2][3][2][4] = {
4277     { { { gen_helper_sve_ldbds_zsu,
4278           gen_helper_sve_ldhds_zsu,
4279           gen_helper_sve_ldsds_zsu,
4280           NULL, },
4281         { gen_helper_sve_ldbdu_zsu,
4282           gen_helper_sve_ldhdu_zsu,
4283           gen_helper_sve_ldsdu_zsu,
4284           gen_helper_sve_ldddu_zsu, } },
4285       { { gen_helper_sve_ldbds_zss,
4286           gen_helper_sve_ldhds_zss,
4287           gen_helper_sve_ldsds_zss,
4288           NULL, },
4289         { gen_helper_sve_ldbdu_zss,
4290           gen_helper_sve_ldhdu_zss,
4291           gen_helper_sve_ldsdu_zss,
4292           gen_helper_sve_ldddu_zss, } },
4293       { { gen_helper_sve_ldbds_zd,
4294           gen_helper_sve_ldhds_zd,
4295           gen_helper_sve_ldsds_zd,
4296           NULL, },
4297         { gen_helper_sve_ldbdu_zd,
4298           gen_helper_sve_ldhdu_zd,
4299           gen_helper_sve_ldsdu_zd,
4300           gen_helper_sve_ldddu_zd, } } },
4301     /* TODO fill in first-fault handlers */
4302 };
4303
4304 static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn)
4305 {
4306     gen_helper_gvec_mem_scatter *fn = NULL;
4307
4308     if (!sve_access_check(s)) {
4309         return true;
4310     }
4311
4312     switch (a->esz) {
4313     case MO_32:
4314         fn = gather_load_fn32[a->ff][a->xs][a->u][a->msz];
4315         break;
4316     case MO_64:
4317         fn = gather_load_fn64[a->ff][a->xs][a->u][a->msz];
4318         break;
4319     }
4320     assert(fn != NULL);
4321
4322     do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
4323                cpu_reg_sp(s, a->rn), fn);
4324     return true;
4325 }
4326
4327 static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn)
4328 {
4329     gen_helper_gvec_mem_scatter *fn = NULL;
4330     TCGv_i64 imm;
4331
4332     if (a->esz < a->msz || (a->esz == a->msz && !a->u)) {
4333         return false;
4334     }
4335     if (!sve_access_check(s)) {
4336         return true;
4337     }
4338
4339     switch (a->esz) {
4340     case MO_32:
4341         fn = gather_load_fn32[a->ff][0][a->u][a->msz];
4342         break;
4343     case MO_64:
4344         fn = gather_load_fn64[a->ff][2][a->u][a->msz];
4345         break;
4346     }
4347     assert(fn != NULL);
4348
4349     /* Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])
4350      * by loading the immediate into the scalar parameter.
4351      */
4352     imm = tcg_const_i64(a->imm << a->msz);
4353     do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
4354     tcg_temp_free_i64(imm);
4355     return true;
4356 }
4357
4358 static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a, uint32_t insn)
4359 {
4360     /* Indexed by [xs][msz].  */
4361     static gen_helper_gvec_mem_scatter * const fn32[2][3] = {
4362         { gen_helper_sve_stbs_zsu,
4363           gen_helper_sve_sths_zsu,
4364           gen_helper_sve_stss_zsu, },
4365         { gen_helper_sve_stbs_zss,
4366           gen_helper_sve_sths_zss,
4367           gen_helper_sve_stss_zss, },
4368     };
4369     /* Note that we overload xs=2 to indicate 64-bit offset.  */
4370     static gen_helper_gvec_mem_scatter * const fn64[3][4] = {
4371         { gen_helper_sve_stbd_zsu,
4372           gen_helper_sve_sthd_zsu,
4373           gen_helper_sve_stsd_zsu,
4374           gen_helper_sve_stdd_zsu, },
4375         { gen_helper_sve_stbd_zss,
4376           gen_helper_sve_sthd_zss,
4377           gen_helper_sve_stsd_zss,
4378           gen_helper_sve_stdd_zss, },
4379         { gen_helper_sve_stbd_zd,
4380           gen_helper_sve_sthd_zd,
4381           gen_helper_sve_stsd_zd,
4382           gen_helper_sve_stdd_zd, },
4383     };
4384     gen_helper_gvec_mem_scatter *fn;
4385
4386     if (a->esz < a->msz || (a->msz == 0 && a->scale)) {
4387         return false;
4388     }
4389     if (!sve_access_check(s)) {
4390         return true;
4391     }
4392     switch (a->esz) {
4393     case MO_32:
4394         fn = fn32[a->xs][a->msz];
4395         break;
4396     case MO_64:
4397         fn = fn64[a->xs][a->msz];
4398         break;
4399     default:
4400         g_assert_not_reached();
4401     }
4402     do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
4403                cpu_reg_sp(s, a->rn), fn);
4404     return true;
4405 }
4406
4407 /*
4408  * Prefetches
4409  */
4410
4411 static bool trans_PRF(DisasContext *s, arg_PRF *a, uint32_t insn)
4412 {
4413     /* Prefetch is a nop within QEMU.  */
4414     sve_access_check(s);
4415     return true;
4416 }
4417
4418 static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a, uint32_t insn)
4419 {
4420     if (a->rm == 31) {
4421         return false;
4422     }
4423     /* Prefetch is a nop within QEMU.  */
4424     sve_access_check(s);
4425     return true;
4426 }