target-arm/translate-a64.c

   1 /*
   2  *  AArch64 translation
   3  *
   4  *  Copyright (c) 2013 Alexander Graf <[email protected]>
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19 #include <stdarg.h>
  20 #include <stdlib.h>
  21 #include <stdio.h>
  22 #include <string.h>
  23 #include <inttypes.h>
  24
  25 #include "cpu.h"
  26 #include "tcg-op.h"
  27 #include "qemu/log.h"
  28 #include "arm_ldst.h"
  29 #include "translate.h"
  30 #include "internals.h"
  31 #include "qemu/host-utils.h"
  32
  33 #include "exec/semihost.h"
  34 #include "exec/gen-icount.h"
  35
  36 #include "exec/helper-proto.h"
  37 #include "exec/helper-gen.h"
  38
  39 #include "trace-tcg.h"
  40
/* TCG globals mirroring CPUARMState.xregs[0..31] and CPUARMState.pc.
 * They are created once by a64_translate_init().
 */
static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
/* TCG global mirroring CPUARMState.exclusive_high. */
static TCGv_i64 cpu_exclusive_high;
  46
/* Names handed to TCG for the cpu_X[] globals, indexed by register
 * number.  Note that slot 30 is labelled "lr" and slot 31 "sp".
 */
static const char *regnames[] = {
    "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
    "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
    "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
};
  53
/* Kinds of shift: logical left, logical right, arithmetic right and
 * rotate right.
 * NOTE(review): the numeric values presumably match the 2-bit shift
 * type field of the A64 encodings -- confirm against the decoders that
 * consume this enum (they are not in this part of the file).
 */
enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};
  60
/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
/* Signature of a per-instruction decode/translate routine. */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

/* One decode table row.
 * NOTE(review): presumably a row matches when (insn & mask) == pattern
 * and disas_fn is then invoked; the lookup code is not visible here.
 */
typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;
} AArch64DecodeTable;
  71
/* Function prototype for gen_ functions for calling Neon helpers */
/* The "Env" variants take an additional TCGv_ptr argument; the
 * TwoSingleOP/TwoDoubleOP variants take a trailing TCGv_ptr instead.
 * NOTE(review): by the usual gen_helper_* convention the first TCGv
 * argument is presumably the destination and the TCGv_ptr is cpu_env
 * or an fpstatus pointer -- confirm at the call sites.
 */
typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
typedef void CryptoTwoOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void CryptoThreeOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
  86
  87 /* initialize TCG globals.  */
  88 void a64_translate_init(void)
  89 {
  90     int i;
  91
  92     cpu_pc = tcg_global_mem_new_i64(TCG_AREG0,
  93                                     offsetof(CPUARMState, pc),
  94                                     "pc");
  95     for (i = 0; i < 32; i++) {
  96         cpu_X[i] = tcg_global_mem_new_i64(TCG_AREG0,
  97                                           offsetof(CPUARMState, xregs[i]),
  98                                           regnames[i]);
  99     }
 100
 101     cpu_exclusive_high = tcg_global_mem_new_i64(TCG_AREG0,
 102         offsetof(CPUARMState, exclusive_high), "exclusive_high");
 103 }
 104
 105 static inline ARMMMUIdx get_a64_user_mem_index(DisasContext *s)
 106 {
 107     /* Return the mmu_idx to use for A64 "unprivileged load/store" insns:
 108      *  if EL1, access as if EL0; otherwise access at current EL
 109      */
 110     switch (s->mmu_idx) {
 111     case ARMMMUIdx_S12NSE1:
 112         return ARMMMUIdx_S12NSE0;
 113     case ARMMMUIdx_S1SE1:
 114         return ARMMMUIdx_S1SE0;
 115     case ARMMMUIdx_S2NS:
 116         g_assert_not_reached();
 117     default:
 118         return s->mmu_idx;
 119     }
 120 }
 121
 122 void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
 123                             fprintf_function cpu_fprintf, int flags)
 124 {
 125     ARMCPU *cpu = ARM_CPU(cs);
 126     CPUARMState *env = &cpu->env;
 127     uint32_t psr = pstate_read(env);
 128     int i;
 129
 130     cpu_fprintf(f, "PC=%016"PRIx64"  SP=%016"PRIx64"\n",
 131             env->pc, env->xregs[31]);
 132     for (i = 0; i < 31; i++) {
 133         cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
 134         if ((i % 4) == 3) {
 135             cpu_fprintf(f, "\n");
 136         } else {
 137             cpu_fprintf(f, " ");
 138         }
 139     }
 140     cpu_fprintf(f, "PSTATE=%08x (flags %c%c%c%c)\n",
 141                 psr,
 142                 psr & PSTATE_N ? 'N' : '-',
 143                 psr & PSTATE_Z ? 'Z' : '-',
 144                 psr & PSTATE_C ? 'C' : '-',
 145                 psr & PSTATE_V ? 'V' : '-');
 146     cpu_fprintf(f, "\n");
 147
 148     if (flags & CPU_DUMP_FPU) {
 149         int numvfpregs = 32;
 150         for (i = 0; i < numvfpregs; i += 2) {
 151             uint64_t vlo = float64_val(env->vfp.regs[i * 2]);
 152             uint64_t vhi = float64_val(env->vfp.regs[(i * 2) + 1]);
 153             cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 " ",
 154                         i, vhi, vlo);
 155             vlo = float64_val(env->vfp.regs[(i + 1) * 2]);
 156             vhi = float64_val(env->vfp.regs[((i + 1) * 2) + 1]);
 157             cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "\n",
 158                         i + 1, vhi, vlo);
 159         }
 160         cpu_fprintf(f, "FPCR: %08x  FPSR: %08x\n",
 161                     vfp_get_fpcr(env), vfp_get_fpsr(env));
 162     }
 163 }
 164
/* Emit a TCG op that loads the immediate @val into the cpu_pc global. */
void gen_a64_set_pc_im(uint64_t val)
{
    tcg_gen_movi_i64(cpu_pc, val);
}
 169
/* 64 bit condition test description, filled in by a64_test_cc() and
 * released with a64_free_cc().
 * NOTE(review): by analogy with the 32 bit DisasCompare the condition
 * presumably holds when 'value' compared against zero satisfies
 * 'cond' -- confirm at the users.
 */
typedef struct DisasCompare64 {
    TCGCond cond;
    TCGv_i64 value;
} DisasCompare64;
 174
 175 static void a64_test_cc(DisasCompare64 *c64, int cc)
 176 {
 177     DisasCompare c32;
 178
 179     arm_test_cc(&c32, cc);
 180
 181     /* Sign-extend the 32-bit value so that the GE/LT comparisons work
 182        * properly.  The NE/EQ comparisons are also fine with this choice.  */
 183     c64->cond = c32.cond;
 184     c64->value = tcg_temp_new_i64();
 185     tcg_gen_ext_i32_i64(c64->value, c32.value);
 186
 187     arm_free_cc(&c32);
 188 }
 189
/* Release the 64 bit temporary held in @c64->value (allocated by
 * a64_test_cc()).
 */
static void a64_free_cc(DisasCompare64 *c64)
{
    tcg_temp_free_i64(c64->value);
}
 194
 195 static void gen_exception_internal(int excp)
 196 {
 197     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 198
 199     assert(excp_is_internal(excp));
 200     gen_helper_exception_internal(cpu_env, tcg_excp);
 201     tcg_temp_free_i32(tcg_excp);
 202 }
 203
 204 static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
 205 {
 206     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 207     TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
 208     TCGv_i32 tcg_el = tcg_const_i32(target_el);
 209
 210     gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
 211                                        tcg_syn, tcg_el);
 212     tcg_temp_free_i32(tcg_el);
 213     tcg_temp_free_i32(tcg_syn);
 214     tcg_temp_free_i32(tcg_excp);
 215 }
 216
 217 static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
 218 {
 219     gen_a64_set_pc_im(s->pc - offset);
 220     gen_exception_internal(excp);
 221     s->is_jmp = DISAS_EXC;
 222 }
 223
 224 static void gen_exception_insn(DisasContext *s, int offset, int excp,
 225                                uint32_t syndrome, uint32_t target_el)
 226 {
 227     gen_a64_set_pc_im(s->pc - offset);
 228     gen_exception(excp, syndrome, target_el);
 229     s->is_jmp = DISAS_EXC;
 230 }
 231
 232 static void gen_ss_advance(DisasContext *s)
 233 {
 234     /* If the singlestep state is Active-not-pending, advance to
 235      * Active-pending.
 236      */
 237     if (s->ss_active) {
 238         s->pstate_ss = 0;
 239         gen_helper_clear_pstate_ss(cpu_env);
 240     }
 241 }
 242
 243 static void gen_step_complete_exception(DisasContext *s)
 244 {
 245     /* We just completed step of an insn. Move from Active-not-pending
 246      * to Active-pending, and then also take the swstep exception.
 247      * This corresponds to making the (IMPDEF) choice to prioritize
 248      * swstep exceptions over asynchronous exceptions taken to an exception
 249      * level where debug is disabled. This choice has the advantage that
 250      * we do not need to maintain internal state corresponding to the
 251      * ISV/EX syndrome bits between completion of the step and generation
 252      * of the exception, and our syndrome information is always correct.
 253      */
 254     gen_ss_advance(s);
 255     gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
 256                   default_exception_el(s));
 257     s->is_jmp = DISAS_EXC;
 258 }
 259
 260 static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
 261 {
 262     /* No direct tb linking with singlestep (either QEMU's or the ARM
 263      * debug architecture kind) or deterministic io
 264      */
 265     if (s->singlestep_enabled || s->ss_active || (s->tb->cflags & CF_LAST_IO)) {
 266         return false;
 267     }
 268
 269     /* Only link tbs from inside the same guest page */
 270     if ((s->tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
 271         return false;
 272     }
 273
 274     return true;
 275 }
 276
 277 static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
 278 {
 279     TranslationBlock *tb;
 280
 281     tb = s->tb;
 282     if (use_goto_tb(s, n, dest)) {
 283         tcg_gen_goto_tb(n);
 284         gen_a64_set_pc_im(dest);
 285         tcg_gen_exit_tb((intptr_t)tb + n);
 286         s->is_jmp = DISAS_TB_JUMP;
 287     } else {
 288         gen_a64_set_pc_im(dest);
 289         if (s->ss_active) {
 290             gen_step_complete_exception(s);
 291         } else if (s->singlestep_enabled) {
 292             gen_exception_internal(EXCP_DEBUG);
 293         } else {
 294             tcg_gen_exit_tb(0);
 295             s->is_jmp = DISAS_TB_JUMP;
 296         }
 297     }
 298 }
 299
/* Raise EXCP_UDEF with an "uncategorized" syndrome for the current
 * instruction.  The exception PC is s->pc - 4.
 * NOTE(review): this presumably relies on s->pc already pointing past
 * the 4-byte instruction being translated -- confirm in the main loop.
 */
static void unallocated_encoding(DisasContext *s)
{
    /* Unallocated and reserved encodings are uncategorized */
    gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
                       default_exception_el(s));
}
 306
 307 #define unsupported_encoding(s, insn)                                    \
 308     do {                                                                 \
 309         qemu_log_mask(LOG_UNIMP,                                         \
 310                       "%s:%d: unsupported instruction encoding 0x%08x "  \
 311                       "at pc=%016" PRIx64 "\n",                          \
 312                       __FILE__, __LINE__, insn, s->pc - 4);              \
 313         unallocated_encoding(s);                                         \
 314     } while (0);
 315
 316 static void init_tmp_a64_array(DisasContext *s)
 317 {
 318 #ifdef CONFIG_DEBUG_TCG
 319     int i;
 320     for (i = 0; i < ARRAY_SIZE(s->tmp_a64); i++) {
 321         TCGV_UNUSED_I64(s->tmp_a64[i]);
 322     }
 323 #endif
 324     s->tmp_a64_count = 0;
 325 }
 326
 327 static void free_tmp_a64(DisasContext *s)
 328 {
 329     int i;
 330     for (i = 0; i < s->tmp_a64_count; i++) {
 331         tcg_temp_free_i64(s->tmp_a64[i]);
 332     }
 333     init_tmp_a64_array(s);
 334 }
 335
 336 static TCGv_i64 new_tmp_a64(DisasContext *s)
 337 {
 338     assert(s->tmp_a64_count < TMP_A64_MAX);
 339     return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
 340 }
 341
 342 static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
 343 {
 344     TCGv_i64 t = new_tmp_a64(s);
 345     tcg_gen_movi_i64(t, 0);
 346     return t;
 347 }
 348
 349 /*
 350  * Register access functions
 351  *
 * These functions are used for directly accessing a register where
 * changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 356  *
 357  * B1.2.1 Register mappings
 358  *
 359  * In instruction register encoding 31 can refer to ZR (zero register) or
 360  * the SP (stack pointer) depending on context. In QEMU's case we map SP
 361  * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 362  * This is the point of the _sp forms.
 363  */
 364 static TCGv_i64 cpu_reg(DisasContext *s, int reg)
 365 {
 366     if (reg == 31) {
 367         return new_tmp_a64_zero(s);
 368     } else {
 369         return cpu_X[reg];
 370     }
 371 }
 372
/* register access for when 31 == SP
 *
 * Returns the cpu_X[] global for @reg directly (no temporary), so
 * encoding 31 yields cpu_X[31].  @s is not used.
 */
static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}
 378
 379 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 380  * representing the register contents. This TCGv is an auto-freed
 381  * temporary so it need not be explicitly freed, and may be modified.
 382  */
 383 static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
 384 {
 385     TCGv_i64 v = new_tmp_a64(s);
 386     if (reg != 31) {
 387         if (sf) {
 388             tcg_gen_mov_i64(v, cpu_X[reg]);
 389         } else {
 390             tcg_gen_ext32u_i64(v, cpu_X[reg]);
 391         }
 392     } else {
 393         tcg_gen_movi_i64(v, 0);
 394     }
 395     return v;
 396 }
 397
 398 static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
 399 {
 400     TCGv_i64 v = new_tmp_a64(s);
 401     if (sf) {
 402         tcg_gen_mov_i64(v, cpu_X[reg]);
 403     } else {
 404         tcg_gen_ext32u_i64(v, cpu_X[reg]);
 405     }
 406     return v;
 407 }
 408
 409 /* We should have at some point before trying to access an FP register
 410  * done the necessary access check, so assert that
 411  * (a) we did the check and
 412  * (b) we didn't then just plough ahead anyway if it failed.
 413  * Print the instruction pattern in the abort message so we can figure
 414  * out what we need to fix if a user encounters this problem in the wild.
 415  */
 416 static inline void assert_fp_access_checked(DisasContext *s)
 417 {
 418 #ifdef CONFIG_DEBUG_TCG
 419     if (unlikely(!s->fp_access_checked || s->fp_excp_el)) {
 420         fprintf(stderr, "target-arm: FP access check missing for "
 421                 "instruction 0x%08x\n", s->insn);
 422         abort();
 423     }
 424 #endif
 425 }
 426
 427 /* Return the offset into CPUARMState of an element of specified
 428  * size, 'element' places in from the least significant end of
 429  * the FP/vector register Qn.
 430  */
 431 static inline int vec_reg_offset(DisasContext *s, int regno,
 432                                  int element, TCGMemOp size)
 433 {
 434     int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
 435 #ifdef HOST_WORDS_BIGENDIAN
 436     /* This is complicated slightly because vfp.regs[2n] is
 437      * still the low half and  vfp.regs[2n+1] the high half
 438      * of the 128 bit vector, even on big endian systems.
 439      * Calculate the offset assuming a fully bigendian 128 bits,
 440      * then XOR to account for the order of the two 64 bit halves.
 441      */
 442     offs += (16 - ((element + 1) * (1 << size)));
 443     offs ^= 8;
 444 #else
 445     offs += element * (1 << size);
 446 #endif
 447     assert_fp_access_checked(s);
 448     return offs;
 449 }
 450
 451 /* Return the offset into CPUARMState of a slice (from
 452  * the least significant end) of FP register Qn (ie
 453  * Dn, Sn, Hn or Bn).
 454  * (Note that this is not the same mapping as for A32; see cpu.h)
 455  */
 456 static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size)
 457 {
 458     int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
 459 #ifdef HOST_WORDS_BIGENDIAN
 460     offs += (8 - (1 << size));
 461 #endif
 462     assert_fp_access_checked(s);
 463     return offs;
 464 }
 465
 466 /* Offset of the high half of the 128 bit vector Qn */
 467 static inline int fp_reg_hi_offset(DisasContext *s, int regno)
 468 {
 469     assert_fp_access_checked(s);
 470     return offsetof(CPUARMState, vfp.regs[regno * 2 + 1]);
 471 }
 472
 473 /* Convenience accessors for reading and writing single and double
 474  * FP registers. Writing clears the upper parts of the associated
 475  * 128 bit vector register, as required by the architecture.
 476  * Note that unlike the GP register accessors, the values returned
 477  * by the read functions must be manually freed.
 478  */
 479 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
 480 {
 481     TCGv_i64 v = tcg_temp_new_i64();
 482
 483     tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 484     return v;
 485 }
 486
 487 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
 488 {
 489     TCGv_i32 v = tcg_temp_new_i32();
 490
 491     tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
 492     return v;
 493 }
 494
 495 static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
 496 {
 497     TCGv_i64 tcg_zero = tcg_const_i64(0);
 498
 499     tcg_gen_st_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 500     tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(s, reg));
 501     tcg_temp_free_i64(tcg_zero);
 502 }
 503
 504 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
 505 {
 506     TCGv_i64 tmp = tcg_temp_new_i64();
 507
 508     tcg_gen_extu_i32_i64(tmp, v);
 509     write_fp_dreg(s, reg, tmp);
 510     tcg_temp_free_i64(tmp);
 511 }
 512
 513 static TCGv_ptr get_fpstatus_ptr(void)
 514 {
 515     TCGv_ptr statusptr = tcg_temp_new_ptr();
 516     int offset;
 517
 518     /* In A64 all instructions (both FP and Neon) use the FPCR;
 519      * there is no equivalent of the A32 Neon "standard FPSCR value"
 520      * and all operations use vfp.fp_status.
 521      */
 522     offset = offsetof(CPUARMState, vfp.fp_status);
 523     tcg_gen_addi_ptr(statusptr, cpu_env, offset);
 524     return statusptr;
 525 }
 526
/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 *
 * The 64 bit value is split into two 32 bit halves (per the TCG
 * extr_i64_i32 op: low half -> cpu_ZF, high half -> cpu_NF), and the
 * halves are then ORed into cpu_ZF so that cpu_ZF is zero only when
 * all 64 bits of @result are zero.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}
 535
 536 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
 537 static inline void gen_logic_CC(int sf, TCGv_i64 result)
 538 {
 539     if (sf) {
 540         gen_set_NZ64(result);
 541     } else {
 542         tcg_gen_extrl_i64_i32(cpu_ZF, result);
 543         tcg_gen_mov_i32(cpu_NF, cpu_ZF);
 544     }
 545     tcg_gen_movi_i32(cpu_CF, 0);
 546     tcg_gen_movi_i32(cpu_VF, 0);
 547 }
 548
/* dest = T0 + T1; compute C, N, V and Z flags
 *
 * @sf selects 64 bit (non-zero) or 32 bit (zero) arithmetic.  In the
 * 32 bit form only the low 32 bits of t0/t1 take part and dest receives
 * the zero-extended 32 bit sum.  dest is written last in both forms,
 * after every read of t0 and t1.
 */
static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, flag, tmp;
        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tmp = tcg_temp_new_i64();

        /* Double-word add with zero high parts: the low word of the
         * sum goes to result, the high word (the carry out) to flag.
         */
        tcg_gen_movi_i64(tmp, 0);
        tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);

        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        gen_set_NZ64(result);

        /* Overflow: (result ^ t0) & ~(t0 ^ t1).  The top bit is set
         * when both operands share a sign that the result lacks; the
         * high half of that value is then copied into cpu_VF.
         */
        tcg_gen_xor_i64(flag, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);

        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(result);
        tcg_temp_free_i64(flag);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp = tcg_temp_new_i32();

        tcg_gen_movi_i32(tmp, 0);
        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        /* Sum lands directly in cpu_NF, carry out in cpu_CF */
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        /* Same overflow formula as above, on 32 bit values */
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
    }
}
 595
/* dest = T0 - T1; compute C, N, V and Z flags
 *
 * @sf selects 64 bit (non-zero) or 32 bit (zero) arithmetic.  In the
 * 32 bit form only the low 32 bits of t0/t1 take part and dest receives
 * the zero-extended 32 bit difference.  dest is written last in both
 * forms, after every read of t0 and t1.
 */
static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        /* 64 bit arithmetic */
        TCGv_i64 result, flag, tmp;

        result = tcg_temp_new_i64();
        flag = tcg_temp_new_i64();
        tcg_gen_sub_i64(result, t0, t1);

        gen_set_NZ64(result);

        /* C is set when t0 >= t1 as unsigned values */
        tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
        tcg_gen_extrl_i64_i32(cpu_CF, flag);

        /* Overflow: (result ^ t0) & (t0 ^ t1).  The top bit is set
         * when the operands differ in sign and the result's sign
         * differs from t0; the high half is copied into cpu_VF.
         */
        tcg_gen_xor_i64(flag, result, t0);
        tmp = tcg_temp_new_i64();
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_and_i64(flag, flag, tmp);
        tcg_temp_free_i64(tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, flag);
        tcg_gen_mov_i64(dest, result);
        tcg_temp_free_i64(flag);
        tcg_temp_free_i64(result);
    } else {
        /* 32 bit arithmetic */
        TCGv_i32 t0_32 = tcg_temp_new_i32();
        TCGv_i32 t1_32 = tcg_temp_new_i32();
        TCGv_i32 tmp;

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        /* Difference lands directly in cpu_NF and is copied to cpu_ZF */
        tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
        /* Same overflow formula as above, on 32 bit values */
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tmp = tcg_temp_new_i32();
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_temp_free_i32(t0_32);
        tcg_temp_free_i32(t1_32);
        tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
        tcg_temp_free_i32(tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);
    }
}
 642
 643 /* dest = T0 + T1 + CF; do not compute flags. */
 644 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 645 {
 646     TCGv_i64 flag = tcg_temp_new_i64();
 647     tcg_gen_extu_i32_i64(flag, cpu_CF);
 648     tcg_gen_add_i64(dest, t0, t1);
 649     tcg_gen_add_i64(dest, dest, flag);
 650     tcg_temp_free_i64(flag);
 651
 652     if (!sf) {
 653         tcg_gen_ext32u_i64(dest, dest);
 654     }
 655 }
 656
/* dest = T0 + T1 + CF; compute C, N, V and Z flags.
 *
 * @sf selects 64 bit (non-zero) or 32 bit (zero) arithmetic.  In the
 * 32 bit form only the low 32 bits of t0/t1 take part and dest receives
 * the zero-extended 32 bit sum.  dest is written last in both forms,
 * after every read of t0 and t1.
 */
static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
{
    if (sf) {
        TCGv_i64 result, cf_64, vf_64, tmp;
        result = tcg_temp_new_i64();
        cf_64 = tcg_temp_new_i64();
        vf_64 = tcg_temp_new_i64();
        tmp = tcg_const_i64(0);

        /* Two chained double-word adds: first t0 + CF, then + t1.
         * cf_64 carries the high word (the carry) from the first add
         * into the second, so it ends up holding the final carry out.
         */
        tcg_gen_extu_i32_i64(cf_64, cpu_CF);
        tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
        tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
        tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
        gen_set_NZ64(result);

        /* Overflow: (result ^ t0) & ~(t0 ^ t1); tmp is no longer
         * needed as zero and is reused as scratch here.
         */
        tcg_gen_xor_i64(vf_64, result, t0);
        tcg_gen_xor_i64(tmp, t0, t1);
        tcg_gen_andc_i64(vf_64, vf_64, tmp);
        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);

        tcg_gen_mov_i64(dest, result);

        tcg_temp_free_i64(tmp);
        tcg_temp_free_i64(vf_64);
        tcg_temp_free_i64(cf_64);
        tcg_temp_free_i64(result);
    } else {
        TCGv_i32 t0_32, t1_32, tmp;
        t0_32 = tcg_temp_new_i32();
        t1_32 = tcg_temp_new_i32();
        tmp = tcg_const_i32(0);

        tcg_gen_extrl_i64_i32(t0_32, t0);
        tcg_gen_extrl_i64_i32(t1_32, t1);
        /* Same two-step add as above, accumulating straight into
         * cpu_NF (sum) and cpu_CF (carry).
         */
        tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
        tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);

        tcg_gen_mov_i32(cpu_ZF, cpu_NF);
        tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
        tcg_gen_xor_i32(tmp, t0_32, t1_32);
        tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
        tcg_gen_extu_i32_i64(dest, cpu_NF);

        tcg_temp_free_i32(tmp);
        tcg_temp_free_i32(t1_32);
        tcg_temp_free_i32(t0_32);
    }
}
 706
 707 /*
 708  * Load/Store generators
 709  */
 710
 711 /*
 712  * Store from GPR register to memory.
 713  */
 714 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
 715                              TCGv_i64 tcg_addr, int size, int memidx)
 716 {
 717     g_assert(size <= 3);
 718     tcg_gen_qemu_st_i64(source, tcg_addr, memidx, MO_TE + size);
 719 }
 720
 721 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
 722                       TCGv_i64 tcg_addr, int size)
 723 {
 724     do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s));
 725 }
 726
 727 /*
 728  * Load from memory to GPR register
 729  */
 730 static void do_gpr_ld_memidx(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
 731                              int size, bool is_signed, bool extend, int memidx)
 732 {
 733     TCGMemOp memop = MO_TE + size;
 734
 735     g_assert(size <= 3);
 736
 737     if (is_signed) {
 738         memop += MO_SIGN;
 739     }
 740
 741     tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
 742
 743     if (extend && is_signed) {
 744         g_assert(size < 3);
 745         tcg_gen_ext32u_i64(dest, dest);
 746     }
 747 }
 748
 749 static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
 750                       int size, bool is_signed, bool extend)
 751 {
 752     do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
 753                      get_mem_index(s));
 754 }
 755
 756 /*
 757  * Store from FP register to memory
 758  */
 759 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
 760 {
 761     /* This writes the bottom N bits of a 128 bit wide vector to memory */
 762     TCGv_i64 tmp = tcg_temp_new_i64();
 763     tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
 764     if (size < 4) {
 765         tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TE + size);
 766     } else {
 767         TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
 768         tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s), MO_TEQ);
 769         tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
 770         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 771         tcg_gen_qemu_st_i64(tmp, tcg_hiaddr, get_mem_index(s), MO_TEQ);
 772         tcg_temp_free_i64(tcg_hiaddr);
 773     }
 774
 775     tcg_temp_free_i64(tmp);
 776 }
 777
 778 /*
 779  * Load from memory to FP register
 780  */
 781 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
 782 {
 783     /* This always zero-extends and writes to a full 128 bit wide vector */
 784     TCGv_i64 tmplo = tcg_temp_new_i64();
 785     TCGv_i64 tmphi;
 786
 787     if (size < 4) {
 788         TCGMemOp memop = MO_TE + size;
 789         tmphi = tcg_const_i64(0);
 790         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
 791     } else {
 792         TCGv_i64 tcg_hiaddr;
 793         tmphi = tcg_temp_new_i64();
 794         tcg_hiaddr = tcg_temp_new_i64();
 795
 796         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), MO_TEQ);
 797         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 798         tcg_gen_qemu_ld_i64(tmphi, tcg_hiaddr, get_mem_index(s), MO_TEQ);
 799         tcg_temp_free_i64(tcg_hiaddr);
 800     }
 801
 802     tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
 803     tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
 804
 805     tcg_temp_free_i64(tmplo);
 806     tcg_temp_free_i64(tmphi);
 807 }
 808
 809 /*
 810  * Vector load/store helpers.
 811  *
 812  * The principal difference between this and a FP load is that we don't
 813  * zero extend as we are filling a partial chunk of the vector register.
 814  * These functions don't support 128 bit loads/stores, which would be
 815  * normal load/store operations.
 816  *
 817  * The _i32 versions are useful when operating on 32 bit quantities
 818  * (eg for floating point single or using Neon helper functions).
 819  */
 820
 821 /* Get value of an element within a vector register */
 822 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
 823                              int element, TCGMemOp memop)
 824 {
 825     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
 826     switch (memop) {
 827     case MO_8:
 828         tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
 829         break;
 830     case MO_16:
 831         tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
 832         break;
 833     case MO_32:
 834         tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
 835         break;
 836     case MO_8|MO_SIGN:
 837         tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
 838         break;
 839     case MO_16|MO_SIGN:
 840         tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
 841         break;
 842     case MO_32|MO_SIGN:
 843         tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
 844         break;
 845     case MO_64:
 846     case MO_64|MO_SIGN:
 847         tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
 848         break;
 849     default:
 850         g_assert_not_reached();
 851     }
 852 }
 853
 854 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
 855                                  int element, TCGMemOp memop)
 856 {
 857     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
 858     switch (memop) {
 859     case MO_8:
 860         tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
 861         break;
 862     case MO_16:
 863         tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
 864         break;
 865     case MO_8|MO_SIGN:
 866         tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
 867         break;
 868     case MO_16|MO_SIGN:
 869         tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
 870         break;
 871     case MO_32:
 872     case MO_32|MO_SIGN:
 873         tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
 874         break;
 875     default:
 876         g_assert_not_reached();
 877     }
 878 }
 879
 880 /* Set value of an element within a vector register */
 881 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
 882                               int element, TCGMemOp memop)
 883 {
 884     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
 885     switch (memop) {
 886     case MO_8:
 887         tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
 888         break;
 889     case MO_16:
 890         tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
 891         break;
 892     case MO_32:
 893         tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
 894         break;
 895     case MO_64:
 896         tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
 897         break;
 898     default:
 899         g_assert_not_reached();
 900     }
 901 }
 902
 903 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
 904                                   int destidx, int element, TCGMemOp memop)
 905 {
 906     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
 907     switch (memop) {
 908     case MO_8:
 909         tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
 910         break;
 911     case MO_16:
 912         tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
 913         break;
 914     case MO_32:
 915         tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
 916         break;
 917     default:
 918         g_assert_not_reached();
 919     }
 920 }
 921
 922 /* Clear the high 64 bits of a 128 bit vector (in general non-quad
 923  * vector ops all need to do this).
 924  */
 925 static void clear_vec_high(DisasContext *s, int rd)
 926 {
 927     TCGv_i64 tcg_zero = tcg_const_i64(0);
 928
 929     write_vec_element(s, tcg_zero, rd, 1, MO_64);
 930     tcg_temp_free_i64(tcg_zero);
 931 }
 932
 933 /* Store from vector register to memory */
 934 static void do_vec_st(DisasContext *s, int srcidx, int element,
 935                       TCGv_i64 tcg_addr, int size)
 936 {
 937     TCGMemOp memop = MO_TE + size;
 938     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
 939
 940     read_vec_element(s, tcg_tmp, srcidx, element, size);
 941     tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
 942
 943     tcg_temp_free_i64(tcg_tmp);
 944 }
 945
 946 /* Load from memory to vector register */
 947 static void do_vec_ld(DisasContext *s, int destidx, int element,
 948                       TCGv_i64 tcg_addr, int size)
 949 {
 950     TCGMemOp memop = MO_TE + size;
 951     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
 952
 953     tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
 954     write_vec_element(s, tcg_tmp, destidx, element, size);
 955
 956     tcg_temp_free_i64(tcg_tmp);
 957 }
 958
 959 /* Check that FP/Neon access is enabled. If it is, return
 960  * true. If not, emit code to generate an appropriate exception,
 961  * and return false; the caller should not emit any code for
 962  * the instruction. Note that this check must happen after all
 963  * unallocated-encoding checks (otherwise the syndrome information
 964  * for the resulting exception will be incorrect).
 965  */
 966 static inline bool fp_access_check(DisasContext *s)
 967 {
 968     assert(!s->fp_access_checked);
 969     s->fp_access_checked = true;
 970
 971     if (!s->fp_excp_el) {
 972         return true;
 973     }
 974
 975     gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false),
 976                        s->fp_excp_el);
 977     return false;
 978 }
 979
 980 /*
 981  * This utility function is for doing register extension with an
 982  * optional shift. You will likely want to pass a temporary for the
 983  * destination register. See DecodeRegExtend() in the ARM ARM.
 984  */
 985 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
 986                               int option, unsigned int shift)
 987 {
 988     int extsize = extract32(option, 0, 2);
 989     bool is_signed = extract32(option, 2, 1);
 990
 991     if (is_signed) {
 992         switch (extsize) {
 993         case 0:
 994             tcg_gen_ext8s_i64(tcg_out, tcg_in);
 995             break;
 996         case 1:
 997             tcg_gen_ext16s_i64(tcg_out, tcg_in);
 998             break;
 999         case 2:
1000             tcg_gen_ext32s_i64(tcg_out, tcg_in);
1001             break;
1002         case 3:
1003             tcg_gen_mov_i64(tcg_out, tcg_in);
1004             break;
1005         }
1006     } else {
1007         switch (extsize) {
1008         case 0:
1009             tcg_gen_ext8u_i64(tcg_out, tcg_in);
1010             break;
1011         case 1:
1012             tcg_gen_ext16u_i64(tcg_out, tcg_in);
1013             break;
1014         case 2:
1015             tcg_gen_ext32u_i64(tcg_out, tcg_in);
1016             break;
1017         case 3:
1018             tcg_gen_mov_i64(tcg_out, tcg_in);
1019             break;
1020         }
1021     }
1022
1023     if (shift) {
1024         tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1025     }
1026 }
1027
1028 static inline void gen_check_sp_alignment(DisasContext *s)
1029 {
1030     /* The AArch64 architecture mandates that (if enabled via PSTATE
1031      * or SCTLR bits) there is a check that SP is 16-aligned on every
1032      * SP-relative load or store (with an exception generated if it is not).
1033      * In line with general QEMU practice regarding misaligned accesses,
1034      * we omit these checks for the sake of guest program performance.
1035      * This function is provided as a hook so we can more easily add these
1036      * checks in future (possibly as a "favour catching guest program bugs
1037      * over speed" user selectable option).
1038      */
1039 }
1040
1041 /*
1042  * This provides a simple table based table lookup decoder. It is
1043  * intended to be used when the relevant bits for decode are too
1044  * awkwardly placed and switch/if based logic would be confusing and
1045  * deeply nested. Since it's a linear search through the table, tables
1046  * should be kept small.
1047  *
1048  * It returns the first handler where insn & mask == pattern, or
1049  * NULL if there is no match.
1050  * The table is terminated by an empty mask (i.e. 0)
1051  */
1052 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1053                                                uint32_t insn)
1054 {
1055     const AArch64DecodeTable *tptr = table;
1056
1057     while (tptr->mask) {
1058         if ((insn & tptr->mask) == tptr->pattern) {
1059             return tptr->disas_fn;
1060         }
1061         tptr++;
1062     }
1063     return NULL;
1064 }
1065
1066 /*
1067  * the instruction disassembly implemented here matches
1068  * the instruction encoding classifications in chapter 3 (C3)
1069  * of the ARM Architecture Reference Manual (DDI0487A_a)
1070  */
1071
1072 /* C3.2.7 Unconditional branch (immediate)
1073  *   31  30       26 25                                  0
1074  * +----+-----------+-------------------------------------+
1075  * | op | 0 0 1 0 1 |                 imm26               |
1076  * +----+-----------+-------------------------------------+
1077  */
1078 static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
1079 {
1080     uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;
1081
1082     if (insn & (1U << 31)) {
1083         /* C5.6.26 BL Branch with link */
1084         tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1085     }
1086
1087     /* C5.6.20 B Branch / C5.6.26 BL Branch with link */
1088     gen_goto_tb(s, 0, addr);
1089 }
1090
1091 /* C3.2.1 Compare & branch (immediate)
1092  *   31  30         25  24  23                  5 4      0
1093  * +----+-------------+----+---------------------+--------+
1094  * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
1095  * +----+-------------+----+---------------------+--------+
1096  */
1097 static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
1098 {
1099     unsigned int sf, op, rt;
1100     uint64_t addr;
1101     TCGLabel *label_match;
1102     TCGv_i64 tcg_cmp;
1103
1104     sf = extract32(insn, 31, 1);
1105     op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
1106     rt = extract32(insn, 0, 5);
1107     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1108
1109     tcg_cmp = read_cpu_reg(s, rt, sf);
1110     label_match = gen_new_label();
1111
1112     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1113                         tcg_cmp, 0, label_match);
1114
1115     gen_goto_tb(s, 0, s->pc);
1116     gen_set_label(label_match);
1117     gen_goto_tb(s, 1, addr);
1118 }
1119
1120 /* C3.2.5 Test & branch (immediate)
1121  *   31  30         25  24  23   19 18          5 4    0
1122  * +----+-------------+----+-------+-------------+------+
1123  * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
1124  * +----+-------------+----+-------+-------------+------+
1125  */
1126 static void disas_test_b_imm(DisasContext *s, uint32_t insn)
1127 {
1128     unsigned int bit_pos, op, rt;
1129     uint64_t addr;
1130     TCGLabel *label_match;
1131     TCGv_i64 tcg_cmp;
1132
1133     bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
1134     op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
1135     addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
1136     rt = extract32(insn, 0, 5);
1137
1138     tcg_cmp = tcg_temp_new_i64();
1139     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
1140     label_match = gen_new_label();
1141     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1142                         tcg_cmp, 0, label_match);
1143     tcg_temp_free_i64(tcg_cmp);
1144     gen_goto_tb(s, 0, s->pc);
1145     gen_set_label(label_match);
1146     gen_goto_tb(s, 1, addr);
1147 }
1148
1149 /* C3.2.2 / C5.6.19 Conditional branch (immediate)
1150  *  31           25  24  23                  5   4  3    0
1151  * +---------------+----+---------------------+----+------+
1152  * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
1153  * +---------------+----+---------------------+----+------+
1154  */
1155 static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
1156 {
1157     unsigned int cond;
1158     uint64_t addr;
1159
1160     if ((insn & (1 << 4)) || (insn & (1 << 24))) {
1161         unallocated_encoding(s);
1162         return;
1163     }
1164     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1165     cond = extract32(insn, 0, 4);
1166
1167     if (cond < 0x0e) {
1168         /* genuinely conditional branches */
1169         TCGLabel *label_match = gen_new_label();
1170         arm_gen_test_cc(cond, label_match);
1171         gen_goto_tb(s, 0, s->pc);
1172         gen_set_label(label_match);
1173         gen_goto_tb(s, 1, addr);
1174     } else {
1175         /* 0xe and 0xf are both "always" conditions */
1176         gen_goto_tb(s, 0, addr);
1177     }
1178 }
1179
1180 /* C5.6.68 HINT */
1181 static void handle_hint(DisasContext *s, uint32_t insn,
1182                         unsigned int op1, unsigned int op2, unsigned int crm)
1183 {
1184     unsigned int selector = crm << 3 | op2;
1185
1186     if (op1 != 3) {
1187         unallocated_encoding(s);
1188         return;
1189     }
1190
1191     switch (selector) {
1192     case 0: /* NOP */
1193         return;
1194     case 3: /* WFI */
1195         s->is_jmp = DISAS_WFI;
1196         return;
1197     case 1: /* YIELD */
1198         s->is_jmp = DISAS_YIELD;
1199         return;
1200     case 2: /* WFE */
1201         s->is_jmp = DISAS_WFE;
1202         return;
1203     case 4: /* SEV */
1204     case 5: /* SEVL */
1205         /* we treat all as NOP at least for now */
1206         return;
1207     default:
1208         /* default specified as NOP equivalent */
1209         return;
1210     }
1211 }
1212
1213 static void gen_clrex(DisasContext *s, uint32_t insn)
1214 {
1215     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1216 }
1217
1218 /* CLREX, DSB, DMB, ISB */
1219 static void handle_sync(DisasContext *s, uint32_t insn,
1220                         unsigned int op1, unsigned int op2, unsigned int crm)
1221 {
1222     if (op1 != 3) {
1223         unallocated_encoding(s);
1224         return;
1225     }
1226
1227     switch (op2) {
1228     case 2: /* CLREX */
1229         gen_clrex(s, insn);
1230         return;
1231     case 4: /* DSB */
1232     case 5: /* DMB */
1233     case 6: /* ISB */
1234         /* We don't emulate caches so barriers are no-ops */
1235         return;
1236     default:
1237         unallocated_encoding(s);
1238         return;
1239     }
1240 }
1241
1242 /* C5.6.130 MSR (immediate) - move immediate to processor state field */
1243 static void handle_msr_i(DisasContext *s, uint32_t insn,
1244                          unsigned int op1, unsigned int op2, unsigned int crm)
1245 {
1246     int op = op1 << 3 | op2;
1247     switch (op) {
1248     case 0x05: /* SPSel */
1249         if (s->current_el == 0) {
1250             unallocated_encoding(s);
1251             return;
1252         }
1253         /* fall through */
1254     case 0x1e: /* DAIFSet */
1255     case 0x1f: /* DAIFClear */
1256     {
1257         TCGv_i32 tcg_imm = tcg_const_i32(crm);
1258         TCGv_i32 tcg_op = tcg_const_i32(op);
1259         gen_a64_set_pc_im(s->pc - 4);
1260         gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm);
1261         tcg_temp_free_i32(tcg_imm);
1262         tcg_temp_free_i32(tcg_op);
1263         s->is_jmp = DISAS_UPDATE;
1264         break;
1265     }
1266     default:
1267         unallocated_encoding(s);
1268         return;
1269     }
1270 }
1271
1272 static void gen_get_nzcv(TCGv_i64 tcg_rt)
1273 {
1274     TCGv_i32 tmp = tcg_temp_new_i32();
1275     TCGv_i32 nzcv = tcg_temp_new_i32();
1276
1277     /* build bit 31, N */
1278     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1279     /* build bit 30, Z */
1280     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1281     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1282     /* build bit 29, C */
1283     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1284     /* build bit 28, V */
1285     tcg_gen_shri_i32(tmp, cpu_VF, 31);
1286     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1287     /* generate result */
1288     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1289
1290     tcg_temp_free_i32(nzcv);
1291     tcg_temp_free_i32(tmp);
1292 }
1293
1294 static void gen_set_nzcv(TCGv_i64 tcg_rt)
1295
1296 {
1297     TCGv_i32 nzcv = tcg_temp_new_i32();
1298
1299     /* take NZCV from R[t] */
1300     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1301
1302     /* bit 31, N */
1303     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1304     /* bit 30, Z */
1305     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1306     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1307     /* bit 29, C */
1308     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1309     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1310     /* bit 28, V */
1311     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1312     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1313     tcg_temp_free_i32(nzcv);
1314 }
1315
1316 /* C5.6.129 MRS - move from system register
1317  * C5.6.131 MSR (register) - move to system register
1318  * C5.6.204 SYS
1319  * C5.6.205 SYSL
1320  * These are all essentially the same insn in 'read' and 'write'
1321  * versions, with varying op0 fields.
1322  */
1323 static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
1324                        unsigned int op0, unsigned int op1, unsigned int op2,
1325                        unsigned int crn, unsigned int crm, unsigned int rt)
1326 {
1327     const ARMCPRegInfo *ri;
1328     TCGv_i64 tcg_rt;
1329
1330     ri = get_arm_cp_reginfo(s->cp_regs,
1331                             ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
1332                                                crn, crm, op0, op1, op2));
1333
1334     if (!ri) {
1335         /* Unknown register; this might be a guest error or a QEMU
1336          * unimplemented feature.
1337          */
1338         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
1339                       "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
1340                       isread ? "read" : "write", op0, op1, crn, crm, op2);
1341         unallocated_encoding(s);
1342         return;
1343     }
1344
1345     /* Check access permissions */
1346     if (!cp_access_ok(s->current_el, ri, isread)) {
1347         unallocated_encoding(s);
1348         return;
1349     }
1350
1351     if (ri->accessfn) {
1352         /* Emit code to perform further access permissions checks at
1353          * runtime; this may result in an exception.
1354          */
1355         TCGv_ptr tmpptr;
1356         TCGv_i32 tcg_syn;
1357         uint32_t syndrome;
1358
1359         gen_a64_set_pc_im(s->pc - 4);
1360         tmpptr = tcg_const_ptr(ri);
1361         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
1362         tcg_syn = tcg_const_i32(syndrome);
1363         gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn);
1364         tcg_temp_free_ptr(tmpptr);
1365         tcg_temp_free_i32(tcg_syn);
1366     }
1367
1368     /* Handle special cases first */
1369     switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
1370     case ARM_CP_NOP:
1371         return;
1372     case ARM_CP_NZCV:
1373         tcg_rt = cpu_reg(s, rt);
1374         if (isread) {
1375             gen_get_nzcv(tcg_rt);
1376         } else {
1377             gen_set_nzcv(tcg_rt);
1378         }
1379         return;
1380     case ARM_CP_CURRENTEL:
1381         /* Reads as current EL value from pstate, which is
1382          * guaranteed to be constant by the tb flags.
1383          */
1384         tcg_rt = cpu_reg(s, rt);
1385         tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
1386         return;
1387     case ARM_CP_DC_ZVA:
1388         /* Writes clear the aligned block of memory which rt points into. */
1389         tcg_rt = cpu_reg(s, rt);
1390         gen_helper_dc_zva(cpu_env, tcg_rt);
1391         return;
1392     default:
1393         break;
1394     }
1395
1396     if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1397         gen_io_start();
1398     }
1399
1400     tcg_rt = cpu_reg(s, rt);
1401
1402     if (isread) {
1403         if (ri->type & ARM_CP_CONST) {
1404             tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
1405         } else if (ri->readfn) {
1406             TCGv_ptr tmpptr;
1407             tmpptr = tcg_const_ptr(ri);
1408             gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
1409             tcg_temp_free_ptr(tmpptr);
1410         } else {
1411             tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
1412         }
1413     } else {
1414         if (ri->type & ARM_CP_CONST) {
1415             /* If not forbidden by access permissions, treat as WI */
1416             return;
1417         } else if (ri->writefn) {
1418             TCGv_ptr tmpptr;
1419             tmpptr = tcg_const_ptr(ri);
1420             gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
1421             tcg_temp_free_ptr(tmpptr);
1422         } else {
1423             tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
1424         }
1425     }
1426
1427     if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1428         /* I/O operations must end the TB here (whether read or write) */
1429         gen_io_end();
1430         s->is_jmp = DISAS_UPDATE;
1431     } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
1432         /* We default to ending the TB on a coprocessor register write,
1433          * but allow this to be suppressed by the register definition
1434          * (usually only necessary to work around guest bugs).
1435          */
1436         s->is_jmp = DISAS_UPDATE;
1437     }
1438 }
1439
1440 /* C3.2.4 System
1441  *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
1442  * +---------------------+---+-----+-----+-------+-------+-----+------+
1443  * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
1444  * +---------------------+---+-----+-----+-------+-------+-----+------+
1445  */
1446 static void disas_system(DisasContext *s, uint32_t insn)
1447 {
1448     unsigned int l, op0, op1, crn, crm, op2, rt;
1449     l = extract32(insn, 21, 1);
1450     op0 = extract32(insn, 19, 2);
1451     op1 = extract32(insn, 16, 3);
1452     crn = extract32(insn, 12, 4);
1453     crm = extract32(insn, 8, 4);
1454     op2 = extract32(insn, 5, 3);
1455     rt = extract32(insn, 0, 5);
1456
1457     if (op0 == 0) {
1458         if (l || rt != 31) {
1459             unallocated_encoding(s);
1460             return;
1461         }
1462         switch (crn) {
1463         case 2: /* C5.6.68 HINT */
1464             handle_hint(s, insn, op1, op2, crm);
1465             break;
1466         case 3: /* CLREX, DSB, DMB, ISB */
1467             handle_sync(s, insn, op1, op2, crm);
1468             break;
1469         case 4: /* C5.6.130 MSR (immediate) */
1470             handle_msr_i(s, insn, op1, op2, crm);
1471             break;
1472         default:
1473             unallocated_encoding(s);
1474             break;
1475         }
1476         return;
1477     }
1478     handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1479 }
1480
1481 /* C3.2.3 Exception generation
1482  *
1483  *  31             24 23 21 20                     5 4   2 1  0
1484  * +-----------------+-----+------------------------+-----+----+
1485  * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
1486  * +-----------------------+------------------------+----------+
1487  */
1488 static void disas_exc(DisasContext *s, uint32_t insn)
1489 {
1490     int opc = extract32(insn, 21, 3);
1491     int op2_ll = extract32(insn, 0, 5);
1492     int imm16 = extract32(insn, 5, 16);
1493     TCGv_i32 tmp;
1494
1495     switch (opc) {
1496     case 0:
1497         /* For SVC, HVC and SMC we advance the single-step state
1498          * machine before taking the exception. This is architecturally
1499          * mandated, to ensure that single-stepping a system call
1500          * instruction works properly.
1501          */
1502         switch (op2_ll) {
1503         case 1:
1504             gen_ss_advance(s);
1505             gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16),
1506                                default_exception_el(s));
1507             break;
1508         case 2:
1509             if (s->current_el == 0) {
1510                 unallocated_encoding(s);
1511                 break;
1512             }
1513             /* The pre HVC helper handles cases when HVC gets trapped
1514              * as an undefined insn by runtime configuration.
1515              */
1516             gen_a64_set_pc_im(s->pc - 4);
1517             gen_helper_pre_hvc(cpu_env);
1518             gen_ss_advance(s);
1519             gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16), 2);
1520             break;
1521         case 3:
1522             if (s->current_el == 0) {
1523                 unallocated_encoding(s);
1524                 break;
1525             }
1526             gen_a64_set_pc_im(s->pc - 4);
1527             tmp = tcg_const_i32(syn_aa64_smc(imm16));
1528             gen_helper_pre_smc(cpu_env, tmp);
1529             tcg_temp_free_i32(tmp);
1530             gen_ss_advance(s);
1531             gen_exception_insn(s, 0, EXCP_SMC, syn_aa64_smc(imm16), 3);
1532             break;
1533         default:
1534             unallocated_encoding(s);
1535             break;
1536         }
1537         break;
1538     case 1:
1539         if (op2_ll != 0) {
1540             unallocated_encoding(s);
1541             break;
1542         }
1543         /* BRK */
1544         gen_exception_insn(s, 4, EXCP_BKPT, syn_aa64_bkpt(imm16),
1545                            default_exception_el(s));
1546         break;
1547     case 2:
1548         if (op2_ll != 0) {
1549             unallocated_encoding(s);
1550             break;
1551         }
1552         /* HLT. This has two purposes.
1553          * Architecturally, it is an external halting debug instruction.
1554          * Since QEMU doesn't implement external debug, we treat this as
1555          * it is required for halting debug disabled: it will UNDEF.
1556          * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
1557          */
1558         if (semihosting_enabled() && imm16 == 0xf000) {
1559 #ifndef CONFIG_USER_ONLY
1560             /* In system mode, don't allow userspace access to semihosting,
1561              * to provide some semblance of security (and for consistency
1562              * with our 32-bit semihosting).
1563              */
1564             if (s->current_el == 0) {
1565                 unsupported_encoding(s, insn);
1566                 break;
1567             }
1568 #endif
1569             gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
1570         } else {
1571             unsupported_encoding(s, insn);
1572         }
1573         break;
1574     case 5:
1575         if (op2_ll < 1 || op2_ll > 3) {
1576             unallocated_encoding(s);
1577             break;
1578         }
1579         /* DCPS1, DCPS2, DCPS3 */
1580         unsupported_encoding(s, insn);
1581         break;
1582     default:
1583         unallocated_encoding(s);
1584         break;
1585     }
1586 }
1587
1588 /* C3.2.7 Unconditional branch (register)
1589  *  31           25 24   21 20   16 15   10 9    5 4     0
1590  * +---------------+-------+-------+-------+------+-------+
1591  * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
1592  * +---------------+-------+-------+-------+------+-------+
1593  */
1594 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1595 {
1596     unsigned int opc, op2, op3, rn, op4;
1597
1598     opc = extract32(insn, 21, 4);
1599     op2 = extract32(insn, 16, 5);
1600     op3 = extract32(insn, 10, 6);
1601     rn = extract32(insn, 5, 5);
1602     op4 = extract32(insn, 0, 5);
1603
1604     if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
1605         unallocated_encoding(s);
1606         return;
1607     }
1608
1609     switch (opc) {
1610     case 0: /* BR */
1611     case 2: /* RET */
1612         tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
1613         break;
1614     case 1: /* BLR */
1615         tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
1616         tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1617         break;
1618     case 4: /* ERET */
1619         if (s->current_el == 0) {
1620             unallocated_encoding(s);
1621             return;
1622         }
1623         gen_helper_exception_return(cpu_env);
1624         s->is_jmp = DISAS_JUMP;
1625         return;
1626     case 5: /* DRPS */
1627         if (rn != 0x1f) {
1628             unallocated_encoding(s);
1629         } else {
1630             unsupported_encoding(s, insn);
1631         }
1632         return;
1633     default:
1634         unallocated_encoding(s);
1635         return;
1636     }
1637
1638     s->is_jmp = DISAS_JUMP;
1639 }
1640
1641 /* C3.2 Branches, exception generating and system instructions */
1642 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
1643 {
1644     switch (extract32(insn, 25, 7)) {
1645     case 0x0a: case 0x0b:
1646     case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
1647         disas_uncond_b_imm(s, insn);
1648         break;
1649     case 0x1a: case 0x5a: /* Compare & branch (immediate) */
1650         disas_comp_b_imm(s, insn);
1651         break;
1652     case 0x1b: case 0x5b: /* Test & branch (immediate) */
1653         disas_test_b_imm(s, insn);
1654         break;
1655     case 0x2a: /* Conditional branch (immediate) */
1656         disas_cond_b_imm(s, insn);
1657         break;
1658     case 0x6a: /* Exception generation / System */
1659         if (insn & (1 << 24)) {
1660             disas_system(s, insn);
1661         } else {
1662             disas_exc(s, insn);
1663         }
1664         break;
1665     case 0x6b: /* Unconditional branch (register) */
1666         disas_uncond_b_reg(s, insn);
1667         break;
1668     default:
1669         unallocated_encoding(s);
1670         break;
1671     }
1672 }
1673
1674 /*
1675  * Load/Store exclusive instructions are implemented by remembering
1676  * the value/address loaded, and seeing if these are the same
1677  * when the store is performed. This is not actually the architecturally
1678  * mandated semantics, but it works for typical guest code sequences
1679  * and avoids having to monitor regular stores.
1680  *
1681  * In system emulation mode only one CPU will be running at once, so
1682  * this sequence is effectively atomic.  In user emulation mode we
1683  * throw an exception and handle the atomic operation elsewhere.
1684  */
1685 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
1686                                TCGv_i64 addr, int size, bool is_pair)
1687 {
1688     TCGv_i64 tmp = tcg_temp_new_i64();
1689     TCGMemOp memop = MO_TE + size;
1690
1691     g_assert(size <= 3);
1692     tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), memop);
1693
1694     if (is_pair) {
1695         TCGv_i64 addr2 = tcg_temp_new_i64();
1696         TCGv_i64 hitmp = tcg_temp_new_i64();
1697
1698         g_assert(size >= 2);
1699         tcg_gen_addi_i64(addr2, addr, 1 << size);
1700         tcg_gen_qemu_ld_i64(hitmp, addr2, get_mem_index(s), memop);
1701         tcg_temp_free_i64(addr2);
1702         tcg_gen_mov_i64(cpu_exclusive_high, hitmp);
1703         tcg_gen_mov_i64(cpu_reg(s, rt2), hitmp);
1704         tcg_temp_free_i64(hitmp);
1705     }
1706
1707     tcg_gen_mov_i64(cpu_exclusive_val, tmp);
1708     tcg_gen_mov_i64(cpu_reg(s, rt), tmp);
1709
1710     tcg_temp_free_i64(tmp);
1711     tcg_gen_mov_i64(cpu_exclusive_addr, addr);
1712 }
1713
1714 #ifdef CONFIG_USER_ONLY
1715 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
1716                                 TCGv_i64 addr, int size, int is_pair)
1717 {
1718     tcg_gen_mov_i64(cpu_exclusive_test, addr);
1719     tcg_gen_movi_i32(cpu_exclusive_info,
1720                      size | is_pair << 2 | (rd << 4) | (rt << 9) | (rt2 << 14));
1721     gen_exception_internal_insn(s, 4, EXCP_STREX);
1722 }
1723 #else
1724 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
1725                                 TCGv_i64 inaddr, int size, int is_pair)
1726 {
1727     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
1728      *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
1729      *     [addr] = {Rt};
1730      *     if (is_pair) {
1731      *         [addr + datasize] = {Rt2};
1732      *     }
1733      *     {Rd} = 0;
1734      * } else {
1735      *     {Rd} = 1;
1736      * }
1737      * env->exclusive_addr = -1;
1738      */
1739     TCGLabel *fail_label = gen_new_label();
1740     TCGLabel *done_label = gen_new_label();
1741     TCGv_i64 addr = tcg_temp_local_new_i64();
1742     TCGv_i64 tmp;
1743
1744     /* Copy input into a local temp so it is not trashed when the
1745      * basic block ends at the branch insn.
1746      */
1747     tcg_gen_mov_i64(addr, inaddr);
1748     tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
1749
1750     tmp = tcg_temp_new_i64();
1751     tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), MO_TE + size);
1752     tcg_gen_brcond_i64(TCG_COND_NE, tmp, cpu_exclusive_val, fail_label);
1753     tcg_temp_free_i64(tmp);
1754
1755     if (is_pair) {
1756         TCGv_i64 addrhi = tcg_temp_new_i64();
1757         TCGv_i64 tmphi = tcg_temp_new_i64();
1758
1759         tcg_gen_addi_i64(addrhi, addr, 1 << size);
1760         tcg_gen_qemu_ld_i64(tmphi, addrhi, get_mem_index(s), MO_TE + size);
1761         tcg_gen_brcond_i64(TCG_COND_NE, tmphi, cpu_exclusive_high, fail_label);
1762
1763         tcg_temp_free_i64(tmphi);
1764         tcg_temp_free_i64(addrhi);
1765     }
1766
1767     /* We seem to still have the exclusive monitor, so do the store */
1768     tcg_gen_qemu_st_i64(cpu_reg(s, rt), addr, get_mem_index(s), MO_TE + size);
1769     if (is_pair) {
1770         TCGv_i64 addrhi = tcg_temp_new_i64();
1771
1772         tcg_gen_addi_i64(addrhi, addr, 1 << size);
1773         tcg_gen_qemu_st_i64(cpu_reg(s, rt2), addrhi,
1774                             get_mem_index(s), MO_TE + size);
1775         tcg_temp_free_i64(addrhi);
1776     }
1777
1778     tcg_temp_free_i64(addr);
1779
1780     tcg_gen_movi_i64(cpu_reg(s, rd), 0);
1781     tcg_gen_br(done_label);
1782     gen_set_label(fail_label);
1783     tcg_gen_movi_i64(cpu_reg(s, rd), 1);
1784     gen_set_label(done_label);
1785     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1786
1787 }
1788 #endif
1789
1790 /* C3.3.6 Load/store exclusive
1791  *
1792  *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
1793  * +-----+-------------+----+---+----+------+----+-------+------+------+
1794  * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
1795  * +-----+-------------+----+---+----+------+----+-------+------+------+
1796  *
1797  *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
1798  *   L: 0 -> store, 1 -> load
1799  *  o2: 0 -> exclusive, 1 -> not
1800  *  o1: 0 -> single register, 1 -> register pair
1801  *  o0: 1 -> load-acquire/store-release, 0 -> not
1802  *
1803  *  o0 == 0 AND o2 == 1 is un-allocated
1804  *  o1 == 1 is un-allocated except for 32 and 64 bit sizes
1805  */
1806 static void disas_ldst_excl(DisasContext *s, uint32_t insn)
1807 {
1808     int rt = extract32(insn, 0, 5);
1809     int rn = extract32(insn, 5, 5);
1810     int rt2 = extract32(insn, 10, 5);
1811     int is_lasr = extract32(insn, 15, 1);
1812     int rs = extract32(insn, 16, 5);
1813     int is_pair = extract32(insn, 21, 1);
1814     int is_store = !extract32(insn, 22, 1);
1815     int is_excl = !extract32(insn, 23, 1);
1816     int size = extract32(insn, 30, 2);
1817     TCGv_i64 tcg_addr;
1818
1819     if ((!is_excl && !is_lasr) ||
1820         (is_pair && size < 2)) {
1821         unallocated_encoding(s);
1822         return;
1823     }
1824
1825     if (rn == 31) {
1826         gen_check_sp_alignment(s);
1827     }
1828     tcg_addr = read_cpu_reg_sp(s, rn, 1);
1829
1830     /* Note that since TCG is single threaded load-acquire/store-release
1831      * semantics require no extra if (is_lasr) { ... } handling.
1832      */
1833
1834     if (is_excl) {
1835         if (!is_store) {
1836             s->is_ldex = true;
1837             gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
1838         } else {
1839             gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
1840         }
1841     } else {
1842         TCGv_i64 tcg_rt = cpu_reg(s, rt);
1843         if (is_store) {
1844             do_gpr_st(s, tcg_rt, tcg_addr, size);
1845         } else {
1846             do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false);
1847         }
1848         if (is_pair) {
1849             TCGv_i64 tcg_rt2 = cpu_reg(s, rt);
1850             tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
1851             if (is_store) {
1852                 do_gpr_st(s, tcg_rt2, tcg_addr, size);
1853             } else {
1854                 do_gpr_ld(s, tcg_rt2, tcg_addr, size, false, false);
1855             }
1856         }
1857     }
1858 }
1859
1860 /*
1861  * C3.3.5 Load register (literal)
1862  *
1863  *  31 30 29   27  26 25 24 23                5 4     0
1864  * +-----+-------+---+-----+-------------------+-------+
1865  * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
1866  * +-----+-------+---+-----+-------------------+-------+
1867  *
1868  * V: 1 -> vector (simd/fp)
1869  * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
1870  *                   10-> 32 bit signed, 11 -> prefetch
1871  * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
1872  */
1873 static void disas_ld_lit(DisasContext *s, uint32_t insn)
1874 {
1875     int rt = extract32(insn, 0, 5);
1876     int64_t imm = sextract32(insn, 5, 19) << 2;
1877     bool is_vector = extract32(insn, 26, 1);
1878     int opc = extract32(insn, 30, 2);
1879     bool is_signed = false;
1880     int size = 2;
1881     TCGv_i64 tcg_rt, tcg_addr;
1882
1883     if (is_vector) {
1884         if (opc == 3) {
1885             unallocated_encoding(s);
1886             return;
1887         }
1888         size = 2 + opc;
1889         if (!fp_access_check(s)) {
1890             return;
1891         }
1892     } else {
1893         if (opc == 3) {
1894             /* PRFM (literal) : prefetch */
1895             return;
1896         }
1897         size = 2 + extract32(opc, 0, 1);
1898         is_signed = extract32(opc, 1, 1);
1899     }
1900
1901     tcg_rt = cpu_reg(s, rt);
1902
1903     tcg_addr = tcg_const_i64((s->pc - 4) + imm);
1904     if (is_vector) {
1905         do_fp_ld(s, rt, tcg_addr, size);
1906     } else {
1907         do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
1908     }
1909     tcg_temp_free_i64(tcg_addr);
1910 }
1911
1912 /*
1913  * C5.6.80 LDNP (Load Pair - non-temporal hint)
1914  * C5.6.81 LDP (Load Pair - non vector)
1915  * C5.6.82 LDPSW (Load Pair Signed Word - non vector)
1916  * C5.6.176 STNP (Store Pair - non-temporal hint)
1917  * C5.6.177 STP (Store Pair - non vector)
1918  * C6.3.165 LDNP (Load Pair of SIMD&FP - non-temporal hint)
1919  * C6.3.165 LDP (Load Pair of SIMD&FP)
1920  * C6.3.284 STNP (Store Pair of SIMD&FP - non-temporal hint)
1921  * C6.3.284 STP (Store Pair of SIMD&FP)
1922  *
1923  *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
1924  * +-----+-------+---+---+-------+---+-----------------------------+
1925  * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
1926  * +-----+-------+---+---+-------+---+-------+-------+------+------+
1927  *
1928  * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
1929  *      LDPSW                    01
1930  *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
1931  *   V: 0 -> GPR, 1 -> Vector
1932  * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
1933  *      10 -> signed offset, 11 -> pre-index
1934  *   L: 0 -> Store 1 -> Load
1935  *
1936  * Rt, Rt2 = GPR or SIMD registers to be stored
1937  * Rn = general purpose register containing address
1938  * imm7 = signed offset (multiple of 4 or 8 depending on size)
1939  */
1940 static void disas_ldst_pair(DisasContext *s, uint32_t insn)
1941 {
1942     int rt = extract32(insn, 0, 5);
1943     int rn = extract32(insn, 5, 5);
1944     int rt2 = extract32(insn, 10, 5);
1945     uint64_t offset = sextract64(insn, 15, 7);
1946     int index = extract32(insn, 23, 2);
1947     bool is_vector = extract32(insn, 26, 1);
1948     bool is_load = extract32(insn, 22, 1);
1949     int opc = extract32(insn, 30, 2);
1950
1951     bool is_signed = false;
1952     bool postindex = false;
1953     bool wback = false;
1954
1955     TCGv_i64 tcg_addr; /* calculated address */
1956     int size;
1957
1958     if (opc == 3) {
1959         unallocated_encoding(s);
1960         return;
1961     }
1962
1963     if (is_vector) {
1964         size = 2 + opc;
1965     } else {
1966         size = 2 + extract32(opc, 1, 1);
1967         is_signed = extract32(opc, 0, 1);
1968         if (!is_load && is_signed) {
1969             unallocated_encoding(s);
1970             return;
1971         }
1972     }
1973
1974     switch (index) {
1975     case 1: /* post-index */
1976         postindex = true;
1977         wback = true;
1978         break;
1979     case 0:
1980         /* signed offset with "non-temporal" hint. Since we don't emulate
1981          * caches we don't care about hints to the cache system about
1982          * data access patterns, and handle this identically to plain
1983          * signed offset.
1984          */
1985         if (is_signed) {
1986             /* There is no non-temporal-hint version of LDPSW */
1987             unallocated_encoding(s);
1988             return;
1989         }
1990         postindex = false;
1991         break;
1992     case 2: /* signed offset, rn not updated */
1993         postindex = false;
1994         break;
1995     case 3: /* pre-index */
1996         postindex = false;
1997         wback = true;
1998         break;
1999     }
2000
2001     if (is_vector && !fp_access_check(s)) {
2002         return;
2003     }
2004
2005     offset <<= size;
2006
2007     if (rn == 31) {
2008         gen_check_sp_alignment(s);
2009     }
2010
2011     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2012
2013     if (!postindex) {
2014         tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2015     }
2016
2017     if (is_vector) {
2018         if (is_load) {
2019             do_fp_ld(s, rt, tcg_addr, size);
2020         } else {
2021             do_fp_st(s, rt, tcg_addr, size);
2022         }
2023     } else {
2024         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2025         if (is_load) {
2026             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false);
2027         } else {
2028             do_gpr_st(s, tcg_rt, tcg_addr, size);
2029         }
2030     }
2031     tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2032     if (is_vector) {
2033         if (is_load) {
2034             do_fp_ld(s, rt2, tcg_addr, size);
2035         } else {
2036             do_fp_st(s, rt2, tcg_addr, size);
2037         }
2038     } else {
2039         TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2040         if (is_load) {
2041             do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false);
2042         } else {
2043             do_gpr_st(s, tcg_rt2, tcg_addr, size);
2044         }
2045     }
2046
2047     if (wback) {
2048         if (postindex) {
2049             tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
2050         } else {
2051             tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
2052         }
2053         tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
2054     }
2055 }
2056
2057 /*
2058  * C3.3.8 Load/store (immediate post-indexed)
2059  * C3.3.9 Load/store (immediate pre-indexed)
2060  * C3.3.12 Load/store (unscaled immediate)
2061  *
2062  * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
2063  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2064  * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
2065  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2066  *
2067  * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
2068  *       10 -> unprivileged
2069  * V = 0 -> non-vector
2070  * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
2071  * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2072  */
2073 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn)
2074 {
2075     int rt = extract32(insn, 0, 5);
2076     int rn = extract32(insn, 5, 5);
2077     int imm9 = sextract32(insn, 12, 9);
2078     int opc = extract32(insn, 22, 2);
2079     int size = extract32(insn, 30, 2);
2080     int idx = extract32(insn, 10, 2);
2081     bool is_signed = false;
2082     bool is_store = false;
2083     bool is_extended = false;
2084     bool is_unpriv = (idx == 2);
2085     bool is_vector = extract32(insn, 26, 1);
2086     bool post_index;
2087     bool writeback;
2088
2089     TCGv_i64 tcg_addr;
2090
2091     if (is_vector) {
2092         size |= (opc & 2) << 1;
2093         if (size > 4 || is_unpriv) {
2094             unallocated_encoding(s);
2095             return;
2096         }
2097         is_store = ((opc & 1) == 0);
2098         if (!fp_access_check(s)) {
2099             return;
2100         }
2101     } else {
2102         if (size == 3 && opc == 2) {
2103             /* PRFM - prefetch */
2104             if (is_unpriv) {
2105                 unallocated_encoding(s);
2106                 return;
2107             }
2108             return;
2109         }
2110         if (opc == 3 && size > 1) {
2111             unallocated_encoding(s);
2112             return;
2113         }
2114         is_store = (opc == 0);
2115         is_signed = opc & (1<<1);
2116         is_extended = (size < 3) && (opc & 1);
2117     }
2118
2119     switch (idx) {
2120     case 0:
2121     case 2:
2122         post_index = false;
2123         writeback = false;
2124         break;
2125     case 1:
2126         post_index = true;
2127         writeback = true;
2128         break;
2129     case 3:
2130         post_index = false;
2131         writeback = true;
2132         break;
2133     }
2134
2135     if (rn == 31) {
2136         gen_check_sp_alignment(s);
2137     }
2138     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2139
2140     if (!post_index) {
2141         tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2142     }
2143
2144     if (is_vector) {
2145         if (is_store) {
2146             do_fp_st(s, rt, tcg_addr, size);
2147         } else {
2148             do_fp_ld(s, rt, tcg_addr, size);
2149         }
2150     } else {
2151         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2152         int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
2153
2154         if (is_store) {
2155             do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx);
2156         } else {
2157             do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
2158                              is_signed, is_extended, memidx);
2159         }
2160     }
2161
2162     if (writeback) {
2163         TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2164         if (post_index) {
2165             tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2166         }
2167         tcg_gen_mov_i64(tcg_rn, tcg_addr);
2168     }
2169 }
2170
2171 /*
2172  * C3.3.10 Load/store (register offset)
2173  *
2174  * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
2175  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2176  * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
2177  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2178  *
2179  * For non-vector:
2180  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2181  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2182  * For vector:
2183  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2184  *   opc<0>: 0 -> store, 1 -> load
2185  * V: 1 -> vector/simd
2186  * opt: extend encoding (see DecodeRegExtend)
2187  * S: if S=1 then scale (essentially index by sizeof(size))
2188  * Rt: register to transfer into/out of
2189  * Rn: address register or SP for base
2190  * Rm: offset register or ZR for offset
2191  */
2192 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn)
2193 {
2194     int rt = extract32(insn, 0, 5);
2195     int rn = extract32(insn, 5, 5);
2196     int shift = extract32(insn, 12, 1);
2197     int rm = extract32(insn, 16, 5);
2198     int opc = extract32(insn, 22, 2);
2199     int opt = extract32(insn, 13, 3);
2200     int size = extract32(insn, 30, 2);
2201     bool is_signed = false;
2202     bool is_store = false;
2203     bool is_extended = false;
2204     bool is_vector = extract32(insn, 26, 1);
2205
2206     TCGv_i64 tcg_rm;
2207     TCGv_i64 tcg_addr;
2208
2209     if (extract32(opt, 1, 1) == 0) {
2210         unallocated_encoding(s);
2211         return;
2212     }
2213
2214     if (is_vector) {
2215         size |= (opc & 2) << 1;
2216         if (size > 4) {
2217             unallocated_encoding(s);
2218             return;
2219         }
2220         is_store = !extract32(opc, 0, 1);
2221         if (!fp_access_check(s)) {
2222             return;
2223         }
2224     } else {
2225         if (size == 3 && opc == 2) {
2226             /* PRFM - prefetch */
2227             return;
2228         }
2229         if (opc == 3 && size > 1) {
2230             unallocated_encoding(s);
2231             return;
2232         }
2233         is_store = (opc == 0);
2234         is_signed = extract32(opc, 1, 1);
2235         is_extended = (size < 3) && extract32(opc, 0, 1);
2236     }
2237
2238     if (rn == 31) {
2239         gen_check_sp_alignment(s);
2240     }
2241     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2242
2243     tcg_rm = read_cpu_reg(s, rm, 1);
2244     ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
2245
2246     tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);
2247
2248     if (is_vector) {
2249         if (is_store) {
2250             do_fp_st(s, rt, tcg_addr, size);
2251         } else {
2252             do_fp_ld(s, rt, tcg_addr, size);
2253         }
2254     } else {
2255         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2256         if (is_store) {
2257             do_gpr_st(s, tcg_rt, tcg_addr, size);
2258         } else {
2259             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
2260         }
2261     }
2262 }
2263
2264 /*
2265  * C3.3.13 Load/store (unsigned immediate)
2266  *
2267  * 31 30 29   27  26 25 24 23 22 21        10 9     5
2268  * +----+-------+---+-----+-----+------------+-------+------+
2269  * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
2270  * +----+-------+---+-----+-----+------------+-------+------+
2271  *
2272  * For non-vector:
2273  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2274  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2275  * For vector:
2276  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2277  *   opc<0>: 0 -> store, 1 -> load
2278  * Rn: base address register (inc SP)
2279  * Rt: target register
2280  */
2281 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn)
2282 {
2283     int rt = extract32(insn, 0, 5);
2284     int rn = extract32(insn, 5, 5);
2285     unsigned int imm12 = extract32(insn, 10, 12);
2286     bool is_vector = extract32(insn, 26, 1);
2287     int size = extract32(insn, 30, 2);
2288     int opc = extract32(insn, 22, 2);
2289     unsigned int offset;
2290
2291     TCGv_i64 tcg_addr;
2292
2293     bool is_store;
2294     bool is_signed = false;
2295     bool is_extended = false;
2296
2297     if (is_vector) {
2298         size |= (opc & 2) << 1;
2299         if (size > 4) {
2300             unallocated_encoding(s);
2301             return;
2302         }
2303         is_store = !extract32(opc, 0, 1);
2304         if (!fp_access_check(s)) {
2305             return;
2306         }
2307     } else {
2308         if (size == 3 && opc == 2) {
2309             /* PRFM - prefetch */
2310             return;
2311         }
2312         if (opc == 3 && size > 1) {
2313             unallocated_encoding(s);
2314             return;
2315         }
2316         is_store = (opc == 0);
2317         is_signed = extract32(opc, 1, 1);
2318         is_extended = (size < 3) && extract32(opc, 0, 1);
2319     }
2320
2321     if (rn == 31) {
2322         gen_check_sp_alignment(s);
2323     }
2324     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2325     offset = imm12 << size;
2326     tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2327
2328     if (is_vector) {
2329         if (is_store) {
2330             do_fp_st(s, rt, tcg_addr, size);
2331         } else {
2332             do_fp_ld(s, rt, tcg_addr, size);
2333         }
2334     } else {
2335         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2336         if (is_store) {
2337             do_gpr_st(s, tcg_rt, tcg_addr, size);
2338         } else {
2339             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended);
2340         }
2341     }
2342 }
2343
2344 /* Load/store register (all forms) */
2345 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
2346 {
2347     switch (extract32(insn, 24, 2)) {
2348     case 0:
2349         if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
2350             disas_ldst_reg_roffset(s, insn);
2351         } else {
2352             /* Load/store register (unscaled immediate)
2353              * Load/store immediate pre/post-indexed
2354              * Load/store register unprivileged
2355              */
2356             disas_ldst_reg_imm9(s, insn);
2357         }
2358         break;
2359     case 1:
2360         disas_ldst_reg_unsigned_imm(s, insn);
2361         break;
2362     default:
2363         unallocated_encoding(s);
2364         break;
2365     }
2366 }
2367
2368 /* C3.3.1 AdvSIMD load/store multiple structures
2369  *
2370  *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
2371  * +---+---+---------------+---+-------------+--------+------+------+------+
2372  * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
2373  * +---+---+---------------+---+-------------+--------+------+------+------+
2374  *
2375  * C3.3.2 AdvSIMD load/store multiple structures (post-indexed)
2376  *
2377  *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
2378  * +---+---+---------------+---+---+---------+--------+------+------+------+
2379  * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
2380  * +---+---+---------------+---+---+---------+--------+------+------+------+
2381  *
2382  * Rt: first (or only) SIMD&FP register to be transferred
2383  * Rn: base address or SP
2384  * Rm (post-index only): post-index register (when !31) or size dependent #imm
2385  */
2386 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
2387 {
2388     int rt = extract32(insn, 0, 5);
2389     int rn = extract32(insn, 5, 5);
2390     int size = extract32(insn, 10, 2);
2391     int opcode = extract32(insn, 12, 4);
2392     bool is_store = !extract32(insn, 22, 1);
2393     bool is_postidx = extract32(insn, 23, 1);
2394     bool is_q = extract32(insn, 30, 1);
2395     TCGv_i64 tcg_addr, tcg_rn;
2396
2397     int ebytes = 1 << size;
2398     int elements = (is_q ? 128 : 64) / (8 << size);
2399     int rpt;    /* num iterations */
2400     int selem;  /* structure elements */
2401     int r;
2402
2403     if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
2404         unallocated_encoding(s);
2405         return;
2406     }
2407
2408     /* From the shared decode logic */
2409     switch (opcode) {
2410     case 0x0:
2411         rpt = 1;
2412         selem = 4;
2413         break;
2414     case 0x2:
2415         rpt = 4;
2416         selem = 1;
2417         break;
2418     case 0x4:
2419         rpt = 1;
2420         selem = 3;
2421         break;
2422     case 0x6:
2423         rpt = 3;
2424         selem = 1;
2425         break;
2426     case 0x7:
2427         rpt = 1;
2428         selem = 1;
2429         break;
2430     case 0x8:
2431         rpt = 1;
2432         selem = 2;
2433         break;
2434     case 0xa:
2435         rpt = 2;
2436         selem = 1;
2437         break;
2438     default:
2439         unallocated_encoding(s);
2440         return;
2441     }
2442
2443     if (size == 3 && !is_q && selem != 1) {
2444         /* reserved */
2445         unallocated_encoding(s);
2446         return;
2447     }
2448
2449     if (!fp_access_check(s)) {
2450         return;
2451     }
2452
2453     if (rn == 31) {
2454         gen_check_sp_alignment(s);
2455     }
2456
2457     tcg_rn = cpu_reg_sp(s, rn);
2458     tcg_addr = tcg_temp_new_i64();
2459     tcg_gen_mov_i64(tcg_addr, tcg_rn);
2460
2461     for (r = 0; r < rpt; r++) {
2462         int e;
2463         for (e = 0; e < elements; e++) {
2464             int tt = (rt + r) % 32;
2465             int xs;
2466             for (xs = 0; xs < selem; xs++) {
2467                 if (is_store) {
2468                     do_vec_st(s, tt, e, tcg_addr, size);
2469                 } else {
2470                     do_vec_ld(s, tt, e, tcg_addr, size);
2471
2472                     /* For non-quad operations, setting a slice of the low
2473                      * 64 bits of the register clears the high 64 bits (in
2474                      * the ARM ARM pseudocode this is implicit in the fact
2475                      * that 'rval' is a 64 bit wide variable). We optimize
2476                      * by noticing that we only need to do this the first
2477                      * time we touch a register.
2478                      */
2479                     if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) {
2480                         clear_vec_high(s, tt);
2481                     }
2482                 }
2483                 tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2484                 tt = (tt + 1) % 32;
2485             }
2486         }
2487     }
2488
2489     if (is_postidx) {
2490         int rm = extract32(insn, 16, 5);
2491         if (rm == 31) {
2492             tcg_gen_mov_i64(tcg_rn, tcg_addr);
2493         } else {
2494             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2495         }
2496     }
2497     tcg_temp_free_i64(tcg_addr);
2498 }
2499
2500 /* C3.3.3 AdvSIMD load/store single structure
2501  *
2502  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2503  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2504  * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
2505  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2506  *
2507  * C3.3.4 AdvSIMD load/store single structure (post-indexed)
2508  *
2509  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2510  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2511  * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
2512  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2513  *
2514  * Rt: first (or only) SIMD&FP register to be transferred
2515  * Rn: base address or SP
2516  * Rm (post-index only): post-index register (when !31) or size dependent #imm
2517  * index = encoded in Q:S:size dependent on size
2518  *
2519  * lane_size = encoded in R, opc
2520  * transfer width = encoded in opc, S, size
2521  */
2522 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
2523 {
2524     int rt = extract32(insn, 0, 5);
2525     int rn = extract32(insn, 5, 5);
2526     int size = extract32(insn, 10, 2);
2527     int S = extract32(insn, 12, 1);
2528     int opc = extract32(insn, 13, 3);
2529     int R = extract32(insn, 21, 1);
2530     int is_load = extract32(insn, 22, 1);
2531     int is_postidx = extract32(insn, 23, 1);
2532     int is_q = extract32(insn, 30, 1);
2533
2534     int scale = extract32(opc, 1, 2);
2535     int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
2536     bool replicate = false;
2537     int index = is_q << 3 | S << 2 | size;
2538     int ebytes, xs;
2539     TCGv_i64 tcg_addr, tcg_rn;
2540
2541     switch (scale) {
2542     case 3:
2543         if (!is_load || S) {
2544             unallocated_encoding(s);
2545             return;
2546         }
2547         scale = size;
2548         replicate = true;
2549         break;
2550     case 0:
2551         break;
2552     case 1:
2553         if (extract32(size, 0, 1)) {
2554             unallocated_encoding(s);
2555             return;
2556         }
2557         index >>= 1;
2558         break;
2559     case 2:
2560         if (extract32(size, 1, 1)) {
2561             unallocated_encoding(s);
2562             return;
2563         }
2564         if (!extract32(size, 0, 1)) {
2565             index >>= 2;
2566         } else {
2567             if (S) {
2568                 unallocated_encoding(s);
2569                 return;
2570             }
2571             index >>= 3;
2572             scale = 3;
2573         }
2574         break;
2575     default:
2576         g_assert_not_reached();
2577     }
2578
2579     if (!fp_access_check(s)) {
2580         return;
2581     }
2582
2583     ebytes = 1 << scale;
2584
2585     if (rn == 31) {
2586         gen_check_sp_alignment(s);
2587     }
2588
2589     tcg_rn = cpu_reg_sp(s, rn);
2590     tcg_addr = tcg_temp_new_i64();
2591     tcg_gen_mov_i64(tcg_addr, tcg_rn);
2592
2593     for (xs = 0; xs < selem; xs++) {
2594         if (replicate) {
2595             /* Load and replicate to all elements */
2596             uint64_t mulconst;
2597             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
2598
2599             tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
2600                                 get_mem_index(s), MO_TE + scale);
2601             switch (scale) {
2602             case 0:
2603                 mulconst = 0x0101010101010101ULL;
2604                 break;
2605             case 1:
2606                 mulconst = 0x0001000100010001ULL;
2607                 break;
2608             case 2:
2609                 mulconst = 0x0000000100000001ULL;
2610                 break;
2611             case 3:
2612                 mulconst = 0;
2613                 break;
2614             default:
2615                 g_assert_not_reached();
2616             }
2617             if (mulconst) {
2618                 tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
2619             }
2620             write_vec_element(s, tcg_tmp, rt, 0, MO_64);
2621             if (is_q) {
2622                 write_vec_element(s, tcg_tmp, rt, 1, MO_64);
2623             } else {
2624                 clear_vec_high(s, rt);
2625             }
2626             tcg_temp_free_i64(tcg_tmp);
2627         } else {
2628             /* Load/store one element per register */
2629             if (is_load) {
2630                 do_vec_ld(s, rt, index, tcg_addr, MO_TE + scale);
2631             } else {
2632                 do_vec_st(s, rt, index, tcg_addr, MO_TE + scale);
2633             }
2634         }
2635         tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2636         rt = (rt + 1) % 32;
2637     }
2638
2639     if (is_postidx) {
2640         int rm = extract32(insn, 16, 5);
2641         if (rm == 31) {
2642             tcg_gen_mov_i64(tcg_rn, tcg_addr);
2643         } else {
2644             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2645         }
2646     }
2647     tcg_temp_free_i64(tcg_addr);
2648 }
2649
2650 /* C3.3 Loads and stores */
2651 static void disas_ldst(DisasContext *s, uint32_t insn)
2652 {
2653     switch (extract32(insn, 24, 6)) {
2654     case 0x08: /* Load/store exclusive */
2655         disas_ldst_excl(s, insn);
2656         break;
2657     case 0x18: case 0x1c: /* Load register (literal) */
2658         disas_ld_lit(s, insn);
2659         break;
2660     case 0x28: case 0x29:
2661     case 0x2c: case 0x2d: /* Load/store pair (all forms) */
2662         disas_ldst_pair(s, insn);
2663         break;
2664     case 0x38: case 0x39:
2665     case 0x3c: case 0x3d: /* Load/store register (all forms) */
2666         disas_ldst_reg(s, insn);
2667         break;
2668     case 0x0c: /* AdvSIMD load/store multiple structures */
2669         disas_ldst_multiple_struct(s, insn);
2670         break;
2671     case 0x0d: /* AdvSIMD load/store single structure */
2672         disas_ldst_single_struct(s, insn);
2673         break;
2674     default:
2675         unallocated_encoding(s);
2676         break;
2677     }
2678 }
2679
2680 /* C3.4.6 PC-rel. addressing
2681  *   31  30   29 28       24 23                5 4    0
2682  * +----+-------+-----------+-------------------+------+
2683  * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
2684  * +----+-------+-----------+-------------------+------+
2685  */
2686 static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
2687 {
2688     unsigned int page, rd;
2689     uint64_t base;
2690     uint64_t offset;
2691
2692     page = extract32(insn, 31, 1);
2693     /* SignExtend(immhi:immlo) -> offset */
2694     offset = sextract64(insn, 5, 19);
2695     offset = offset << 2 | extract32(insn, 29, 2);
2696     rd = extract32(insn, 0, 5);
2697     base = s->pc - 4;
2698
2699     if (page) {
2700         /* ADRP (page based) */
2701         base &= ~0xfff;
2702         offset <<= 12;
2703     }
2704
2705     tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
2706 }
2707
2708 /*
2709  * C3.4.1 Add/subtract (immediate)
2710  *
2711  *  31 30 29 28       24 23 22 21         10 9   5 4   0
2712  * +--+--+--+-----------+-----+-------------+-----+-----+
2713  * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn | Rd  |
2714  * +--+--+--+-----------+-----+-------------+-----+-----+
2715  *
2716  *    sf: 0 -> 32bit, 1 -> 64bit
2717  *    op: 0 -> add  , 1 -> sub
2718  *     S: 1 -> set flags
2719  * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
2720  */
2721 static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
2722 {
2723     int rd = extract32(insn, 0, 5);
2724     int rn = extract32(insn, 5, 5);
2725     uint64_t imm = extract32(insn, 10, 12);
2726     int shift = extract32(insn, 22, 2);
2727     bool setflags = extract32(insn, 29, 1);
2728     bool sub_op = extract32(insn, 30, 1);
2729     bool is_64bit = extract32(insn, 31, 1);
2730
2731     TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2732     TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
2733     TCGv_i64 tcg_result;
2734
2735     switch (shift) {
2736     case 0x0:
2737         break;
2738     case 0x1:
2739         imm <<= 12;
2740         break;
2741     default:
2742         unallocated_encoding(s);
2743         return;
2744     }
2745
2746     tcg_result = tcg_temp_new_i64();
2747     if (!setflags) {
2748         if (sub_op) {
2749             tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
2750         } else {
2751             tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
2752         }
2753     } else {
2754         TCGv_i64 tcg_imm = tcg_const_i64(imm);
2755         if (sub_op) {
2756             gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2757         } else {
2758             gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2759         }
2760         tcg_temp_free_i64(tcg_imm);
2761     }
2762
2763     if (is_64bit) {
2764         tcg_gen_mov_i64(tcg_rd, tcg_result);
2765     } else {
2766         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
2767     }
2768
2769     tcg_temp_free_i64(tcg_result);
2770 }
2771
/* Replicate an element of e bits across a 64 bit integer.  The caller
 * supplies the element in the low e bits of mask (all higher bits zero);
 * the pattern width is doubled on every pass until it covers 64 bits.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    unsigned int width;

    assert(e != 0);
    for (width = e; width < 64; width <<= 1) {
        /* copy the pattern built so far into the next 'width' bits up */
        mask |= mask << width;
    }
    return mask;
}
2785
/* Return a value whose low 'length' bits are set and whose remaining
 * bits are clear.  length must satisfy 0 < length <= 64.
 */
static inline uint64_t bitmask64(unsigned int length)
{
    unsigned int cleared_bits;

    assert(length > 0 && length <= 64);
    /* Shift an all-ones value right so only 'length' ones remain; the
     * shift count is 0..63 thanks to the range check above.
     */
    cleared_bits = 64 - length;
    return ~(uint64_t)0 >> cleared_bits;
}
2792
/* Cut-down version of the DecodeBitMasks() pseudocode that produces only
 * the wmask.  On success the decoded 64 bit pattern is stored in *result
 * and true is returned; false is returned (and *result is left alone)
 * when immn/imms/immr form a reserved combination, which the caller
 * should turn into a guest UNDEF.
 */
static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
                                   unsigned int imms, unsigned int immr)
{
    unsigned int esize, field_mask, run, rot;
    uint64_t elem;
    int log2_esize;

    assert(immn < 2 && imms < 64 && immr < 64);

    /* Every valid immediate is a 64 bit vector of identical elements
     * of esize = 2, 4, 8, 16, 32 or 64 bits.  An element holds a run
     * of 1 .. esize-1 set bits, rotated right by 0 .. esize-1 places.
     *
     * immn (1 bit) and imms (6 bits) give element size and run length:
     *   64 bit elements: immn = 1, imms = <run length - 1>
     *   32 bit elements: immn = 0, imms = 0 : <run length - 1>
     *   16 bit elements: immn = 0, imms = 10 : <run length - 1>
     *    8 bit elements: immn = 0, imms = 110 : <run length - 1>
     *    4 bit elements: immn = 0, imms = 1110 : <run length - 1>
     *    2 bit elements: immn = 0, imms = 11110 : <run length - 1>
     * (imms shown in binary).  The one combination left over, immn = 0
     * with imms = 11111x, is reserved, and so is any encoding whose
     * <run length - 1> field is all ones.
     *
     * The rotation is always immr % esize.
     */

    /* Element size: position of the top set bit of immn : NOT(imms) */
    log2_esize = 31 - clz32((immn << 6) | (~imms & 0x3f));
    if (log2_esize < 1) {
        /* immn == 0 and imms == 11111x (binary): reserved */
        return false;
    }
    esize = 1 << log2_esize;
    field_mask = esize - 1;

    run = imms & field_mask;
    if (run == field_mask) {
        /* an all-ones <run length - 1> field is reserved */
        return false;
    }
    rot = immr & field_mask;

    /* One element: run+1 low bits set, rotated right by rot inside an
     * esize-bit field.  rot == 0 is skipped so the left shift count
     * never reaches esize.
     */
    elem = bitmask64(run + 1);
    if (rot != 0) {
        elem = (elem >> rot) | (elem << (esize - rot));
        elem &= bitmask64(esize);
    }

    /* Tile that element across all 64 bits */
    *result = bitfield_replicate(elem, esize);
    return true;
}
2858
2859 /* C3.4.4 Logical (immediate)
2860  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
2861  * +----+-----+-------------+---+------+------+------+------+
2862  * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
2863  * +----+-----+-------------+---+------+------+------+------+
2864  */
2865 static void disas_logic_imm(DisasContext *s, uint32_t insn)
2866 {
2867     unsigned int sf, opc, is_n, immr, imms, rn, rd;
2868     TCGv_i64 tcg_rd, tcg_rn;
2869     uint64_t wmask;
2870     bool is_and = false;
2871
2872     sf = extract32(insn, 31, 1);
2873     opc = extract32(insn, 29, 2);
2874     is_n = extract32(insn, 22, 1);
2875     immr = extract32(insn, 16, 6);
2876     imms = extract32(insn, 10, 6);
2877     rn = extract32(insn, 5, 5);
2878     rd = extract32(insn, 0, 5);
2879
2880     if (!sf && is_n) {
2881         unallocated_encoding(s);
2882         return;
2883     }
2884
2885     if (opc == 0x3) { /* ANDS */
2886         tcg_rd = cpu_reg(s, rd);
2887     } else {
2888         tcg_rd = cpu_reg_sp(s, rd);
2889     }
2890     tcg_rn = cpu_reg(s, rn);
2891
2892     if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
2893         /* some immediate field values are reserved */
2894         unallocated_encoding(s);
2895         return;
2896     }
2897
2898     if (!sf) {
2899         wmask &= 0xffffffff;
2900     }
2901
2902     switch (opc) {
2903     case 0x3: /* ANDS */
2904     case 0x0: /* AND */
2905         tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
2906         is_and = true;
2907         break;
2908     case 0x1: /* ORR */
2909         tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
2910         break;
2911     case 0x2: /* EOR */
2912         tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
2913         break;
2914     default:
2915         assert(FALSE); /* must handle all above */
2916         break;
2917     }
2918
2919     if (!sf && !is_and) {
2920         /* zero extend final result; we know we can skip this for AND
2921          * since the immediate had the high 32 bits clear.
2922          */
2923         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2924     }
2925
2926     if (opc == 3) { /* ANDS */
2927         gen_logic_CC(sf, tcg_rd);
2928     }
2929 }
2930
2931 /*
2932  * C3.4.5 Move wide (immediate)
2933  *
2934  *  31 30 29 28         23 22 21 20             5 4    0
2935  * +--+-----+-------------+-----+----------------+------+
2936  * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
2937  * +--+-----+-------------+-----+----------------+------+
2938  *
2939  * sf: 0 -> 32 bit, 1 -> 64 bit
2940  * opc: 00 -> N, 10 -> Z, 11 -> K
2941  * hw: shift/16 (0,16, and sf only 32, 48)
2942  */
2943 static void disas_movw_imm(DisasContext *s, uint32_t insn)
2944 {
2945     int rd = extract32(insn, 0, 5);
2946     uint64_t imm = extract32(insn, 5, 16);
2947     int sf = extract32(insn, 31, 1);
2948     int opc = extract32(insn, 29, 2);
2949     int pos = extract32(insn, 21, 2) << 4;
2950     TCGv_i64 tcg_rd = cpu_reg(s, rd);
2951     TCGv_i64 tcg_imm;
2952
2953     if (!sf && (pos >= 32)) {
2954         unallocated_encoding(s);
2955         return;
2956     }
2957
2958     switch (opc) {
2959     case 0: /* MOVN */
2960     case 2: /* MOVZ */
2961         imm <<= pos;
2962         if (opc == 0) {
2963             imm = ~imm;
2964         }
2965         if (!sf) {
2966             imm &= 0xffffffffu;
2967         }
2968         tcg_gen_movi_i64(tcg_rd, imm);
2969         break;
2970     case 3: /* MOVK */
2971         tcg_imm = tcg_const_i64(imm);
2972         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
2973         tcg_temp_free_i64(tcg_imm);
2974         if (!sf) {
2975             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
2976         }
2977         break;
2978     default:
2979         unallocated_encoding(s);
2980         break;
2981     }
2982 }
2983
2984 /* C3.4.2 Bitfield
2985  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
2986  * +----+-----+-------------+---+------+------+------+------+
2987  * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
2988  * +----+-----+-------------+---+------+------+------+------+
2989  */
2990 static void disas_bitfield(DisasContext *s, uint32_t insn)
2991 {
2992     unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
2993     TCGv_i64 tcg_rd, tcg_tmp;
2994
2995     sf = extract32(insn, 31, 1);
2996     opc = extract32(insn, 29, 2);
2997     n = extract32(insn, 22, 1);
2998     ri = extract32(insn, 16, 6);
2999     si = extract32(insn, 10, 6);
3000     rn = extract32(insn, 5, 5);
3001     rd = extract32(insn, 0, 5);
3002     bitsize = sf ? 64 : 32;
3003
3004     if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
3005         unallocated_encoding(s);
3006         return;
3007     }
3008
3009     tcg_rd = cpu_reg(s, rd);
3010
3011     /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
3012        to be smaller than bitsize, we'll never reference data outside the
3013        low 32-bits anyway.  */
3014     tcg_tmp = read_cpu_reg(s, rn, 1);
3015
3016     /* Recognize the common aliases.  */
3017     if (opc == 0) { /* SBFM */
3018         if (ri == 0) {
3019             if (si == 7) { /* SXTB */
3020                 tcg_gen_ext8s_i64(tcg_rd, tcg_tmp);
3021                 goto done;
3022             } else if (si == 15) { /* SXTH */
3023                 tcg_gen_ext16s_i64(tcg_rd, tcg_tmp);
3024                 goto done;
3025             } else if (si == 31) { /* SXTW */
3026                 tcg_gen_ext32s_i64(tcg_rd, tcg_tmp);
3027                 goto done;
3028             }
3029         }
3030         if (si == 63 || (si == 31 && ri <= si)) { /* ASR */
3031             if (si == 31) {
3032                 tcg_gen_ext32s_i64(tcg_tmp, tcg_tmp);
3033             }
3034             tcg_gen_sari_i64(tcg_rd, tcg_tmp, ri);
3035             goto done;
3036         }
3037     } else if (opc == 2) { /* UBFM */
3038         if (ri == 0) { /* UXTB, UXTH, plus non-canonical AND */
3039             tcg_gen_andi_i64(tcg_rd, tcg_tmp, bitmask64(si + 1));
3040             return;
3041         }
3042         if (si == 63 || (si == 31 && ri <= si)) { /* LSR */
3043             if (si == 31) {
3044                 tcg_gen_ext32u_i64(tcg_tmp, tcg_tmp);
3045             }
3046             tcg_gen_shri_i64(tcg_rd, tcg_tmp, ri);
3047             return;
3048         }
3049         if (si + 1 == ri && si != bitsize - 1) { /* LSL */
3050             int shift = bitsize - 1 - si;
3051             tcg_gen_shli_i64(tcg_rd, tcg_tmp, shift);
3052             goto done;
3053         }
3054     }
3055
3056     if (opc != 1) { /* SBFM or UBFM */
3057         tcg_gen_movi_i64(tcg_rd, 0);
3058     }
3059
3060     /* do the bit move operation */
3061     if (si >= ri) {
3062         /* Wd<s-r:0> = Wn<s:r> */
3063         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
3064         pos = 0;
3065         len = (si - ri) + 1;
3066     } else {
3067         /* Wd<32+s-r,32-r> = Wn<s:0> */
3068         pos = bitsize - ri;
3069         len = si + 1;
3070     }
3071
3072     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
3073
3074     if (opc == 0) { /* SBFM - sign extend the destination field */
3075         tcg_gen_shli_i64(tcg_rd, tcg_rd, 64 - (pos + len));
3076         tcg_gen_sari_i64(tcg_rd, tcg_rd, 64 - (pos + len));
3077     }
3078
3079  done:
3080     if (!sf) { /* zero extend final result */
3081         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3082     }
3083 }
3084
3085 /* C3.4.3 Extract
3086  *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
3087  * +----+------+-------------+---+----+------+--------+------+------+
3088  * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
3089  * +----+------+-------------+---+----+------+--------+------+------+
3090  */
3091 static void disas_extract(DisasContext *s, uint32_t insn)
3092 {
3093     unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
3094
3095     sf = extract32(insn, 31, 1);
3096     n = extract32(insn, 22, 1);
3097     rm = extract32(insn, 16, 5);
3098     imm = extract32(insn, 10, 6);
3099     rn = extract32(insn, 5, 5);
3100     rd = extract32(insn, 0, 5);
3101     op21 = extract32(insn, 29, 2);
3102     op0 = extract32(insn, 21, 1);
3103     bitsize = sf ? 64 : 32;
3104
3105     if (sf != n || op21 || op0 || imm >= bitsize) {
3106         unallocated_encoding(s);
3107     } else {
3108         TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
3109
3110         tcg_rd = cpu_reg(s, rd);
3111
3112         if (unlikely(imm == 0)) {
3113             /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
3114              * so an extract from bit 0 is a special case.
3115              */
3116             if (sf) {
3117                 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
3118             } else {
3119                 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
3120             }
3121         } else if (rm == rn) { /* ROR */
3122             tcg_rm = cpu_reg(s, rm);
3123             if (sf) {
3124                 tcg_gen_rotri_i64(tcg_rd, tcg_rm, imm);
3125             } else {
3126                 TCGv_i32 tmp = tcg_temp_new_i32();
3127                 tcg_gen_extrl_i64_i32(tmp, tcg_rm);
3128                 tcg_gen_rotri_i32(tmp, tmp, imm);
3129                 tcg_gen_extu_i32_i64(tcg_rd, tmp);
3130                 tcg_temp_free_i32(tmp);
3131             }
3132         } else {
3133             tcg_rm = read_cpu_reg(s, rm, sf);
3134             tcg_rn = read_cpu_reg(s, rn, sf);
3135             tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
3136             tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
3137             tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
3138             if (!sf) {
3139                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3140             }
3141         }
3142     }
3143 }
3144
3145 /* C3.4 Data processing - immediate */
3146 static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
3147 {
3148     switch (extract32(insn, 23, 6)) {
3149     case 0x20: case 0x21: /* PC-rel. addressing */
3150         disas_pc_rel_adr(s, insn);
3151         break;
3152     case 0x22: case 0x23: /* Add/subtract (immediate) */
3153         disas_add_sub_imm(s, insn);
3154         break;
3155     case 0x24: /* Logical (immediate) */
3156         disas_logic_imm(s, insn);
3157         break;
3158     case 0x25: /* Move wide (immediate) */
3159         disas_movw_imm(s, insn);
3160         break;
3161     case 0x26: /* Bitfield */
3162         disas_bitfield(s, insn);
3163         break;
3164     case 0x27: /* Extract */
3165         disas_extract(s, insn);
3166         break;
3167     default:
3168         unallocated_encoding(s);
3169         break;
3170     }
3171 }
3172
3173 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
3174  * Note that it is the caller's responsibility to ensure that the
3175  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
3176  * mandated semantics for out of range shifts.
3177  */
3178 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
3179                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
3180 {
3181     switch (shift_type) {
3182     case A64_SHIFT_TYPE_LSL:
3183         tcg_gen_shl_i64(dst, src, shift_amount);
3184         break;
3185     case A64_SHIFT_TYPE_LSR:
3186         tcg_gen_shr_i64(dst, src, shift_amount);
3187         break;
3188     case A64_SHIFT_TYPE_ASR:
3189         if (!sf) {
3190             tcg_gen_ext32s_i64(dst, src);
3191         }
3192         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
3193         break;
3194     case A64_SHIFT_TYPE_ROR:
3195         if (sf) {
3196             tcg_gen_rotr_i64(dst, src, shift_amount);
3197         } else {
3198             TCGv_i32 t0, t1;
3199             t0 = tcg_temp_new_i32();
3200             t1 = tcg_temp_new_i32();
3201             tcg_gen_extrl_i64_i32(t0, src);
3202             tcg_gen_extrl_i64_i32(t1, shift_amount);
3203             tcg_gen_rotr_i32(t0, t0, t1);
3204             tcg_gen_extu_i32_i64(dst, t0);
3205             tcg_temp_free_i32(t0);
3206             tcg_temp_free_i32(t1);
3207         }
3208         break;
3209     default:
3210         assert(FALSE); /* all shift types should be handled */
3211         break;
3212     }
3213
3214     if (!sf) { /* zero extend final result */
3215         tcg_gen_ext32u_i64(dst, dst);
3216     }
3217 }
3218
3219 /* Shift a TCGv src by immediate, put result in dst.
3220  * The shift amount must be in range (this should always be true as the
3221  * relevant instructions will UNDEF on bad shift immediates).
3222  */
3223 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
3224                           enum a64_shift_type shift_type, unsigned int shift_i)
3225 {
3226     assert(shift_i < (sf ? 64 : 32));
3227
3228     if (shift_i == 0) {
3229         tcg_gen_mov_i64(dst, src);
3230     } else {
3231         TCGv_i64 shift_const;
3232
3233         shift_const = tcg_const_i64(shift_i);
3234         shift_reg(dst, src, sf, shift_type, shift_const);
3235         tcg_temp_free_i64(shift_const);
3236     }
3237 }
3238
3239 /* C3.5.10 Logical (shifted register)
3240  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
3241  * +----+-----+-----------+-------+---+------+--------+------+------+
3242  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
3243  * +----+-----+-----------+-------+---+------+--------+------+------+
3244  */
3245 static void disas_logic_reg(DisasContext *s, uint32_t insn)
3246 {
3247     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
3248     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
3249
3250     sf = extract32(insn, 31, 1);
3251     opc = extract32(insn, 29, 2);
3252     shift_type = extract32(insn, 22, 2);
3253     invert = extract32(insn, 21, 1);
3254     rm = extract32(insn, 16, 5);
3255     shift_amount = extract32(insn, 10, 6);
3256     rn = extract32(insn, 5, 5);
3257     rd = extract32(insn, 0, 5);
3258
3259     if (!sf && (shift_amount & (1 << 5))) {
3260         unallocated_encoding(s);
3261         return;
3262     }
3263
3264     tcg_rd = cpu_reg(s, rd);
3265
3266     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
3267         /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
3268          * register-register MOV and MVN, so it is worth special casing.
3269          */
3270         tcg_rm = cpu_reg(s, rm);
3271         if (invert) {
3272             tcg_gen_not_i64(tcg_rd, tcg_rm);
3273             if (!sf) {
3274                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3275             }
3276         } else {
3277             if (sf) {
3278                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
3279             } else {
3280                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
3281             }
3282         }
3283         return;
3284     }
3285
3286     tcg_rm = read_cpu_reg(s, rm, sf);
3287
3288     if (shift_amount) {
3289         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
3290     }
3291
3292     tcg_rn = cpu_reg(s, rn);
3293
3294     switch (opc | (invert << 2)) {
3295     case 0: /* AND */
3296     case 3: /* ANDS */
3297         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
3298         break;
3299     case 1: /* ORR */
3300         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
3301         break;
3302     case 2: /* EOR */
3303         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
3304         break;
3305     case 4: /* BIC */
3306     case 7: /* BICS */
3307         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
3308         break;
3309     case 5: /* ORN */
3310         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
3311         break;
3312     case 6: /* EON */
3313         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
3314         break;
3315     default:
3316         assert(FALSE);
3317         break;
3318     }
3319
3320     if (!sf) {
3321         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3322     }
3323
3324     if (opc == 3) {
3325         gen_logic_CC(sf, tcg_rd);
3326     }
3327 }
3328
3329 /*
3330  * C3.5.1 Add/subtract (extended register)
3331  *
3332  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
3333  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3334  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
3335  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3336  *
3337  *  sf: 0 -> 32bit, 1 -> 64bit
3338  *  op: 0 -> add  , 1 -> sub
3339  *   S: 1 -> set flags
3340  * opt: 00
3341  * option: extension type (see DecodeRegExtend)
3342  * imm3: optional shift to Rm
3343  *
3344  * Rd = Rn + LSL(extend(Rm), amount)
3345  */
3346 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
3347 {
3348     int rd = extract32(insn, 0, 5);
3349     int rn = extract32(insn, 5, 5);
3350     int imm3 = extract32(insn, 10, 3);
3351     int option = extract32(insn, 13, 3);
3352     int rm = extract32(insn, 16, 5);
3353     bool setflags = extract32(insn, 29, 1);
3354     bool sub_op = extract32(insn, 30, 1);
3355     bool sf = extract32(insn, 31, 1);
3356
3357     TCGv_i64 tcg_rm, tcg_rn; /* temps */
3358     TCGv_i64 tcg_rd;
3359     TCGv_i64 tcg_result;
3360
3361     if (imm3 > 4) {
3362         unallocated_encoding(s);
3363         return;
3364     }
3365
3366     /* non-flag setting ops may use SP */
3367     if (!setflags) {
3368         tcg_rd = cpu_reg_sp(s, rd);
3369     } else {
3370         tcg_rd = cpu_reg(s, rd);
3371     }
3372     tcg_rn = read_cpu_reg_sp(s, rn, sf);
3373
3374     tcg_rm = read_cpu_reg(s, rm, sf);
3375     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
3376
3377     tcg_result = tcg_temp_new_i64();
3378
3379     if (!setflags) {
3380         if (sub_op) {
3381             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3382         } else {
3383             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3384         }
3385     } else {
3386         if (sub_op) {
3387             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3388         } else {
3389             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3390         }
3391     }
3392
3393     if (sf) {
3394         tcg_gen_mov_i64(tcg_rd, tcg_result);
3395     } else {
3396         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3397     }
3398
3399     tcg_temp_free_i64(tcg_result);
3400 }
3401
3402 /*
3403  * C3.5.2 Add/subtract (shifted register)
3404  *
3405  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
3406  * +--+--+--+-----------+-----+--+-------+---------+------+------+
3407  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
3408  * +--+--+--+-----------+-----+--+-------+---------+------+------+
3409  *
3410  *    sf: 0 -> 32bit, 1 -> 64bit
3411  *    op: 0 -> add  , 1 -> sub
3412  *     S: 1 -> set flags
3413  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
3414  *  imm6: Shift amount to apply to Rm before the add/sub
3415  */
3416 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
3417 {
3418     int rd = extract32(insn, 0, 5);
3419     int rn = extract32(insn, 5, 5);
3420     int imm6 = extract32(insn, 10, 6);
3421     int rm = extract32(insn, 16, 5);
3422     int shift_type = extract32(insn, 22, 2);
3423     bool setflags = extract32(insn, 29, 1);
3424     bool sub_op = extract32(insn, 30, 1);
3425     bool sf = extract32(insn, 31, 1);
3426
3427     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3428     TCGv_i64 tcg_rn, tcg_rm;
3429     TCGv_i64 tcg_result;
3430
3431     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
3432         unallocated_encoding(s);
3433         return;
3434     }
3435
3436     tcg_rn = read_cpu_reg(s, rn, sf);
3437     tcg_rm = read_cpu_reg(s, rm, sf);
3438
3439     shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
3440
3441     tcg_result = tcg_temp_new_i64();
3442
3443     if (!setflags) {
3444         if (sub_op) {
3445             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3446         } else {
3447             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3448         }
3449     } else {
3450         if (sub_op) {
3451             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3452         } else {
3453             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3454         }
3455     }
3456
3457     if (sf) {
3458         tcg_gen_mov_i64(tcg_rd, tcg_result);
3459     } else {
3460         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3461     }
3462
3463     tcg_temp_free_i64(tcg_result);
3464 }
3465
3466 /* C3.5.9 Data-processing (3 source)
3467
3468    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
3469   +--+------+-----------+------+------+----+------+------+------+
3470   |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
3471   +--+------+-----------+------+------+----+------+------+------+
3472
3473  */
3474 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
3475 {
3476     int rd = extract32(insn, 0, 5);
3477     int rn = extract32(insn, 5, 5);
3478     int ra = extract32(insn, 10, 5);
3479     int rm = extract32(insn, 16, 5);
3480     int op_id = (extract32(insn, 29, 3) << 4) |
3481         (extract32(insn, 21, 3) << 1) |
3482         extract32(insn, 15, 1);
3483     bool sf = extract32(insn, 31, 1);
3484     bool is_sub = extract32(op_id, 0, 1);
3485     bool is_high = extract32(op_id, 2, 1);
3486     bool is_signed = false;
3487     TCGv_i64 tcg_op1;
3488     TCGv_i64 tcg_op2;
3489     TCGv_i64 tcg_tmp;
3490
3491     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
3492     switch (op_id) {
3493     case 0x42: /* SMADDL */
3494     case 0x43: /* SMSUBL */
3495     case 0x44: /* SMULH */
3496         is_signed = true;
3497         break;
3498     case 0x0: /* MADD (32bit) */
3499     case 0x1: /* MSUB (32bit) */
3500     case 0x40: /* MADD (64bit) */
3501     case 0x41: /* MSUB (64bit) */
3502     case 0x4a: /* UMADDL */
3503     case 0x4b: /* UMSUBL */
3504     case 0x4c: /* UMULH */
3505         break;
3506     default:
3507         unallocated_encoding(s);
3508         return;
3509     }
3510
3511     if (is_high) {
3512         TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
3513         TCGv_i64 tcg_rd = cpu_reg(s, rd);
3514         TCGv_i64 tcg_rn = cpu_reg(s, rn);
3515         TCGv_i64 tcg_rm = cpu_reg(s, rm);
3516
3517         if (is_signed) {
3518             tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3519         } else {
3520             tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3521         }
3522
3523         tcg_temp_free_i64(low_bits);
3524         return;
3525     }
3526
3527     tcg_op1 = tcg_temp_new_i64();
3528     tcg_op2 = tcg_temp_new_i64();
3529     tcg_tmp = tcg_temp_new_i64();
3530
3531     if (op_id < 0x42) {
3532         tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
3533         tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
3534     } else {
3535         if (is_signed) {
3536             tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
3537             tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
3538         } else {
3539             tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
3540             tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
3541         }
3542     }
3543
3544     if (ra == 31 && !is_sub) {
3545         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
3546         tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
3547     } else {
3548         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
3549         if (is_sub) {
3550             tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3551         } else {
3552             tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3553         }
3554     }
3555
3556     if (!sf) {
3557         tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
3558     }
3559
3560     tcg_temp_free_i64(tcg_op1);
3561     tcg_temp_free_i64(tcg_op2);
3562     tcg_temp_free_i64(tcg_tmp);
3563 }
3564
3565 /* C3.5.3 - Add/subtract (with carry)
3566  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15   10  9    5 4   0
3567  * +--+--+--+------------------------+------+---------+------+-----+
3568  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | opcode2 |  Rn  |  Rd |
3569  * +--+--+--+------------------------+------+---------+------+-----+
3570  *                                            [000000]
3571  */
3572
3573 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
3574 {
3575     unsigned int sf, op, setflags, rm, rn, rd;
3576     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
3577
3578     if (extract32(insn, 10, 6) != 0) {
3579         unallocated_encoding(s);
3580         return;
3581     }
3582
3583     sf = extract32(insn, 31, 1);
3584     op = extract32(insn, 30, 1);
3585     setflags = extract32(insn, 29, 1);
3586     rm = extract32(insn, 16, 5);
3587     rn = extract32(insn, 5, 5);
3588     rd = extract32(insn, 0, 5);
3589
3590     tcg_rd = cpu_reg(s, rd);
3591     tcg_rn = cpu_reg(s, rn);
3592
3593     if (op) {
3594         tcg_y = new_tmp_a64(s);
3595         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
3596     } else {
3597         tcg_y = cpu_reg(s, rm);
3598     }
3599
3600     if (setflags) {
3601         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
3602     } else {
3603         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
3604     }
3605 }
3606
3607 /* C3.5.4 - C3.5.5 Conditional compare (immediate / register)
3608  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
3609  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3610  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
3611  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3612  *        [1]                             y                [0]       [0]
3613  */
3614 static void disas_cc(DisasContext *s, uint32_t insn)
3615 {
3616     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
3617     TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
3618     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
3619     DisasCompare c;
3620
3621     if (!extract32(insn, 29, 1)) {
3622         unallocated_encoding(s);
3623         return;
3624     }
3625     if (insn & (1 << 10 | 1 << 4)) {
3626         unallocated_encoding(s);
3627         return;
3628     }
3629     sf = extract32(insn, 31, 1);
3630     op = extract32(insn, 30, 1);
3631     is_imm = extract32(insn, 11, 1);
3632     y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
3633     cond = extract32(insn, 12, 4);
3634     rn = extract32(insn, 5, 5);
3635     nzcv = extract32(insn, 0, 4);
3636
3637     /* Set T0 = !COND.  */
3638     tcg_t0 = tcg_temp_new_i32();
3639     arm_test_cc(&c, cond);
3640     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
3641     arm_free_cc(&c);
3642
3643     /* Load the arguments for the new comparison.  */
3644     if (is_imm) {
3645         tcg_y = new_tmp_a64(s);
3646         tcg_gen_movi_i64(tcg_y, y);
3647     } else {
3648         tcg_y = cpu_reg(s, y);
3649     }
3650     tcg_rn = cpu_reg(s, rn);
3651
3652     /* Set the flags for the new comparison.  */
3653     tcg_tmp = tcg_temp_new_i64();
3654     if (op) {
3655         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3656     } else {
3657         gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3658     }
3659     tcg_temp_free_i64(tcg_tmp);
3660
3661     /* If COND was false, force the flags to #nzcv.  Compute two masks
3662      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
3663      * For tcg hosts that support ANDC, we can make do with just T1.
3664      * In either case, allow the tcg optimizer to delete any unused mask.
3665      */
3666     tcg_t1 = tcg_temp_new_i32();
3667     tcg_t2 = tcg_temp_new_i32();
3668     tcg_gen_neg_i32(tcg_t1, tcg_t0);
3669     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
3670
3671     if (nzcv & 8) { /* N */
3672         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
3673     } else {
3674         if (TCG_TARGET_HAS_andc_i32) {
3675             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
3676         } else {
3677             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
3678         }
3679     }
3680     if (nzcv & 4) { /* Z */
3681         if (TCG_TARGET_HAS_andc_i32) {
3682             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
3683         } else {
3684             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
3685         }
3686     } else {
3687         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
3688     }
3689     if (nzcv & 2) { /* C */
3690         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
3691     } else {
3692         if (TCG_TARGET_HAS_andc_i32) {
3693             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
3694         } else {
3695             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
3696         }
3697     }
3698     if (nzcv & 1) { /* V */
3699         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
3700     } else {
3701         if (TCG_TARGET_HAS_andc_i32) {
3702             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
3703         } else {
3704             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
3705         }
3706     }
3707     tcg_temp_free_i32(tcg_t0);
3708     tcg_temp_free_i32(tcg_t1);
3709     tcg_temp_free_i32(tcg_t2);
3710 }
3711
3712 /* C3.5.6 Conditional select
3713  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
3714  * +----+----+---+-----------------+------+------+-----+------+------+
3715  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
3716  * +----+----+---+-----------------+------+------+-----+------+------+
3717  */
3718 static void disas_cond_select(DisasContext *s, uint32_t insn)
3719 {
3720     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
3721     TCGv_i64 tcg_rd, zero;
3722     DisasCompare64 c;
3723
3724     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
3725         /* S == 1 or op2<1> == 1 */
3726         unallocated_encoding(s);
3727         return;
3728     }
3729     sf = extract32(insn, 31, 1);
3730     else_inv = extract32(insn, 30, 1);
3731     rm = extract32(insn, 16, 5);
3732     cond = extract32(insn, 12, 4);
3733     else_inc = extract32(insn, 10, 1);
3734     rn = extract32(insn, 5, 5);
3735     rd = extract32(insn, 0, 5);
3736
3737     tcg_rd = cpu_reg(s, rd);
3738
3739     a64_test_cc(&c, cond);
3740     zero = tcg_const_i64(0);
3741
3742     if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
3743         /* CSET & CSETM.  */
3744         tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
3745         if (else_inv) {
3746             tcg_gen_neg_i64(tcg_rd, tcg_rd);
3747         }
3748     } else {
3749         TCGv_i64 t_true = cpu_reg(s, rn);
3750         TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
3751         if (else_inv && else_inc) {
3752             tcg_gen_neg_i64(t_false, t_false);
3753         } else if (else_inv) {
3754             tcg_gen_not_i64(t_false, t_false);
3755         } else if (else_inc) {
3756             tcg_gen_addi_i64(t_false, t_false, 1);
3757         }
3758         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
3759     }
3760
3761     tcg_temp_free_i64(zero);
3762     a64_free_cc(&c);
3763
3764     if (!sf) {
3765         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3766     }
3767 }
3768
3769 static void handle_clz(DisasContext *s, unsigned int sf,
3770                        unsigned int rn, unsigned int rd)
3771 {
3772     TCGv_i64 tcg_rd, tcg_rn;
3773     tcg_rd = cpu_reg(s, rd);
3774     tcg_rn = cpu_reg(s, rn);
3775
3776     if (sf) {
3777         gen_helper_clz64(tcg_rd, tcg_rn);
3778     } else {
3779         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3780         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3781         gen_helper_clz(tcg_tmp32, tcg_tmp32);
3782         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3783         tcg_temp_free_i32(tcg_tmp32);
3784     }
3785 }
3786
3787 static void handle_cls(DisasContext *s, unsigned int sf,
3788                        unsigned int rn, unsigned int rd)
3789 {
3790     TCGv_i64 tcg_rd, tcg_rn;
3791     tcg_rd = cpu_reg(s, rd);
3792     tcg_rn = cpu_reg(s, rn);
3793
3794     if (sf) {
3795         gen_helper_cls64(tcg_rd, tcg_rn);
3796     } else {
3797         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3798         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3799         gen_helper_cls32(tcg_tmp32, tcg_tmp32);
3800         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3801         tcg_temp_free_i32(tcg_tmp32);
3802     }
3803 }
3804
3805 static void handle_rbit(DisasContext *s, unsigned int sf,
3806                         unsigned int rn, unsigned int rd)
3807 {
3808     TCGv_i64 tcg_rd, tcg_rn;
3809     tcg_rd = cpu_reg(s, rd);
3810     tcg_rn = cpu_reg(s, rn);
3811
3812     if (sf) {
3813         gen_helper_rbit64(tcg_rd, tcg_rn);
3814     } else {
3815         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3816         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3817         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
3818         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3819         tcg_temp_free_i32(tcg_tmp32);
3820     }
3821 }
3822
3823 /* C5.6.149 REV with sf==1, opcode==3 ("REV64") */
3824 static void handle_rev64(DisasContext *s, unsigned int sf,
3825                          unsigned int rn, unsigned int rd)
3826 {
3827     if (!sf) {
3828         unallocated_encoding(s);
3829         return;
3830     }
3831     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
3832 }
3833
3834 /* C5.6.149 REV with sf==0, opcode==2
3835  * C5.6.151 REV32 (sf==1, opcode==2)
3836  */
3837 static void handle_rev32(DisasContext *s, unsigned int sf,
3838                          unsigned int rn, unsigned int rd)
3839 {
3840     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3841
3842     if (sf) {
3843         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3844         TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3845
3846         /* bswap32_i64 requires zero high word */
3847         tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
3848         tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
3849         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
3850         tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
3851         tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
3852
3853         tcg_temp_free_i64(tcg_tmp);
3854     } else {
3855         tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
3856         tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
3857     }
3858 }
3859
3860 /* C5.6.150 REV16 (opcode==1) */
3861 static void handle_rev16(DisasContext *s, unsigned int sf,
3862                          unsigned int rn, unsigned int rd)
3863 {
3864     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3865     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3866     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3867
3868     tcg_gen_andi_i64(tcg_tmp, tcg_rn, 0xffff);
3869     tcg_gen_bswap16_i64(tcg_rd, tcg_tmp);
3870
3871     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 16);
3872     tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
3873     tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3874     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 16, 16);
3875
3876     if (sf) {
3877         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
3878         tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
3879         tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3880         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 32, 16);
3881
3882         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 48);
3883         tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3884         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 48, 16);
3885     }
3886
3887     tcg_temp_free_i64(tcg_tmp);
3888 }
3889
3890 /* C3.5.7 Data-processing (1 source)
3891  *   31  30  29  28             21 20     16 15    10 9    5 4    0
3892  * +----+---+---+-----------------+---------+--------+------+------+
3893  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
3894  * +----+---+---+-----------------+---------+--------+------+------+
3895  */
3896 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
3897 {
3898     unsigned int sf, opcode, rn, rd;
3899
3900     if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
3901         unallocated_encoding(s);
3902         return;
3903     }
3904
3905     sf = extract32(insn, 31, 1);
3906     opcode = extract32(insn, 10, 6);
3907     rn = extract32(insn, 5, 5);
3908     rd = extract32(insn, 0, 5);
3909
3910     switch (opcode) {
3911     case 0: /* RBIT */
3912         handle_rbit(s, sf, rn, rd);
3913         break;
3914     case 1: /* REV16 */
3915         handle_rev16(s, sf, rn, rd);
3916         break;
3917     case 2: /* REV32 */
3918         handle_rev32(s, sf, rn, rd);
3919         break;
3920     case 3: /* REV64 */
3921         handle_rev64(s, sf, rn, rd);
3922         break;
3923     case 4: /* CLZ */
3924         handle_clz(s, sf, rn, rd);
3925         break;
3926     case 5: /* CLS */
3927         handle_cls(s, sf, rn, rd);
3928         break;
3929     }
3930 }
3931
3932 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
3933                        unsigned int rm, unsigned int rn, unsigned int rd)
3934 {
3935     TCGv_i64 tcg_n, tcg_m, tcg_rd;
3936     tcg_rd = cpu_reg(s, rd);
3937
3938     if (!sf && is_signed) {
3939         tcg_n = new_tmp_a64(s);
3940         tcg_m = new_tmp_a64(s);
3941         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
3942         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
3943     } else {
3944         tcg_n = read_cpu_reg(s, rn, sf);
3945         tcg_m = read_cpu_reg(s, rm, sf);
3946     }
3947
3948     if (is_signed) {
3949         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
3950     } else {
3951         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
3952     }
3953
3954     if (!sf) { /* zero extend final result */
3955         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3956     }
3957 }
3958
3959 /* C5.6.115 LSLV, C5.6.118 LSRV, C5.6.17 ASRV, C5.6.154 RORV */
3960 static void handle_shift_reg(DisasContext *s,
3961                              enum a64_shift_type shift_type, unsigned int sf,
3962                              unsigned int rm, unsigned int rn, unsigned int rd)
3963 {
3964     TCGv_i64 tcg_shift = tcg_temp_new_i64();
3965     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3966     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3967
3968     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
3969     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
3970     tcg_temp_free_i64(tcg_shift);
3971 }
3972
3973 /* CRC32[BHWX], CRC32C[BHWX] */
3974 static void handle_crc32(DisasContext *s,
3975                          unsigned int sf, unsigned int sz, bool crc32c,
3976                          unsigned int rm, unsigned int rn, unsigned int rd)
3977 {
3978     TCGv_i64 tcg_acc, tcg_val;
3979     TCGv_i32 tcg_bytes;
3980
3981     if (!arm_dc_feature(s, ARM_FEATURE_CRC)
3982         || (sf == 1 && sz != 3)
3983         || (sf == 0 && sz == 3)) {
3984         unallocated_encoding(s);
3985         return;
3986     }
3987
3988     if (sz == 3) {
3989         tcg_val = cpu_reg(s, rm);
3990     } else {
3991         uint64_t mask;
3992         switch (sz) {
3993         case 0:
3994             mask = 0xFF;
3995             break;
3996         case 1:
3997             mask = 0xFFFF;
3998             break;
3999         case 2:
4000             mask = 0xFFFFFFFF;
4001             break;
4002         default:
4003             g_assert_not_reached();
4004         }
4005         tcg_val = new_tmp_a64(s);
4006         tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
4007     }
4008
4009     tcg_acc = cpu_reg(s, rn);
4010     tcg_bytes = tcg_const_i32(1 << sz);
4011
4012     if (crc32c) {
4013         gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4014     } else {
4015         gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4016     }
4017
4018     tcg_temp_free_i32(tcg_bytes);
4019 }
4020
4021 /* C3.5.8 Data-processing (2 source)
4022  *   31   30  29 28             21 20  16 15    10 9    5 4    0
4023  * +----+---+---+-----------------+------+--------+------+------+
4024  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
4025  * +----+---+---+-----------------+------+--------+------+------+
4026  */
4027 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
4028 {
4029     unsigned int sf, rm, opcode, rn, rd;
4030     sf = extract32(insn, 31, 1);
4031     rm = extract32(insn, 16, 5);
4032     opcode = extract32(insn, 10, 6);
4033     rn = extract32(insn, 5, 5);
4034     rd = extract32(insn, 0, 5);
4035
4036     if (extract32(insn, 29, 1)) {
4037         unallocated_encoding(s);
4038         return;
4039     }
4040
4041     switch (opcode) {
4042     case 2: /* UDIV */
4043         handle_div(s, false, sf, rm, rn, rd);
4044         break;
4045     case 3: /* SDIV */
4046         handle_div(s, true, sf, rm, rn, rd);
4047         break;
4048     case 8: /* LSLV */
4049         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
4050         break;
4051     case 9: /* LSRV */
4052         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
4053         break;
4054     case 10: /* ASRV */
4055         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
4056         break;
4057     case 11: /* RORV */
4058         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
4059         break;
4060     case 16:
4061     case 17:
4062     case 18:
4063     case 19:
4064     case 20:
4065     case 21:
4066     case 22:
4067     case 23: /* CRC32 */
4068     {
4069         int sz = extract32(opcode, 0, 2);
4070         bool crc32c = extract32(opcode, 2, 1);
4071         handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
4072         break;
4073     }
4074     default:
4075         unallocated_encoding(s);
4076         break;
4077     }
4078 }
4079
4080 /* C3.5 Data processing - register */
4081 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
4082 {
4083     switch (extract32(insn, 24, 5)) {
4084     case 0x0a: /* Logical (shifted register) */
4085         disas_logic_reg(s, insn);
4086         break;
4087     case 0x0b: /* Add/subtract */
4088         if (insn & (1 << 21)) { /* (extended register) */
4089             disas_add_sub_ext_reg(s, insn);
4090         } else {
4091             disas_add_sub_reg(s, insn);
4092         }
4093         break;
4094     case 0x1b: /* Data-processing (3 source) */
4095         disas_data_proc_3src(s, insn);
4096         break;
4097     case 0x1a:
4098         switch (extract32(insn, 21, 3)) {
4099         case 0x0: /* Add/subtract (with carry) */
4100             disas_adc_sbc(s, insn);
4101             break;
4102         case 0x2: /* Conditional compare */
4103             disas_cc(s, insn); /* both imm and reg forms */
4104             break;
4105         case 0x4: /* Conditional select */
4106             disas_cond_select(s, insn);
4107             break;
4108         case 0x6: /* Data-processing */
4109             if (insn & (1 << 30)) { /* (1 source) */
4110                 disas_data_proc_1src(s, insn);
4111             } else {            /* (2 source) */
4112                 disas_data_proc_2src(s, insn);
4113             }
4114             break;
4115         default:
4116             unallocated_encoding(s);
4117             break;
4118         }
4119         break;
4120     default:
4121         unallocated_encoding(s);
4122         break;
4123     }
4124 }
4125
4126 static void handle_fp_compare(DisasContext *s, bool is_double,
4127                               unsigned int rn, unsigned int rm,
4128                               bool cmp_with_zero, bool signal_all_nans)
4129 {
4130     TCGv_i64 tcg_flags = tcg_temp_new_i64();
4131     TCGv_ptr fpst = get_fpstatus_ptr();
4132
4133     if (is_double) {
4134         TCGv_i64 tcg_vn, tcg_vm;
4135
4136         tcg_vn = read_fp_dreg(s, rn);
4137         if (cmp_with_zero) {
4138             tcg_vm = tcg_const_i64(0);
4139         } else {
4140             tcg_vm = read_fp_dreg(s, rm);
4141         }
4142         if (signal_all_nans) {
4143             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4144         } else {
4145             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4146         }
4147         tcg_temp_free_i64(tcg_vn);
4148         tcg_temp_free_i64(tcg_vm);
4149     } else {
4150         TCGv_i32 tcg_vn, tcg_vm;
4151
4152         tcg_vn = read_fp_sreg(s, rn);
4153         if (cmp_with_zero) {
4154             tcg_vm = tcg_const_i32(0);
4155         } else {
4156             tcg_vm = read_fp_sreg(s, rm);
4157         }
4158         if (signal_all_nans) {
4159             gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4160         } else {
4161             gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4162         }
4163         tcg_temp_free_i32(tcg_vn);
4164         tcg_temp_free_i32(tcg_vm);
4165     }
4166
4167     tcg_temp_free_ptr(fpst);
4168
4169     gen_set_nzcv(tcg_flags);
4170
4171     tcg_temp_free_i64(tcg_flags);
4172 }
4173
4174 /* C3.6.22 Floating point compare
4175  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
4176  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4177  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
4178  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4179  */
4180 static void disas_fp_compare(DisasContext *s, uint32_t insn)
4181 {
4182     unsigned int mos, type, rm, op, rn, opc, op2r;
4183
4184     mos = extract32(insn, 29, 3);
4185     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4186     rm = extract32(insn, 16, 5);
4187     op = extract32(insn, 14, 2);
4188     rn = extract32(insn, 5, 5);
4189     opc = extract32(insn, 3, 2);
4190     op2r = extract32(insn, 0, 3);
4191
4192     if (mos || op || op2r || type > 1) {
4193         unallocated_encoding(s);
4194         return;
4195     }
4196
4197     if (!fp_access_check(s)) {
4198         return;
4199     }
4200
4201     handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
4202 }
4203
4204 /* C3.6.23 Floating point conditional compare
4205  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
4206  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4207  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
4208  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4209  */
4210 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
4211 {
4212     unsigned int mos, type, rm, cond, rn, op, nzcv;
4213     TCGv_i64 tcg_flags;
4214     TCGLabel *label_continue = NULL;
4215
4216     mos = extract32(insn, 29, 3);
4217     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4218     rm = extract32(insn, 16, 5);
4219     cond = extract32(insn, 12, 4);
4220     rn = extract32(insn, 5, 5);
4221     op = extract32(insn, 4, 1);
4222     nzcv = extract32(insn, 0, 4);
4223
4224     if (mos || type > 1) {
4225         unallocated_encoding(s);
4226         return;
4227     }
4228
4229     if (!fp_access_check(s)) {
4230         return;
4231     }
4232
4233     if (cond < 0x0e) { /* not always */
4234         TCGLabel *label_match = gen_new_label();
4235         label_continue = gen_new_label();
4236         arm_gen_test_cc(cond, label_match);
4237         /* nomatch: */
4238         tcg_flags = tcg_const_i64(nzcv << 28);
4239         gen_set_nzcv(tcg_flags);
4240         tcg_temp_free_i64(tcg_flags);
4241         tcg_gen_br(label_continue);
4242         gen_set_label(label_match);
4243     }
4244
4245     handle_fp_compare(s, type, rn, rm, false, op);
4246
4247     if (cond < 0x0e) {
4248         gen_set_label(label_continue);
4249     }
4250 }
4251
4252 /* C3.6.24 Floating point conditional select
4253  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
4254  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4255  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
4256  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4257  */
4258 static void disas_fp_csel(DisasContext *s, uint32_t insn)
4259 {
4260     unsigned int mos, type, rm, cond, rn, rd;
4261     TCGv_i64 t_true, t_false, t_zero;
4262     DisasCompare64 c;
4263
4264     mos = extract32(insn, 29, 3);
4265     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4266     rm = extract32(insn, 16, 5);
4267     cond = extract32(insn, 12, 4);
4268     rn = extract32(insn, 5, 5);
4269     rd = extract32(insn, 0, 5);
4270
4271     if (mos || type > 1) {
4272         unallocated_encoding(s);
4273         return;
4274     }
4275
4276     if (!fp_access_check(s)) {
4277         return;
4278     }
4279
4280     /* Zero extend sreg inputs to 64 bits now.  */
4281     t_true = tcg_temp_new_i64();
4282     t_false = tcg_temp_new_i64();
4283     read_vec_element(s, t_true, rn, 0, type ? MO_64 : MO_32);
4284     read_vec_element(s, t_false, rm, 0, type ? MO_64 : MO_32);
4285
4286     a64_test_cc(&c, cond);
4287     t_zero = tcg_const_i64(0);
4288     tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
4289     tcg_temp_free_i64(t_zero);
4290     tcg_temp_free_i64(t_false);
4291     a64_free_cc(&c);
4292
4293     /* Note that sregs write back zeros to the high bits,
4294        and we've already done the zero-extension.  */
4295     write_fp_dreg(s, rd, t_true);
4296     tcg_temp_free_i64(t_true);
4297 }
4298
4299 /* C3.6.25 Floating-point data-processing (1 source) - single precision */
4300 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
4301 {
4302     TCGv_ptr fpst;
4303     TCGv_i32 tcg_op;
4304     TCGv_i32 tcg_res;
4305
4306     fpst = get_fpstatus_ptr();
4307     tcg_op = read_fp_sreg(s, rn);
4308     tcg_res = tcg_temp_new_i32();
4309
4310     switch (opcode) {
4311     case 0x0: /* FMOV */
4312         tcg_gen_mov_i32(tcg_res, tcg_op);
4313         break;
4314     case 0x1: /* FABS */
4315         gen_helper_vfp_abss(tcg_res, tcg_op);
4316         break;
4317     case 0x2: /* FNEG */
4318         gen_helper_vfp_negs(tcg_res, tcg_op);
4319         break;
4320     case 0x3: /* FSQRT */
4321         gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
4322         break;
4323     case 0x8: /* FRINTN */
4324     case 0x9: /* FRINTP */
4325     case 0xa: /* FRINTM */
4326     case 0xb: /* FRINTZ */
4327     case 0xc: /* FRINTA */
4328     {
4329         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4330
4331         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4332         gen_helper_rints(tcg_res, tcg_op, fpst);
4333
4334         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4335         tcg_temp_free_i32(tcg_rmode);
4336         break;
4337     }
4338     case 0xe: /* FRINTX */
4339         gen_helper_rints_exact(tcg_res, tcg_op, fpst);
4340         break;
4341     case 0xf: /* FRINTI */
4342         gen_helper_rints(tcg_res, tcg_op, fpst);
4343         break;
4344     default:
4345         abort();
4346     }
4347
4348     write_fp_sreg(s, rd, tcg_res);
4349
4350     tcg_temp_free_ptr(fpst);
4351     tcg_temp_free_i32(tcg_op);
4352     tcg_temp_free_i32(tcg_res);
4353 }
4354
4355 /* C3.6.25 Floating-point data-processing (1 source) - double precision */
4356 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
4357 {
4358     TCGv_ptr fpst;
4359     TCGv_i64 tcg_op;
4360     TCGv_i64 tcg_res;
4361
4362     fpst = get_fpstatus_ptr();
4363     tcg_op = read_fp_dreg(s, rn);
4364     tcg_res = tcg_temp_new_i64();
4365
4366     switch (opcode) {
4367     case 0x0: /* FMOV */
4368         tcg_gen_mov_i64(tcg_res, tcg_op);
4369         break;
4370     case 0x1: /* FABS */
4371         gen_helper_vfp_absd(tcg_res, tcg_op);
4372         break;
4373     case 0x2: /* FNEG */
4374         gen_helper_vfp_negd(tcg_res, tcg_op);
4375         break;
4376     case 0x3: /* FSQRT */
4377         gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
4378         break;
4379     case 0x8: /* FRINTN */
4380     case 0x9: /* FRINTP */
4381     case 0xa: /* FRINTM */
4382     case 0xb: /* FRINTZ */
4383     case 0xc: /* FRINTA */
4384     {
4385         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4386
4387         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4388         gen_helper_rintd(tcg_res, tcg_op, fpst);
4389
4390         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4391         tcg_temp_free_i32(tcg_rmode);
4392         break;
4393     }
4394     case 0xe: /* FRINTX */
4395         gen_helper_rintd_exact(tcg_res, tcg_op, fpst);
4396         break;
4397     case 0xf: /* FRINTI */
4398         gen_helper_rintd(tcg_res, tcg_op, fpst);
4399         break;
4400     default:
4401         abort();
4402     }
4403
4404     write_fp_dreg(s, rd, tcg_res);
4405
4406     tcg_temp_free_ptr(fpst);
4407     tcg_temp_free_i64(tcg_op);
4408     tcg_temp_free_i64(tcg_res);
4409 }
4410
4411 static void handle_fp_fcvt(DisasContext *s, int opcode,
4412                            int rd, int rn, int dtype, int ntype)
4413 {
4414     switch (ntype) {
4415     case 0x0:
4416     {
4417         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4418         if (dtype == 1) {
4419             /* Single to double */
4420             TCGv_i64 tcg_rd = tcg_temp_new_i64();
4421             gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
4422             write_fp_dreg(s, rd, tcg_rd);
4423             tcg_temp_free_i64(tcg_rd);
4424         } else {
4425             /* Single to half */
4426             TCGv_i32 tcg_rd = tcg_temp_new_i32();
4427             gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, cpu_env);
4428             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4429             write_fp_sreg(s, rd, tcg_rd);
4430             tcg_temp_free_i32(tcg_rd);
4431         }
4432         tcg_temp_free_i32(tcg_rn);
4433         break;
4434     }
4435     case 0x1:
4436     {
4437         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
4438         TCGv_i32 tcg_rd = tcg_temp_new_i32();
4439         if (dtype == 0) {
4440             /* Double to single */
4441             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
4442         } else {
4443             /* Double to half */
4444             gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, cpu_env);
4445             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4446         }
4447         write_fp_sreg(s, rd, tcg_rd);
4448         tcg_temp_free_i32(tcg_rd);
4449         tcg_temp_free_i64(tcg_rn);
4450         break;
4451     }
4452     case 0x3:
4453     {
4454         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4455         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
4456         if (dtype == 0) {
4457             /* Half to single */
4458             TCGv_i32 tcg_rd = tcg_temp_new_i32();
4459             gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, cpu_env);
4460             write_fp_sreg(s, rd, tcg_rd);
4461             tcg_temp_free_i32(tcg_rd);
4462         } else {
4463             /* Half to double */
4464             TCGv_i64 tcg_rd = tcg_temp_new_i64();
4465             gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, cpu_env);
4466             write_fp_dreg(s, rd, tcg_rd);
4467             tcg_temp_free_i64(tcg_rd);
4468         }
4469         tcg_temp_free_i32(tcg_rn);
4470         break;
4471     }
4472     default:
4473         abort();
4474     }
4475 }
4476
4477 /* C3.6.25 Floating point data-processing (1 source)
4478  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
4479  * +---+---+---+-----------+------+---+--------+-----------+------+------+
4480  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
4481  * +---+---+---+-----------+------+---+--------+-----------+------+------+
4482  */
4483 static void disas_fp_1src(DisasContext *s, uint32_t insn)
4484 {
4485     int type = extract32(insn, 22, 2);
4486     int opcode = extract32(insn, 15, 6);
4487     int rn = extract32(insn, 5, 5);
4488     int rd = extract32(insn, 0, 5);
4489
4490     switch (opcode) {
4491     case 0x4: case 0x5: case 0x7:
4492     {
4493         /* FCVT between half, single and double precision */
4494         int dtype = extract32(opcode, 0, 2);
4495         if (type == 2 || dtype == type) {
4496             unallocated_encoding(s);
4497             return;
4498         }
4499         if (!fp_access_check(s)) {
4500             return;
4501         }
4502
4503         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
4504         break;
4505     }
4506     case 0x0 ... 0x3:
4507     case 0x8 ... 0xc:
4508     case 0xe ... 0xf:
4509         /* 32-to-32 and 64-to-64 ops */
4510         switch (type) {
4511         case 0:
4512             if (!fp_access_check(s)) {
4513                 return;
4514             }
4515
4516             handle_fp_1src_single(s, opcode, rd, rn);
4517             break;
4518         case 1:
4519             if (!fp_access_check(s)) {
4520                 return;
4521             }
4522
4523             handle_fp_1src_double(s, opcode, rd, rn);
4524             break;
4525         default:
4526             unallocated_encoding(s);
4527         }
4528         break;
4529     default:
4530         unallocated_encoding(s);
4531         break;
4532     }
4533 }
4534
4535 /* C3.6.26 Floating-point data-processing (2 source) - single precision */
4536 static void handle_fp_2src_single(DisasContext *s, int opcode,
4537                                   int rd, int rn, int rm)
4538 {
4539     TCGv_i32 tcg_op1;
4540     TCGv_i32 tcg_op2;
4541     TCGv_i32 tcg_res;
4542     TCGv_ptr fpst;
4543
4544     tcg_res = tcg_temp_new_i32();
4545     fpst = get_fpstatus_ptr();
4546     tcg_op1 = read_fp_sreg(s, rn);
4547     tcg_op2 = read_fp_sreg(s, rm);
4548
4549     switch (opcode) {
4550     case 0x0: /* FMUL */
4551         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4552         break;
4553     case 0x1: /* FDIV */
4554         gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
4555         break;
4556     case 0x2: /* FADD */
4557         gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
4558         break;
4559     case 0x3: /* FSUB */
4560         gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
4561         break;
4562     case 0x4: /* FMAX */
4563         gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
4564         break;
4565     case 0x5: /* FMIN */
4566         gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
4567         break;
4568     case 0x6: /* FMAXNM */
4569         gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
4570         break;
4571     case 0x7: /* FMINNM */
4572         gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
4573         break;
4574     case 0x8: /* FNMUL */
4575         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4576         gen_helper_vfp_negs(tcg_res, tcg_res);
4577         break;
4578     }
4579
4580     write_fp_sreg(s, rd, tcg_res);
4581
4582     tcg_temp_free_ptr(fpst);
4583     tcg_temp_free_i32(tcg_op1);
4584     tcg_temp_free_i32(tcg_op2);
4585     tcg_temp_free_i32(tcg_res);
4586 }
4587
4588 /* C3.6.26 Floating-point data-processing (2 source) - double precision */
4589 static void handle_fp_2src_double(DisasContext *s, int opcode,
4590                                   int rd, int rn, int rm)
4591 {
4592     TCGv_i64 tcg_op1;
4593     TCGv_i64 tcg_op2;
4594     TCGv_i64 tcg_res;
4595     TCGv_ptr fpst;
4596
4597     tcg_res = tcg_temp_new_i64();
4598     fpst = get_fpstatus_ptr();
4599     tcg_op1 = read_fp_dreg(s, rn);
4600     tcg_op2 = read_fp_dreg(s, rm);
4601
4602     switch (opcode) {
4603     case 0x0: /* FMUL */
4604         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4605         break;
4606     case 0x1: /* FDIV */
4607         gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
4608         break;
4609     case 0x2: /* FADD */
4610         gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
4611         break;
4612     case 0x3: /* FSUB */
4613         gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
4614         break;
4615     case 0x4: /* FMAX */
4616         gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
4617         break;
4618     case 0x5: /* FMIN */
4619         gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
4620         break;
4621     case 0x6: /* FMAXNM */
4622         gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4623         break;
4624     case 0x7: /* FMINNM */
4625         gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4626         break;
4627     case 0x8: /* FNMUL */
4628         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4629         gen_helper_vfp_negd(tcg_res, tcg_res);
4630         break;
4631     }
4632
4633     write_fp_dreg(s, rd, tcg_res);
4634
4635     tcg_temp_free_ptr(fpst);
4636     tcg_temp_free_i64(tcg_op1);
4637     tcg_temp_free_i64(tcg_op2);
4638     tcg_temp_free_i64(tcg_res);
4639 }
4640
4641 /* C3.6.26 Floating point data-processing (2 source)
4642  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
4643  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4644  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
4645  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4646  */
4647 static void disas_fp_2src(DisasContext *s, uint32_t insn)
4648 {
4649     int type = extract32(insn, 22, 2);
4650     int rd = extract32(insn, 0, 5);
4651     int rn = extract32(insn, 5, 5);
4652     int rm = extract32(insn, 16, 5);
4653     int opcode = extract32(insn, 12, 4);
4654
4655     if (opcode > 8) {
4656         unallocated_encoding(s);
4657         return;
4658     }
4659
4660     switch (type) {
4661     case 0:
4662         if (!fp_access_check(s)) {
4663             return;
4664         }
4665         handle_fp_2src_single(s, opcode, rd, rn, rm);
4666         break;
4667     case 1:
4668         if (!fp_access_check(s)) {
4669             return;
4670         }
4671         handle_fp_2src_double(s, opcode, rd, rn, rm);
4672         break;
4673     default:
4674         unallocated_encoding(s);
4675     }
4676 }
4677
4678 /* C3.6.27 Floating-point data-processing (3 source) - single precision */
4679 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
4680                                   int rd, int rn, int rm, int ra)
4681 {
4682     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
4683     TCGv_i32 tcg_res = tcg_temp_new_i32();
4684     TCGv_ptr fpst = get_fpstatus_ptr();
4685
4686     tcg_op1 = read_fp_sreg(s, rn);
4687     tcg_op2 = read_fp_sreg(s, rm);
4688     tcg_op3 = read_fp_sreg(s, ra);
4689
4690     /* These are fused multiply-add, and must be done as one
4691      * floating point operation with no rounding between the
4692      * multiplication and addition steps.
4693      * NB that doing the negations here as separate steps is
4694      * correct : an input NaN should come out with its sign bit
4695      * flipped if it is a negated-input.
4696      */
4697     if (o1 == true) {
4698         gen_helper_vfp_negs(tcg_op3, tcg_op3);
4699     }
4700
4701     if (o0 != o1) {
4702         gen_helper_vfp_negs(tcg_op1, tcg_op1);
4703     }
4704
4705     gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4706
4707     write_fp_sreg(s, rd, tcg_res);
4708
4709     tcg_temp_free_ptr(fpst);
4710     tcg_temp_free_i32(tcg_op1);
4711     tcg_temp_free_i32(tcg_op2);
4712     tcg_temp_free_i32(tcg_op3);
4713     tcg_temp_free_i32(tcg_res);
4714 }
4715
4716 /* C3.6.27 Floating-point data-processing (3 source) - double precision */
4717 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
4718                                   int rd, int rn, int rm, int ra)
4719 {
4720     TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
4721     TCGv_i64 tcg_res = tcg_temp_new_i64();
4722     TCGv_ptr fpst = get_fpstatus_ptr();
4723
4724     tcg_op1 = read_fp_dreg(s, rn);
4725     tcg_op2 = read_fp_dreg(s, rm);
4726     tcg_op3 = read_fp_dreg(s, ra);
4727
4728     /* These are fused multiply-add, and must be done as one
4729      * floating point operation with no rounding between the
4730      * multiplication and addition steps.
4731      * NB that doing the negations here as separate steps is
4732      * correct : an input NaN should come out with its sign bit
4733      * flipped if it is a negated-input.
4734      */
4735     if (o1 == true) {
4736         gen_helper_vfp_negd(tcg_op3, tcg_op3);
4737     }
4738
4739     if (o0 != o1) {
4740         gen_helper_vfp_negd(tcg_op1, tcg_op1);
4741     }
4742
4743     gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4744
4745     write_fp_dreg(s, rd, tcg_res);
4746
4747     tcg_temp_free_ptr(fpst);
4748     tcg_temp_free_i64(tcg_op1);
4749     tcg_temp_free_i64(tcg_op2);
4750     tcg_temp_free_i64(tcg_op3);
4751     tcg_temp_free_i64(tcg_res);
4752 }
4753
4754 /* C3.6.27 Floating point data-processing (3 source)
4755  *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
4756  * +---+---+---+-----------+------+----+------+----+------+------+------+
4757  * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4758  * +---+---+---+-----------+------+----+------+----+------+------+------+
4759  */
4760 static void disas_fp_3src(DisasContext *s, uint32_t insn)
4761 {
4762     int type = extract32(insn, 22, 2);
4763     int rd = extract32(insn, 0, 5);
4764     int rn = extract32(insn, 5, 5);
4765     int ra = extract32(insn, 10, 5);
4766     int rm = extract32(insn, 16, 5);
4767     bool o0 = extract32(insn, 15, 1);
4768     bool o1 = extract32(insn, 21, 1);
4769
4770     switch (type) {
4771     case 0:
4772         if (!fp_access_check(s)) {
4773             return;
4774         }
4775         handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
4776         break;
4777     case 1:
4778         if (!fp_access_check(s)) {
4779             return;
4780         }
4781         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
4782         break;
4783     default:
4784         unallocated_encoding(s);
4785     }
4786 }
4787
4788 /* C3.6.28 Floating point immediate
4789  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
4790  * +---+---+---+-----------+------+---+------------+-------+------+------+
4791  * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
4792  * +---+---+---+-----------+------+---+------------+-------+------+------+
4793  */
4794 static void disas_fp_imm(DisasContext *s, uint32_t insn)
4795 {
4796     int rd = extract32(insn, 0, 5);
4797     int imm8 = extract32(insn, 13, 8);
4798     int is_double = extract32(insn, 22, 2);
4799     uint64_t imm;
4800     TCGv_i64 tcg_res;
4801
4802     if (is_double > 1) {
4803         unallocated_encoding(s);
4804         return;
4805     }
4806
4807     if (!fp_access_check(s)) {
4808         return;
4809     }
4810
4811     /* The imm8 encodes the sign bit, enough bits to represent
4812      * an exponent in the range 01....1xx to 10....0xx,
4813      * and the most significant 4 bits of the mantissa; see
4814      * VFPExpandImm() in the v8 ARM ARM.
4815      */
4816     if (is_double) {
4817         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4818             (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
4819             extract32(imm8, 0, 6);
4820         imm <<= 48;
4821     } else {
4822         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4823             (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
4824             (extract32(imm8, 0, 6) << 3);
4825         imm <<= 16;
4826     }
4827
4828     tcg_res = tcg_const_i64(imm);
4829     write_fp_dreg(s, rd, tcg_res);
4830     tcg_temp_free_i64(tcg_res);
4831 }
4832
4833 /* Handle floating point <=> fixed point conversions. Note that we can
4834  * also deal with fp <=> integer conversions as a special case (scale == 64)
4835  * OPTME: consider handling that special case specially or at least skipping
4836  * the call to scalbn in the helpers for zero shifts.
4837  */
4838 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
4839                            bool itof, int rmode, int scale, int sf, int type)
4840 {
4841     bool is_signed = !(opcode & 1);
4842     bool is_double = type;
4843     TCGv_ptr tcg_fpstatus;
4844     TCGv_i32 tcg_shift;
4845
4846     tcg_fpstatus = get_fpstatus_ptr();
4847
4848     tcg_shift = tcg_const_i32(64 - scale);
4849
4850     if (itof) {
4851         TCGv_i64 tcg_int = cpu_reg(s, rn);
4852         if (!sf) {
4853             TCGv_i64 tcg_extend = new_tmp_a64(s);
4854
4855             if (is_signed) {
4856                 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
4857             } else {
4858                 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
4859             }
4860
4861             tcg_int = tcg_extend;
4862         }
4863
4864         if (is_double) {
4865             TCGv_i64 tcg_double = tcg_temp_new_i64();
4866             if (is_signed) {
4867                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
4868                                      tcg_shift, tcg_fpstatus);
4869             } else {
4870                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
4871                                      tcg_shift, tcg_fpstatus);
4872             }
4873             write_fp_dreg(s, rd, tcg_double);
4874             tcg_temp_free_i64(tcg_double);
4875         } else {
4876             TCGv_i32 tcg_single = tcg_temp_new_i32();
4877             if (is_signed) {
4878                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
4879                                      tcg_shift, tcg_fpstatus);
4880             } else {
4881                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
4882                                      tcg_shift, tcg_fpstatus);
4883             }
4884             write_fp_sreg(s, rd, tcg_single);
4885             tcg_temp_free_i32(tcg_single);
4886         }
4887     } else {
4888         TCGv_i64 tcg_int = cpu_reg(s, rd);
4889         TCGv_i32 tcg_rmode;
4890
4891         if (extract32(opcode, 2, 1)) {
4892             /* There are too many rounding modes to all fit into rmode,
4893              * so FCVTA[US] is a special case.
4894              */
4895             rmode = FPROUNDING_TIEAWAY;
4896         }
4897
4898         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
4899
4900         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4901
4902         if (is_double) {
4903             TCGv_i64 tcg_double = read_fp_dreg(s, rn);
4904             if (is_signed) {
4905                 if (!sf) {
4906                     gen_helper_vfp_tosld(tcg_int, tcg_double,
4907                                          tcg_shift, tcg_fpstatus);
4908                 } else {
4909                     gen_helper_vfp_tosqd(tcg_int, tcg_double,
4910                                          tcg_shift, tcg_fpstatus);
4911                 }
4912             } else {
4913                 if (!sf) {
4914                     gen_helper_vfp_tould(tcg_int, tcg_double,
4915                                          tcg_shift, tcg_fpstatus);
4916                 } else {
4917                     gen_helper_vfp_touqd(tcg_int, tcg_double,
4918                                          tcg_shift, tcg_fpstatus);
4919                 }
4920             }
4921             tcg_temp_free_i64(tcg_double);
4922         } else {
4923             TCGv_i32 tcg_single = read_fp_sreg(s, rn);
4924             if (sf) {
4925                 if (is_signed) {
4926                     gen_helper_vfp_tosqs(tcg_int, tcg_single,
4927                                          tcg_shift, tcg_fpstatus);
4928                 } else {
4929                     gen_helper_vfp_touqs(tcg_int, tcg_single,
4930                                          tcg_shift, tcg_fpstatus);
4931                 }
4932             } else {
4933                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
4934                 if (is_signed) {
4935                     gen_helper_vfp_tosls(tcg_dest, tcg_single,
4936                                          tcg_shift, tcg_fpstatus);
4937                 } else {
4938                     gen_helper_vfp_touls(tcg_dest, tcg_single,
4939                                          tcg_shift, tcg_fpstatus);
4940                 }
4941                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
4942                 tcg_temp_free_i32(tcg_dest);
4943             }
4944             tcg_temp_free_i32(tcg_single);
4945         }
4946
4947         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4948         tcg_temp_free_i32(tcg_rmode);
4949
4950         if (!sf) {
4951             tcg_gen_ext32u_i64(tcg_int, tcg_int);
4952         }
4953     }
4954
4955     tcg_temp_free_ptr(tcg_fpstatus);
4956     tcg_temp_free_i32(tcg_shift);
4957 }
4958
4959 /* C3.6.29 Floating point <-> fixed point conversions
4960  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
4961  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
4962  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
4963  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
4964  */
4965 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
4966 {
4967     int rd = extract32(insn, 0, 5);
4968     int rn = extract32(insn, 5, 5);
4969     int scale = extract32(insn, 10, 6);
4970     int opcode = extract32(insn, 16, 3);
4971     int rmode = extract32(insn, 19, 2);
4972     int type = extract32(insn, 22, 2);
4973     bool sbit = extract32(insn, 29, 1);
4974     bool sf = extract32(insn, 31, 1);
4975     bool itof;
4976
4977     if (sbit || (type > 1)
4978         || (!sf && scale < 32)) {
4979         unallocated_encoding(s);
4980         return;
4981     }
4982
4983     switch ((rmode << 3) | opcode) {
4984     case 0x2: /* SCVTF */
4985     case 0x3: /* UCVTF */
4986         itof = true;
4987         break;
4988     case 0x18: /* FCVTZS */
4989     case 0x19: /* FCVTZU */
4990         itof = false;
4991         break;
4992     default:
4993         unallocated_encoding(s);
4994         return;
4995     }
4996
4997     if (!fp_access_check(s)) {
4998         return;
4999     }
5000
5001     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
5002 }
5003
5004 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
5005 {
5006     /* FMOV: gpr to or from float, double, or top half of quad fp reg,
5007      * without conversion.
5008      */
5009
5010     if (itof) {
5011         TCGv_i64 tcg_rn = cpu_reg(s, rn);
5012
5013         switch (type) {
5014         case 0:
5015         {
5016             /* 32 bit */
5017             TCGv_i64 tmp = tcg_temp_new_i64();
5018             tcg_gen_ext32u_i64(tmp, tcg_rn);
5019             tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64));
5020             tcg_gen_movi_i64(tmp, 0);
5021             tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5022             tcg_temp_free_i64(tmp);
5023             break;
5024         }
5025         case 1:
5026         {
5027             /* 64 bit */
5028             TCGv_i64 tmp = tcg_const_i64(0);
5029             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64));
5030             tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5031             tcg_temp_free_i64(tmp);
5032             break;
5033         }
5034         case 2:
5035             /* 64 bit to top half. */
5036             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
5037             break;
5038         }
5039     } else {
5040         TCGv_i64 tcg_rd = cpu_reg(s, rd);
5041
5042         switch (type) {
5043         case 0:
5044             /* 32 bit */
5045             tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
5046             break;
5047         case 1:
5048             /* 64 bit */
5049             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
5050             break;
5051         case 2:
5052             /* 64 bits from top half */
5053             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
5054             break;
5055         }
5056     }
5057 }
5058
5059 /* C3.6.30 Floating point <-> integer conversions
5060  *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
5061  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5062  * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
5063  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5064  */
5065 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
5066 {
5067     int rd = extract32(insn, 0, 5);
5068     int rn = extract32(insn, 5, 5);
5069     int opcode = extract32(insn, 16, 3);
5070     int rmode = extract32(insn, 19, 2);
5071     int type = extract32(insn, 22, 2);
5072     bool sbit = extract32(insn, 29, 1);
5073     bool sf = extract32(insn, 31, 1);
5074
5075     if (sbit) {
5076         unallocated_encoding(s);
5077         return;
5078     }
5079
5080     if (opcode > 5) {
5081         /* FMOV */
5082         bool itof = opcode & 1;
5083
5084         if (rmode >= 2) {
5085             unallocated_encoding(s);
5086             return;
5087         }
5088
5089         switch (sf << 3 | type << 1 | rmode) {
5090         case 0x0: /* 32 bit */
5091         case 0xa: /* 64 bit */
5092         case 0xd: /* 64 bit to top half of quad */
5093             break;
5094         default:
5095             /* all other sf/type/rmode combinations are invalid */
5096             unallocated_encoding(s);
5097             break;
5098         }
5099
5100         if (!fp_access_check(s)) {
5101             return;
5102         }
5103         handle_fmov(s, rd, rn, type, itof);
5104     } else {
5105         /* actual FP conversions */
5106         bool itof = extract32(opcode, 1, 1);
5107
5108         if (type > 1 || (rmode != 0 && opcode > 1)) {
5109             unallocated_encoding(s);
5110             return;
5111         }
5112
5113         if (!fp_access_check(s)) {
5114             return;
5115         }
5116         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
5117     }
5118 }
5119
5120 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
5121  *   31  30  29 28     25 24                          0
5122  * +---+---+---+---------+-----------------------------+
5123  * |   | 0 |   | 1 1 1 1 |                             |
5124  * +---+---+---+---------+-----------------------------+
5125  */
5126 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
5127 {
5128     if (extract32(insn, 24, 1)) {
5129         /* Floating point data-processing (3 source) */
5130         disas_fp_3src(s, insn);
5131     } else if (extract32(insn, 21, 1) == 0) {
5132         /* Floating point to fixed point conversions */
5133         disas_fp_fixed_conv(s, insn);
5134     } else {
5135         switch (extract32(insn, 10, 2)) {
5136         case 1:
5137             /* Floating point conditional compare */
5138             disas_fp_ccomp(s, insn);
5139             break;
5140         case 2:
5141             /* Floating point data-processing (2 source) */
5142             disas_fp_2src(s, insn);
5143             break;
5144         case 3:
5145             /* Floating point conditional select */
5146             disas_fp_csel(s, insn);
5147             break;
5148         case 0:
5149             switch (ctz32(extract32(insn, 12, 4))) {
5150             case 0: /* [15:12] == xxx1 */
5151                 /* Floating point immediate */
5152                 disas_fp_imm(s, insn);
5153                 break;
5154             case 1: /* [15:12] == xx10 */
5155                 /* Floating point compare */
5156                 disas_fp_compare(s, insn);
5157                 break;
5158             case 2: /* [15:12] == x100 */
5159                 /* Floating point data-processing (1 source) */
5160                 disas_fp_1src(s, insn);
5161                 break;
5162             case 3: /* [15:12] == 1000 */
5163                 unallocated_encoding(s);
5164                 break;
5165             default: /* [15:12] == 0000 */
5166                 /* Floating point <-> integer conversions */
5167                 disas_fp_int_conv(s, insn);
5168                 break;
5169             }
5170             break;
5171         }
5172     }
5173 }
5174
5175 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
5176                      int pos)
5177 {
5178     /* Extract 64 bits from the middle of two concatenated 64 bit
5179      * vector register slices left:right. The extracted bits start
5180      * at 'pos' bits into the right (least significant) side.
5181      * We return the result in tcg_right, and guarantee not to
5182      * trash tcg_left.
5183      */
5184     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5185     assert(pos > 0 && pos < 64);
5186
5187     tcg_gen_shri_i64(tcg_right, tcg_right, pos);
5188     tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
5189     tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
5190
5191     tcg_temp_free_i64(tcg_tmp);
5192 }
5193
5194 /* C3.6.1 EXT
5195  *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
5196  * +---+---+-------------+-----+---+------+---+------+---+------+------+
5197  * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
5198  * +---+---+-------------+-----+---+------+---+------+---+------+------+
5199  */
5200 static void disas_simd_ext(DisasContext *s, uint32_t insn)
5201 {
5202     int is_q = extract32(insn, 30, 1);
5203     int op2 = extract32(insn, 22, 2);
5204     int imm4 = extract32(insn, 11, 4);
5205     int rm = extract32(insn, 16, 5);
5206     int rn = extract32(insn, 5, 5);
5207     int rd = extract32(insn, 0, 5);
5208     int pos = imm4 << 3;
5209     TCGv_i64 tcg_resl, tcg_resh;
5210
5211     if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
5212         unallocated_encoding(s);
5213         return;
5214     }
5215
5216     if (!fp_access_check(s)) {
5217         return;
5218     }
5219
5220     tcg_resh = tcg_temp_new_i64();
5221     tcg_resl = tcg_temp_new_i64();
5222
5223     /* Vd gets bits starting at pos bits into Vm:Vn. This is
5224      * either extracting 128 bits from a 128:128 concatenation, or
5225      * extracting 64 bits from a 64:64 concatenation.
5226      */
5227     if (!is_q) {
5228         read_vec_element(s, tcg_resl, rn, 0, MO_64);
5229         if (pos != 0) {
5230             read_vec_element(s, tcg_resh, rm, 0, MO_64);
5231             do_ext64(s, tcg_resh, tcg_resl, pos);
5232         }
5233         tcg_gen_movi_i64(tcg_resh, 0);
5234     } else {
5235         TCGv_i64 tcg_hh;
5236         typedef struct {
5237             int reg;
5238             int elt;
5239         } EltPosns;
5240         EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
5241         EltPosns *elt = eltposns;
5242
5243         if (pos >= 64) {
5244             elt++;
5245             pos -= 64;
5246         }
5247
5248         read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
5249         elt++;
5250         read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
5251         elt++;
5252         if (pos != 0) {
5253             do_ext64(s, tcg_resh, tcg_resl, pos);
5254             tcg_hh = tcg_temp_new_i64();
5255             read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
5256             do_ext64(s, tcg_hh, tcg_resh, pos);
5257             tcg_temp_free_i64(tcg_hh);
5258         }
5259     }
5260
5261     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5262     tcg_temp_free_i64(tcg_resl);
5263     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5264     tcg_temp_free_i64(tcg_resh);
5265 }
5266
5267 /* C3.6.2 TBL/TBX
5268  *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
5269  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5270  * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
5271  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5272  */
5273 static void disas_simd_tb(DisasContext *s, uint32_t insn)
5274 {
5275     int op2 = extract32(insn, 22, 2);
5276     int is_q = extract32(insn, 30, 1);
5277     int rm = extract32(insn, 16, 5);
5278     int rn = extract32(insn, 5, 5);
5279     int rd = extract32(insn, 0, 5);
5280     int is_tblx = extract32(insn, 12, 1);
5281     int len = extract32(insn, 13, 2);
5282     TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
5283     TCGv_i32 tcg_regno, tcg_numregs;
5284
5285     if (op2 != 0) {
5286         unallocated_encoding(s);
5287         return;
5288     }
5289
5290     if (!fp_access_check(s)) {
5291         return;
5292     }
5293
5294     /* This does a table lookup: for every byte element in the input
5295      * we index into a table formed from up to four vector registers,
5296      * and then the output is the result of the lookups. Our helper
5297      * function does the lookup operation for a single 64 bit part of
5298      * the input.
5299      */
5300     tcg_resl = tcg_temp_new_i64();
5301     tcg_resh = tcg_temp_new_i64();
5302
5303     if (is_tblx) {
5304         read_vec_element(s, tcg_resl, rd, 0, MO_64);
5305     } else {
5306         tcg_gen_movi_i64(tcg_resl, 0);
5307     }
5308     if (is_tblx && is_q) {
5309         read_vec_element(s, tcg_resh, rd, 1, MO_64);
5310     } else {
5311         tcg_gen_movi_i64(tcg_resh, 0);
5312     }
5313
5314     tcg_idx = tcg_temp_new_i64();
5315     tcg_regno = tcg_const_i32(rn);
5316     tcg_numregs = tcg_const_i32(len + 1);
5317     read_vec_element(s, tcg_idx, rm, 0, MO_64);
5318     gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
5319                         tcg_regno, tcg_numregs);
5320     if (is_q) {
5321         read_vec_element(s, tcg_idx, rm, 1, MO_64);
5322         gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
5323                             tcg_regno, tcg_numregs);
5324     }
5325     tcg_temp_free_i64(tcg_idx);
5326     tcg_temp_free_i32(tcg_regno);
5327     tcg_temp_free_i32(tcg_numregs);
5328
5329     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5330     tcg_temp_free_i64(tcg_resl);
5331     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5332     tcg_temp_free_i64(tcg_resh);
5333 }
5334
5335 /* C3.6.3 ZIP/UZP/TRN
5336  *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
5337  * +---+---+-------------+------+---+------+---+------------------+------+
5338  * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
5339  * +---+---+-------------+------+---+------+---+------------------+------+
5340  */
5341 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
5342 {
5343     int rd = extract32(insn, 0, 5);
5344     int rn = extract32(insn, 5, 5);
5345     int rm = extract32(insn, 16, 5);
5346     int size = extract32(insn, 22, 2);
5347     /* opc field bits [1:0] indicate ZIP/UZP/TRN;
5348      * bit 2 indicates 1 vs 2 variant of the insn.
5349      */
5350     int opcode = extract32(insn, 12, 2);
5351     bool part = extract32(insn, 14, 1);
5352     bool is_q = extract32(insn, 30, 1);
5353     int esize = 8 << size;
5354     int i, ofs;
5355     int datasize = is_q ? 128 : 64;
5356     int elements = datasize / esize;
5357     TCGv_i64 tcg_res, tcg_resl, tcg_resh;
5358
5359     if (opcode == 0 || (size == 3 && !is_q)) {
5360         unallocated_encoding(s);
5361         return;
5362     }
5363
5364     if (!fp_access_check(s)) {
5365         return;
5366     }
5367
5368     tcg_resl = tcg_const_i64(0);
5369     tcg_resh = tcg_const_i64(0);
5370     tcg_res = tcg_temp_new_i64();
5371
5372     for (i = 0; i < elements; i++) {
5373         switch (opcode) {
5374         case 1: /* UZP1/2 */
5375         {
5376             int midpoint = elements / 2;
5377             if (i < midpoint) {
5378                 read_vec_element(s, tcg_res, rn, 2 * i + part, size);
5379             } else {
5380                 read_vec_element(s, tcg_res, rm,
5381                                  2 * (i - midpoint) + part, size);
5382             }
5383             break;
5384         }
5385         case 2: /* TRN1/2 */
5386             if (i & 1) {
5387                 read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
5388             } else {
5389                 read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
5390             }
5391             break;
5392         case 3: /* ZIP1/2 */
5393         {
5394             int base = part * elements / 2;
5395             if (i & 1) {
5396                 read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
5397             } else {
5398                 read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
5399             }
5400             break;
5401         }
5402         default:
5403             g_assert_not_reached();
5404         }
5405
5406         ofs = i * esize;
5407         if (ofs < 64) {
5408             tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
5409             tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
5410         } else {
5411             tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
5412             tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
5413         }
5414     }
5415
5416     tcg_temp_free_i64(tcg_res);
5417
5418     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5419     tcg_temp_free_i64(tcg_resl);
5420     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5421     tcg_temp_free_i64(tcg_resh);
5422 }
5423
5424 static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2,
5425                         int opc, bool is_min, TCGv_ptr fpst)
5426 {
5427     /* Helper function for disas_simd_across_lanes: do a single precision
5428      * min/max operation on the specified two inputs,
5429      * and return the result in tcg_elt1.
5430      */
5431     if (opc == 0xc) {
5432         if (is_min) {
5433             gen_helper_vfp_minnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5434         } else {
5435             gen_helper_vfp_maxnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5436         }
5437     } else {
5438         assert(opc == 0xf);
5439         if (is_min) {
5440             gen_helper_vfp_mins(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5441         } else {
5442             gen_helper_vfp_maxs(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5443         }
5444     }
5445 }
5446
5447 /* C3.6.4 AdvSIMD across lanes
5448  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
5449  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5450  * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
5451  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5452  */
5453 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
5454 {
5455     int rd = extract32(insn, 0, 5);
5456     int rn = extract32(insn, 5, 5);
5457     int size = extract32(insn, 22, 2);
5458     int opcode = extract32(insn, 12, 5);
5459     bool is_q = extract32(insn, 30, 1);
5460     bool is_u = extract32(insn, 29, 1);
5461     bool is_fp = false;
5462     bool is_min = false;
5463     int esize;
5464     int elements;
5465     int i;
5466     TCGv_i64 tcg_res, tcg_elt;
5467
5468     switch (opcode) {
5469     case 0x1b: /* ADDV */
5470         if (is_u) {
5471             unallocated_encoding(s);
5472             return;
5473         }
5474         /* fall through */
5475     case 0x3: /* SADDLV, UADDLV */
5476     case 0xa: /* SMAXV, UMAXV */
5477     case 0x1a: /* SMINV, UMINV */
5478         if (size == 3 || (size == 2 && !is_q)) {
5479             unallocated_encoding(s);
5480             return;
5481         }
5482         break;
5483     case 0xc: /* FMAXNMV, FMINNMV */
5484     case 0xf: /* FMAXV, FMINV */
5485         if (!is_u || !is_q || extract32(size, 0, 1)) {
5486             unallocated_encoding(s);
5487             return;
5488         }
5489         /* Bit 1 of size field encodes min vs max, and actual size is always
5490          * 32 bits: adjust the size variable so following code can rely on it
5491          */
5492         is_min = extract32(size, 1, 1);
5493         is_fp = true;
5494         size = 2;
5495         break;
5496     default:
5497         unallocated_encoding(s);
5498         return;
5499     }
5500
5501     if (!fp_access_check(s)) {
5502         return;
5503     }
5504
5505     esize = 8 << size;
5506     elements = (is_q ? 128 : 64) / esize;
5507
5508     tcg_res = tcg_temp_new_i64();
5509     tcg_elt = tcg_temp_new_i64();
5510
5511     /* These instructions operate across all lanes of a vector
5512      * to produce a single result. We can guarantee that a 64
5513      * bit intermediate is sufficient:
5514      *  + for [US]ADDLV the maximum element size is 32 bits, and
5515      *    the result type is 64 bits
5516      *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
5517      *    same as the element size, which is 32 bits at most
5518      * For the integer operations we can choose to work at 64
5519      * or 32 bits and truncate at the end; for simplicity
5520      * we use 64 bits always. The floating point
5521      * ops do require 32 bit intermediates, though.
5522      */
5523     if (!is_fp) {
5524         read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
5525
5526         for (i = 1; i < elements; i++) {
5527             read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
5528
5529             switch (opcode) {
5530             case 0x03: /* SADDLV / UADDLV */
5531             case 0x1b: /* ADDV */
5532                 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
5533                 break;
5534             case 0x0a: /* SMAXV / UMAXV */
5535                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
5536                                     tcg_res,
5537                                     tcg_res, tcg_elt, tcg_res, tcg_elt);
5538                 break;
5539             case 0x1a: /* SMINV / UMINV */
5540                 tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
5541                                     tcg_res,
5542                                     tcg_res, tcg_elt, tcg_res, tcg_elt);
5543                 break;
5544                 break;
5545             default:
5546                 g_assert_not_reached();
5547             }
5548
5549         }
5550     } else {
5551         /* Floating point ops which work on 32 bit (single) intermediates.
5552          * Note that correct NaN propagation requires that we do these
5553          * operations in exactly the order specified by the pseudocode.
5554          */
5555         TCGv_i32 tcg_elt1 = tcg_temp_new_i32();
5556         TCGv_i32 tcg_elt2 = tcg_temp_new_i32();
5557         TCGv_i32 tcg_elt3 = tcg_temp_new_i32();
5558         TCGv_ptr fpst = get_fpstatus_ptr();
5559
5560         assert(esize == 32);
5561         assert(elements == 4);
5562
5563         read_vec_element(s, tcg_elt, rn, 0, MO_32);
5564         tcg_gen_extrl_i64_i32(tcg_elt1, tcg_elt);
5565         read_vec_element(s, tcg_elt, rn, 1, MO_32);
5566         tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5567
5568         do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5569
5570         read_vec_element(s, tcg_elt, rn, 2, MO_32);
5571         tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5572         read_vec_element(s, tcg_elt, rn, 3, MO_32);
5573         tcg_gen_extrl_i64_i32(tcg_elt3, tcg_elt);
5574
5575         do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst);
5576
5577         do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5578
5579         tcg_gen_extu_i32_i64(tcg_res, tcg_elt1);
5580         tcg_temp_free_i32(tcg_elt1);
5581         tcg_temp_free_i32(tcg_elt2);
5582         tcg_temp_free_i32(tcg_elt3);
5583         tcg_temp_free_ptr(fpst);
5584     }
5585
5586     tcg_temp_free_i64(tcg_elt);
5587
5588     /* Now truncate the result to the width required for the final output */
5589     if (opcode == 0x03) {
5590         /* SADDLV, UADDLV: result is 2*esize */
5591         size++;
5592     }
5593
5594     switch (size) {
5595     case 0:
5596         tcg_gen_ext8u_i64(tcg_res, tcg_res);
5597         break;
5598     case 1:
5599         tcg_gen_ext16u_i64(tcg_res, tcg_res);
5600         break;
5601     case 2:
5602         tcg_gen_ext32u_i64(tcg_res, tcg_res);
5603         break;
5604     case 3:
5605         break;
5606     default:
5607         g_assert_not_reached();
5608     }
5609
5610     write_fp_dreg(s, rd, tcg_res);
5611     tcg_temp_free_i64(tcg_res);
5612 }
5613
5614 /* C6.3.31 DUP (Element, Vector)
5615  *
5616  *  31  30   29              21 20    16 15        10  9    5 4    0
5617  * +---+---+-------------------+--------+-------------+------+------+
5618  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5619  * +---+---+-------------------+--------+-------------+------+------+
5620  *
5621  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5622  */
5623 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
5624                              int imm5)
5625 {
5626     int size = ctz32(imm5);
5627     int esize = 8 << size;
5628     int elements = (is_q ? 128 : 64) / esize;
5629     int index, i;
5630     TCGv_i64 tmp;
5631
5632     if (size > 3 || (size == 3 && !is_q)) {
5633         unallocated_encoding(s);
5634         return;
5635     }
5636
5637     if (!fp_access_check(s)) {
5638         return;
5639     }
5640
5641     index = imm5 >> (size + 1);
5642
5643     tmp = tcg_temp_new_i64();
5644     read_vec_element(s, tmp, rn, index, size);
5645
5646     for (i = 0; i < elements; i++) {
5647         write_vec_element(s, tmp, rd, i, size);
5648     }
5649
5650     if (!is_q) {
5651         clear_vec_high(s, rd);
5652     }
5653
5654     tcg_temp_free_i64(tmp);
5655 }
5656
5657 /* C6.3.31 DUP (element, scalar)
5658  *  31                   21 20    16 15        10  9    5 4    0
5659  * +-----------------------+--------+-------------+------+------+
5660  * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5661  * +-----------------------+--------+-------------+------+------+
5662  */
5663 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
5664                               int imm5)
5665 {
5666     int size = ctz32(imm5);
5667     int index;
5668     TCGv_i64 tmp;
5669
5670     if (size > 3) {
5671         unallocated_encoding(s);
5672         return;
5673     }
5674
5675     if (!fp_access_check(s)) {
5676         return;
5677     }
5678
5679     index = imm5 >> (size + 1);
5680
5681     /* This instruction just extracts the specified element and
5682      * zero-extends it into the bottom of the destination register.
5683      */
5684     tmp = tcg_temp_new_i64();
5685     read_vec_element(s, tmp, rn, index, size);
5686     write_fp_dreg(s, rd, tmp);
5687     tcg_temp_free_i64(tmp);
5688 }
5689
5690 /* C6.3.32 DUP (General)
5691  *
5692  *  31  30   29              21 20    16 15        10  9    5 4    0
5693  * +---+---+-------------------+--------+-------------+------+------+
5694  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
5695  * +---+---+-------------------+--------+-------------+------+------+
5696  *
5697  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5698  */
5699 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
5700                              int imm5)
5701 {
5702     int size = ctz32(imm5);
5703     int esize = 8 << size;
5704     int elements = (is_q ? 128 : 64)/esize;
5705     int i = 0;
5706
5707     if (size > 3 || ((size == 3) && !is_q)) {
5708         unallocated_encoding(s);
5709         return;
5710     }
5711
5712     if (!fp_access_check(s)) {
5713         return;
5714     }
5715
5716     for (i = 0; i < elements; i++) {
5717         write_vec_element(s, cpu_reg(s, rn), rd, i, size);
5718     }
5719     if (!is_q) {
5720         clear_vec_high(s, rd);
5721     }
5722 }
5723
5724 /* C6.3.150 INS (Element)
5725  *
5726  *  31                   21 20    16 15  14    11  10 9    5 4    0
5727  * +-----------------------+--------+------------+---+------+------+
5728  * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
5729  * +-----------------------+--------+------------+---+------+------+
5730  *
5731  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5732  * index: encoded in imm5<4:size+1>
5733  */
5734 static void handle_simd_inse(DisasContext *s, int rd, int rn,
5735                              int imm4, int imm5)
5736 {
5737     int size = ctz32(imm5);
5738     int src_index, dst_index;
5739     TCGv_i64 tmp;
5740
5741     if (size > 3) {
5742         unallocated_encoding(s);
5743         return;
5744     }
5745
5746     if (!fp_access_check(s)) {
5747         return;
5748     }
5749
5750     dst_index = extract32(imm5, 1+size, 5);
5751     src_index = extract32(imm4, size, 4);
5752
5753     tmp = tcg_temp_new_i64();
5754
5755     read_vec_element(s, tmp, rn, src_index, size);
5756     write_vec_element(s, tmp, rd, dst_index, size);
5757
5758     tcg_temp_free_i64(tmp);
5759 }
5760
5761
5762 /* C6.3.151 INS (General)
5763  *
5764  *  31                   21 20    16 15        10  9    5 4    0
5765  * +-----------------------+--------+-------------+------+------+
5766  * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
5767  * +-----------------------+--------+-------------+------+------+
5768  *
5769  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5770  * index: encoded in imm5<4:size+1>
5771  */
5772 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
5773 {
5774     int size = ctz32(imm5);
5775     int idx;
5776
5777     if (size > 3) {
5778         unallocated_encoding(s);
5779         return;
5780     }
5781
5782     if (!fp_access_check(s)) {
5783         return;
5784     }
5785
5786     idx = extract32(imm5, 1 + size, 4 - size);
5787     write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
5788 }
5789
5790 /*
5791  * C6.3.321 UMOV (General)
5792  * C6.3.237 SMOV (General)
5793  *
5794  *  31  30   29              21 20    16 15    12   10 9    5 4    0
5795  * +---+---+-------------------+--------+-------------+------+------+
5796  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
5797  * +---+---+-------------------+--------+-------------+------+------+
5798  *
5799  * U: unsigned when set
5800  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5801  */
5802 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
5803                                   int rn, int rd, int imm5)
5804 {
5805     int size = ctz32(imm5);
5806     int element;
5807     TCGv_i64 tcg_rd;
5808
5809     /* Check for UnallocatedEncodings */
5810     if (is_signed) {
5811         if (size > 2 || (size == 2 && !is_q)) {
5812             unallocated_encoding(s);
5813             return;
5814         }
5815     } else {
5816         if (size > 3
5817             || (size < 3 && is_q)
5818             || (size == 3 && !is_q)) {
5819             unallocated_encoding(s);
5820             return;
5821         }
5822     }
5823
5824     if (!fp_access_check(s)) {
5825         return;
5826     }
5827
5828     element = extract32(imm5, 1+size, 4);
5829
5830     tcg_rd = cpu_reg(s, rd);
5831     read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
5832     if (is_signed && !is_q) {
5833         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5834     }
5835 }
5836
5837 /* C3.6.5 AdvSIMD copy
5838  *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
5839  * +---+---+----+-----------------+------+---+------+---+------+------+
5840  * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
5841  * +---+---+----+-----------------+------+---+------+---+------+------+
5842  */
5843 static void disas_simd_copy(DisasContext *s, uint32_t insn)
5844 {
5845     int rd = extract32(insn, 0, 5);
5846     int rn = extract32(insn, 5, 5);
5847     int imm4 = extract32(insn, 11, 4);
5848     int op = extract32(insn, 29, 1);
5849     int is_q = extract32(insn, 30, 1);
5850     int imm5 = extract32(insn, 16, 5);
5851
5852     if (op) {
5853         if (is_q) {
5854             /* INS (element) */
5855             handle_simd_inse(s, rd, rn, imm4, imm5);
5856         } else {
5857             unallocated_encoding(s);
5858         }
5859     } else {
5860         switch (imm4) {
5861         case 0:
5862             /* DUP (element - vector) */
5863             handle_simd_dupe(s, is_q, rd, rn, imm5);
5864             break;
5865         case 1:
5866             /* DUP (general) */
5867             handle_simd_dupg(s, is_q, rd, rn, imm5);
5868             break;
5869         case 3:
5870             if (is_q) {
5871                 /* INS (general) */
5872                 handle_simd_insg(s, rd, rn, imm5);
5873             } else {
5874                 unallocated_encoding(s);
5875             }
5876             break;
5877         case 5:
5878         case 7:
5879             /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
5880             handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
5881             break;
5882         default:
5883             unallocated_encoding(s);
5884             break;
5885         }
5886     }
5887 }
5888
5889 /* C3.6.6 AdvSIMD modified immediate
5890  *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
5891  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
5892  * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
5893  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
5894  *
5895  * There are a number of operations that can be carried out here:
5896  *   MOVI - move (shifted) imm into register
5897  *   MVNI - move inverted (shifted) imm into register
5898  *   ORR  - bitwise OR of (shifted) imm with register
5899  *   BIC  - bitwise clear of (shifted) imm with register
5900  */
5901 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
5902 {
5903     int rd = extract32(insn, 0, 5);
5904     int cmode = extract32(insn, 12, 4);
5905     int cmode_3_1 = extract32(cmode, 1, 3);
5906     int cmode_0 = extract32(cmode, 0, 1);
5907     int o2 = extract32(insn, 11, 1);
5908     uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
5909     bool is_neg = extract32(insn, 29, 1);
5910     bool is_q = extract32(insn, 30, 1);
5911     uint64_t imm = 0;
5912     TCGv_i64 tcg_rd, tcg_imm;
5913     int i;
5914
5915     if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
5916         unallocated_encoding(s);
5917         return;
5918     }
5919
5920     if (!fp_access_check(s)) {
5921         return;
5922     }
5923
5924     /* See AdvSIMDExpandImm() in ARM ARM */
5925     switch (cmode_3_1) {
5926     case 0: /* Replicate(Zeros(24):imm8, 2) */
5927     case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
5928     case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
5929     case 3: /* Replicate(imm8:Zeros(24), 2) */
5930     {
5931         int shift = cmode_3_1 * 8;
5932         imm = bitfield_replicate(abcdefgh << shift, 32);
5933         break;
5934     }
5935     case 4: /* Replicate(Zeros(8):imm8, 4) */
5936     case 5: /* Replicate(imm8:Zeros(8), 4) */
5937     {
5938         int shift = (cmode_3_1 & 0x1) * 8;
5939         imm = bitfield_replicate(abcdefgh << shift, 16);
5940         break;
5941     }
5942     case 6:
5943         if (cmode_0) {
5944             /* Replicate(Zeros(8):imm8:Ones(16), 2) */
5945             imm = (abcdefgh << 16) | 0xffff;
5946         } else {
5947             /* Replicate(Zeros(16):imm8:Ones(8), 2) */
5948             imm = (abcdefgh << 8) | 0xff;
5949         }
5950         imm = bitfield_replicate(imm, 32);
5951         break;
5952     case 7:
5953         if (!cmode_0 && !is_neg) {
5954             imm = bitfield_replicate(abcdefgh, 8);
5955         } else if (!cmode_0 && is_neg) {
5956             int i;
5957             imm = 0;
5958             for (i = 0; i < 8; i++) {
5959                 if ((abcdefgh) & (1 << i)) {
5960                     imm |= 0xffULL << (i * 8);
5961                 }
5962             }
5963         } else if (cmode_0) {
5964             if (is_neg) {
5965                 imm = (abcdefgh & 0x3f) << 48;
5966                 if (abcdefgh & 0x80) {
5967                     imm |= 0x8000000000000000ULL;
5968                 }
5969                 if (abcdefgh & 0x40) {
5970                     imm |= 0x3fc0000000000000ULL;
5971                 } else {
5972                     imm |= 0x4000000000000000ULL;
5973                 }
5974             } else {
5975                 imm = (abcdefgh & 0x3f) << 19;
5976                 if (abcdefgh & 0x80) {
5977                     imm |= 0x80000000;
5978                 }
5979                 if (abcdefgh & 0x40) {
5980                     imm |= 0x3e000000;
5981                 } else {
5982                     imm |= 0x40000000;
5983                 }
5984                 imm |= (imm << 32);
5985             }
5986         }
5987         break;
5988     }
5989
5990     if (cmode_3_1 != 7 && is_neg) {
5991         imm = ~imm;
5992     }
5993
5994     tcg_imm = tcg_const_i64(imm);
5995     tcg_rd = new_tmp_a64(s);
5996
5997     for (i = 0; i < 2; i++) {
5998         int foffs = i ? fp_reg_hi_offset(s, rd) : fp_reg_offset(s, rd, MO_64);
5999
6000         if (i == 1 && !is_q) {
6001             /* non-quad ops clear high half of vector */
6002             tcg_gen_movi_i64(tcg_rd, 0);
6003         } else if ((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9) {
6004             tcg_gen_ld_i64(tcg_rd, cpu_env, foffs);
6005             if (is_neg) {
6006                 /* AND (BIC) */
6007                 tcg_gen_and_i64(tcg_rd, tcg_rd, tcg_imm);
6008             } else {
6009                 /* ORR */
6010                 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_imm);
6011             }
6012         } else {
6013             /* MOVI */
6014             tcg_gen_mov_i64(tcg_rd, tcg_imm);
6015         }
6016         tcg_gen_st_i64(tcg_rd, cpu_env, foffs);
6017     }
6018
6019     tcg_temp_free_i64(tcg_imm);
6020 }
6021
6022 /* C3.6.7 AdvSIMD scalar copy
6023  *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
6024  * +-----+----+-----------------+------+---+------+---+------+------+
6025  * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6026  * +-----+----+-----------------+------+---+------+---+------+------+
6027  */
6028 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
6029 {
6030     int rd = extract32(insn, 0, 5);
6031     int rn = extract32(insn, 5, 5);
6032     int imm4 = extract32(insn, 11, 4);
6033     int imm5 = extract32(insn, 16, 5);
6034     int op = extract32(insn, 29, 1);
6035
6036     if (op != 0 || imm4 != 0) {
6037         unallocated_encoding(s);
6038         return;
6039     }
6040
6041     /* DUP (element, scalar) */
6042     handle_simd_dupes(s, rd, rn, imm5);
6043 }
6044
6045 /* C3.6.8 AdvSIMD scalar pairwise
6046  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
6047  * +-----+---+-----------+------+-----------+--------+-----+------+------+
6048  * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
6049  * +-----+---+-----------+------+-----------+--------+-----+------+------+
6050  */
6051 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
6052 {
6053     int u = extract32(insn, 29, 1);
6054     int size = extract32(insn, 22, 2);
6055     int opcode = extract32(insn, 12, 5);
6056     int rn = extract32(insn, 5, 5);
6057     int rd = extract32(insn, 0, 5);
6058     TCGv_ptr fpst;
6059
6060     /* For some ops (the FP ones), size[1] is part of the encoding.
6061      * For ADDP strictly it is not but size[1] is always 1 for valid
6062      * encodings.
6063      */
6064     opcode |= (extract32(size, 1, 1) << 5);
6065
6066     switch (opcode) {
6067     case 0x3b: /* ADDP */
6068         if (u || size != 3) {
6069             unallocated_encoding(s);
6070             return;
6071         }
6072         if (!fp_access_check(s)) {
6073             return;
6074         }
6075
6076         TCGV_UNUSED_PTR(fpst);
6077         break;
6078     case 0xc: /* FMAXNMP */
6079     case 0xd: /* FADDP */
6080     case 0xf: /* FMAXP */
6081     case 0x2c: /* FMINNMP */
6082     case 0x2f: /* FMINP */
6083         /* FP op, size[0] is 32 or 64 bit */
6084         if (!u) {
6085             unallocated_encoding(s);
6086             return;
6087         }
6088         if (!fp_access_check(s)) {
6089             return;
6090         }
6091
6092         size = extract32(size, 0, 1) ? 3 : 2;
6093         fpst = get_fpstatus_ptr();
6094         break;
6095     default:
6096         unallocated_encoding(s);
6097         return;
6098     }
6099
6100     if (size == 3) {
6101         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6102         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6103         TCGv_i64 tcg_res = tcg_temp_new_i64();
6104
6105         read_vec_element(s, tcg_op1, rn, 0, MO_64);
6106         read_vec_element(s, tcg_op2, rn, 1, MO_64);
6107
6108         switch (opcode) {
6109         case 0x3b: /* ADDP */
6110             tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
6111             break;
6112         case 0xc: /* FMAXNMP */
6113             gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6114             break;
6115         case 0xd: /* FADDP */
6116             gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6117             break;
6118         case 0xf: /* FMAXP */
6119             gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6120             break;
6121         case 0x2c: /* FMINNMP */
6122             gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6123             break;
6124         case 0x2f: /* FMINP */
6125             gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6126             break;
6127         default:
6128             g_assert_not_reached();
6129         }
6130
6131         write_fp_dreg(s, rd, tcg_res);
6132
6133         tcg_temp_free_i64(tcg_op1);
6134         tcg_temp_free_i64(tcg_op2);
6135         tcg_temp_free_i64(tcg_res);
6136     } else {
6137         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
6138         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
6139         TCGv_i32 tcg_res = tcg_temp_new_i32();
6140
6141         read_vec_element_i32(s, tcg_op1, rn, 0, MO_32);
6142         read_vec_element_i32(s, tcg_op2, rn, 1, MO_32);
6143
6144         switch (opcode) {
6145         case 0xc: /* FMAXNMP */
6146             gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6147             break;
6148         case 0xd: /* FADDP */
6149             gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6150             break;
6151         case 0xf: /* FMAXP */
6152             gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6153             break;
6154         case 0x2c: /* FMINNMP */
6155             gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6156             break;
6157         case 0x2f: /* FMINP */
6158             gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6159             break;
6160         default:
6161             g_assert_not_reached();
6162         }
6163
6164         write_fp_sreg(s, rd, tcg_res);
6165
6166         tcg_temp_free_i32(tcg_op1);
6167         tcg_temp_free_i32(tcg_op2);
6168         tcg_temp_free_i32(tcg_res);
6169     }
6170
6171     if (!TCGV_IS_UNUSED_PTR(fpst)) {
6172         tcg_temp_free_ptr(fpst);
6173     }
6174 }
6175
6176 /*
6177  * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
6178  *
6179  * This code is handles the common shifting code and is used by both
6180  * the vector and scalar code.
6181  */
6182 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6183                                     TCGv_i64 tcg_rnd, bool accumulate,
6184                                     bool is_u, int size, int shift)
6185 {
6186     bool extended_result = false;
6187     bool round = !TCGV_IS_UNUSED_I64(tcg_rnd);
6188     int ext_lshift = 0;
6189     TCGv_i64 tcg_src_hi;
6190
6191     if (round && size == 3) {
6192         extended_result = true;
6193         ext_lshift = 64 - shift;
6194         tcg_src_hi = tcg_temp_new_i64();
6195     } else if (shift == 64) {
6196         if (!accumulate && is_u) {
6197             /* result is zero */
6198             tcg_gen_movi_i64(tcg_res, 0);
6199             return;
6200         }
6201     }
6202
6203     /* Deal with the rounding step */
6204     if (round) {
6205         if (extended_result) {
6206             TCGv_i64 tcg_zero = tcg_const_i64(0);
6207             if (!is_u) {
6208                 /* take care of sign extending tcg_res */
6209                 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
6210                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6211                                  tcg_src, tcg_src_hi,
6212                                  tcg_rnd, tcg_zero);
6213             } else {
6214                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6215                                  tcg_src, tcg_zero,
6216                                  tcg_rnd, tcg_zero);
6217             }
6218             tcg_temp_free_i64(tcg_zero);
6219         } else {
6220             tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
6221         }
6222     }
6223
6224     /* Now do the shift right */
6225     if (round && extended_result) {
6226         /* extended case, >64 bit precision required */
6227         if (ext_lshift == 0) {
6228             /* special case, only high bits matter */
6229             tcg_gen_mov_i64(tcg_src, tcg_src_hi);
6230         } else {
6231             tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6232             tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
6233             tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
6234         }
6235     } else {
6236         if (is_u) {
6237             if (shift == 64) {
6238                 /* essentially shifting in 64 zeros */
6239                 tcg_gen_movi_i64(tcg_src, 0);
6240             } else {
6241                 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6242             }
6243         } else {
6244             if (shift == 64) {
6245                 /* effectively extending the sign-bit */
6246                 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
6247             } else {
6248                 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
6249             }
6250         }
6251     }
6252
6253     if (accumulate) {
6254         tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
6255     } else {
6256         tcg_gen_mov_i64(tcg_res, tcg_src);
6257     }
6258
6259     if (extended_result) {
6260         tcg_temp_free_i64(tcg_src_hi);
6261     }
6262 }
6263
6264 /* Common SHL/SLI - Shift left with an optional insert */
6265 static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6266                                  bool insert, int shift)
6267 {
6268     if (insert) { /* SLI */
6269         tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift);
6270     } else { /* SHL */
6271         tcg_gen_shli_i64(tcg_res, tcg_src, shift);
6272     }
6273 }
6274
6275 /* SRI: shift right with insert */
6276 static void handle_shri_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6277                                  int size, int shift)
6278 {
6279     int esize = 8 << size;
6280
6281     /* shift count same as element size is valid but does nothing;
6282      * special case to avoid potential shift by 64.
6283      */
6284     if (shift != esize) {
6285         tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6286         tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, 0, esize - shift);
6287     }
6288 }
6289
6290 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
6291 static void handle_scalar_simd_shri(DisasContext *s,
6292                                     bool is_u, int immh, int immb,
6293                                     int opcode, int rn, int rd)
6294 {
6295     const int size = 3;
6296     int immhb = immh << 3 | immb;
6297     int shift = 2 * (8 << size) - immhb;
6298     bool accumulate = false;
6299     bool round = false;
6300     bool insert = false;
6301     TCGv_i64 tcg_rn;
6302     TCGv_i64 tcg_rd;
6303     TCGv_i64 tcg_round;
6304
6305     if (!extract32(immh, 3, 1)) {
6306         unallocated_encoding(s);
6307         return;
6308     }
6309
6310     if (!fp_access_check(s)) {
6311         return;
6312     }
6313
6314     switch (opcode) {
6315     case 0x02: /* SSRA / USRA (accumulate) */
6316         accumulate = true;
6317         break;
6318     case 0x04: /* SRSHR / URSHR (rounding) */
6319         round = true;
6320         break;
6321     case 0x06: /* SRSRA / URSRA (accum + rounding) */
6322         accumulate = round = true;
6323         break;
6324     case 0x08: /* SRI */
6325         insert = true;
6326         break;
6327     }
6328
6329     if (round) {
6330         uint64_t round_const = 1ULL << (shift - 1);
6331         tcg_round = tcg_const_i64(round_const);
6332     } else {
6333         TCGV_UNUSED_I64(tcg_round);
6334     }
6335
6336     tcg_rn = read_fp_dreg(s, rn);
6337     tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6338
6339     if (insert) {
6340         handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
6341     } else {
6342         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6343                                 accumulate, is_u, size, shift);
6344     }
6345
6346     write_fp_dreg(s, rd, tcg_rd);
6347
6348     tcg_temp_free_i64(tcg_rn);
6349     tcg_temp_free_i64(tcg_rd);
6350     if (round) {
6351         tcg_temp_free_i64(tcg_round);
6352     }
6353 }
6354
6355 /* SHL/SLI - Scalar shift left */
6356 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
6357                                     int immh, int immb, int opcode,
6358                                     int rn, int rd)
6359 {
6360     int size = 32 - clz32(immh) - 1;
6361     int immhb = immh << 3 | immb;
6362     int shift = immhb - (8 << size);
6363     TCGv_i64 tcg_rn = new_tmp_a64(s);
6364     TCGv_i64 tcg_rd = new_tmp_a64(s);
6365
6366     if (!extract32(immh, 3, 1)) {
6367         unallocated_encoding(s);
6368         return;
6369     }
6370
6371     if (!fp_access_check(s)) {
6372         return;
6373     }
6374
6375     tcg_rn = read_fp_dreg(s, rn);
6376     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6377
6378     handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
6379
6380     write_fp_dreg(s, rd, tcg_rd);
6381
6382     tcg_temp_free_i64(tcg_rn);
6383     tcg_temp_free_i64(tcg_rd);
6384 }
6385
6386 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
6387  * (signed/unsigned) narrowing */
6388 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
6389                                    bool is_u_shift, bool is_u_narrow,
6390                                    int immh, int immb, int opcode,
6391                                    int rn, int rd)
6392 {
6393     int immhb = immh << 3 | immb;
6394     int size = 32 - clz32(immh) - 1;
6395     int esize = 8 << size;
6396     int shift = (2 * esize) - immhb;
6397     int elements = is_scalar ? 1 : (64 / esize);
6398     bool round = extract32(opcode, 0, 1);
6399     TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
6400     TCGv_i64 tcg_rn, tcg_rd, tcg_round;
6401     TCGv_i32 tcg_rd_narrowed;
6402     TCGv_i64 tcg_final;
6403
6404     static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
6405         { gen_helper_neon_narrow_sat_s8,
6406           gen_helper_neon_unarrow_sat8 },
6407         { gen_helper_neon_narrow_sat_s16,
6408           gen_helper_neon_unarrow_sat16 },
6409         { gen_helper_neon_narrow_sat_s32,
6410           gen_helper_neon_unarrow_sat32 },
6411         { NULL, NULL },
6412     };
6413     static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
6414         gen_helper_neon_narrow_sat_u8,
6415         gen_helper_neon_narrow_sat_u16,
6416         gen_helper_neon_narrow_sat_u32,
6417         NULL
6418     };
6419     NeonGenNarrowEnvFn *narrowfn;
6420
6421     int i;
6422
6423     assert(size < 4);
6424
6425     if (extract32(immh, 3, 1)) {
6426         unallocated_encoding(s);
6427         return;
6428     }
6429
6430     if (!fp_access_check(s)) {
6431         return;
6432     }
6433
6434     if (is_u_shift) {
6435         narrowfn = unsigned_narrow_fns[size];
6436     } else {
6437         narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
6438     }
6439
6440     tcg_rn = tcg_temp_new_i64();
6441     tcg_rd = tcg_temp_new_i64();
6442     tcg_rd_narrowed = tcg_temp_new_i32();
6443     tcg_final = tcg_const_i64(0);
6444
6445     if (round) {
6446         uint64_t round_const = 1ULL << (shift - 1);
6447         tcg_round = tcg_const_i64(round_const);
6448     } else {
6449         TCGV_UNUSED_I64(tcg_round);
6450     }
6451
6452     for (i = 0; i < elements; i++) {
6453         read_vec_element(s, tcg_rn, rn, i, ldop);
6454         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6455                                 false, is_u_shift, size+1, shift);
6456         narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
6457         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
6458         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
6459     }
6460
6461     if (!is_q) {
6462         clear_vec_high(s, rd);
6463         write_vec_element(s, tcg_final, rd, 0, MO_64);
6464     } else {
6465         write_vec_element(s, tcg_final, rd, 1, MO_64);
6466     }
6467
6468     if (round) {
6469         tcg_temp_free_i64(tcg_round);
6470     }
6471     tcg_temp_free_i64(tcg_rn);
6472     tcg_temp_free_i64(tcg_rd);
6473     tcg_temp_free_i32(tcg_rd_narrowed);
6474     tcg_temp_free_i64(tcg_final);
6475     return;
6476 }
6477
6478 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
6479 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
6480                              bool src_unsigned, bool dst_unsigned,
6481                              int immh, int immb, int rn, int rd)
6482 {
6483     int immhb = immh << 3 | immb;
6484     int size = 32 - clz32(immh) - 1;
6485     int shift = immhb - (8 << size);
6486     int pass;
6487
6488     assert(immh != 0);
6489     assert(!(scalar && is_q));
6490
6491     if (!scalar) {
6492         if (!is_q && extract32(immh, 3, 1)) {
6493             unallocated_encoding(s);
6494             return;
6495         }
6496
6497         /* Since we use the variable-shift helpers we must
6498          * replicate the shift count into each element of
6499          * the tcg_shift value.
6500          */
6501         switch (size) {
6502         case 0:
6503             shift |= shift << 8;
6504             /* fall through */
6505         case 1:
6506             shift |= shift << 16;
6507             break;
6508         case 2:
6509         case 3:
6510             break;
6511         default:
6512             g_assert_not_reached();
6513         }
6514     }
6515
6516     if (!fp_access_check(s)) {
6517         return;
6518     }
6519
6520     if (size == 3) {
6521         TCGv_i64 tcg_shift = tcg_const_i64(shift);
6522         static NeonGenTwo64OpEnvFn * const fns[2][2] = {
6523             { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
6524             { NULL, gen_helper_neon_qshl_u64 },
6525         };
6526         NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
6527         int maxpass = is_q ? 2 : 1;
6528
6529         for (pass = 0; pass < maxpass; pass++) {
6530             TCGv_i64 tcg_op = tcg_temp_new_i64();
6531
6532             read_vec_element(s, tcg_op, rn, pass, MO_64);
6533             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6534             write_vec_element(s, tcg_op, rd, pass, MO_64);
6535
6536             tcg_temp_free_i64(tcg_op);
6537         }
6538         tcg_temp_free_i64(tcg_shift);
6539
6540         if (!is_q) {
6541             clear_vec_high(s, rd);
6542         }
6543     } else {
6544         TCGv_i32 tcg_shift = tcg_const_i32(shift);
6545         static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
6546             {
6547                 { gen_helper_neon_qshl_s8,
6548                   gen_helper_neon_qshl_s16,
6549                   gen_helper_neon_qshl_s32 },
6550                 { gen_helper_neon_qshlu_s8,
6551                   gen_helper_neon_qshlu_s16,
6552                   gen_helper_neon_qshlu_s32 }
6553             }, {
6554                 { NULL, NULL, NULL },
6555                 { gen_helper_neon_qshl_u8,
6556                   gen_helper_neon_qshl_u16,
6557                   gen_helper_neon_qshl_u32 }
6558             }
6559         };
6560         NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
6561         TCGMemOp memop = scalar ? size : MO_32;
6562         int maxpass = scalar ? 1 : is_q ? 4 : 2;
6563
6564         for (pass = 0; pass < maxpass; pass++) {
6565             TCGv_i32 tcg_op = tcg_temp_new_i32();
6566
6567             read_vec_element_i32(s, tcg_op, rn, pass, memop);
6568             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6569             if (scalar) {
6570                 switch (size) {
6571                 case 0:
6572                     tcg_gen_ext8u_i32(tcg_op, tcg_op);
6573                     break;
6574                 case 1:
6575                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
6576                     break;
6577                 case 2:
6578                     break;
6579                 default:
6580                     g_assert_not_reached();
6581                 }
6582                 write_fp_sreg(s, rd, tcg_op);
6583             } else {
6584                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6585             }
6586
6587             tcg_temp_free_i32(tcg_op);
6588         }
6589         tcg_temp_free_i32(tcg_shift);
6590
6591         if (!is_q && !scalar) {
6592             clear_vec_high(s, rd);
6593         }
6594     }
6595 }
6596
6597 /* Common vector code for handling integer to FP conversion */
6598 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
6599                                    int elements, int is_signed,
6600                                    int fracbits, int size)
6601 {
6602     bool is_double = size == 3 ? true : false;
6603     TCGv_ptr tcg_fpst = get_fpstatus_ptr();
6604     TCGv_i32 tcg_shift = tcg_const_i32(fracbits);
6605     TCGv_i64 tcg_int = tcg_temp_new_i64();
6606     TCGMemOp mop = size | (is_signed ? MO_SIGN : 0);
6607     int pass;
6608
6609     for (pass = 0; pass < elements; pass++) {
6610         read_vec_element(s, tcg_int, rn, pass, mop);
6611
6612         if (is_double) {
6613             TCGv_i64 tcg_double = tcg_temp_new_i64();
6614             if (is_signed) {
6615                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
6616                                      tcg_shift, tcg_fpst);
6617             } else {
6618                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
6619                                      tcg_shift, tcg_fpst);
6620             }
6621             if (elements == 1) {
6622                 write_fp_dreg(s, rd, tcg_double);
6623             } else {
6624                 write_vec_element(s, tcg_double, rd, pass, MO_64);
6625             }
6626             tcg_temp_free_i64(tcg_double);
6627         } else {
6628             TCGv_i32 tcg_single = tcg_temp_new_i32();
6629             if (is_signed) {
6630                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
6631                                      tcg_shift, tcg_fpst);
6632             } else {
6633                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
6634                                      tcg_shift, tcg_fpst);
6635             }
6636             if (elements == 1) {
6637                 write_fp_sreg(s, rd, tcg_single);
6638             } else {
6639                 write_vec_element_i32(s, tcg_single, rd, pass, MO_32);
6640             }
6641             tcg_temp_free_i32(tcg_single);
6642         }
6643     }
6644
6645     if (!is_double && elements == 2) {
6646         clear_vec_high(s, rd);
6647     }
6648
6649     tcg_temp_free_i64(tcg_int);
6650     tcg_temp_free_ptr(tcg_fpst);
6651     tcg_temp_free_i32(tcg_shift);
6652 }
6653
6654 /* UCVTF/SCVTF - Integer to FP conversion */
6655 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
6656                                          bool is_q, bool is_u,
6657                                          int immh, int immb, int opcode,
6658                                          int rn, int rd)
6659 {
6660     bool is_double = extract32(immh, 3, 1);
6661     int size = is_double ? MO_64 : MO_32;
6662     int elements;
6663     int immhb = immh << 3 | immb;
6664     int fracbits = (is_double ? 128 : 64) - immhb;
6665
6666     if (!extract32(immh, 2, 2)) {
6667         unallocated_encoding(s);
6668         return;
6669     }
6670
6671     if (is_scalar) {
6672         elements = 1;
6673     } else {
6674         elements = is_double ? 2 : is_q ? 4 : 2;
6675         if (is_double && !is_q) {
6676             unallocated_encoding(s);
6677             return;
6678         }
6679     }
6680
6681     if (!fp_access_check(s)) {
6682         return;
6683     }
6684
6685     /* immh == 0 would be a failure of the decode logic */
6686     g_assert(immh);
6687
6688     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
6689 }
6690
6691 /* FCVTZS, FVCVTZU - FP to fixedpoint conversion */
6692 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
6693                                          bool is_q, bool is_u,
6694                                          int immh, int immb, int rn, int rd)
6695 {
6696     bool is_double = extract32(immh, 3, 1);
6697     int immhb = immh << 3 | immb;
6698     int fracbits = (is_double ? 128 : 64) - immhb;
6699     int pass;
6700     TCGv_ptr tcg_fpstatus;
6701     TCGv_i32 tcg_rmode, tcg_shift;
6702
6703     if (!extract32(immh, 2, 2)) {
6704         unallocated_encoding(s);
6705         return;
6706     }
6707
6708     if (!is_scalar && !is_q && is_double) {
6709         unallocated_encoding(s);
6710         return;
6711     }
6712
6713     if (!fp_access_check(s)) {
6714         return;
6715     }
6716
6717     assert(!(is_scalar && is_q));
6718
6719     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
6720     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6721     tcg_fpstatus = get_fpstatus_ptr();
6722     tcg_shift = tcg_const_i32(fracbits);
6723
6724     if (is_double) {
6725         int maxpass = is_scalar ? 1 : 2;
6726
6727         for (pass = 0; pass < maxpass; pass++) {
6728             TCGv_i64 tcg_op = tcg_temp_new_i64();
6729
6730             read_vec_element(s, tcg_op, rn, pass, MO_64);
6731             if (is_u) {
6732                 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6733             } else {
6734                 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6735             }
6736             write_vec_element(s, tcg_op, rd, pass, MO_64);
6737             tcg_temp_free_i64(tcg_op);
6738         }
6739         if (!is_q) {
6740             clear_vec_high(s, rd);
6741         }
6742     } else {
6743         int maxpass = is_scalar ? 1 : is_q ? 4 : 2;
6744         for (pass = 0; pass < maxpass; pass++) {
6745             TCGv_i32 tcg_op = tcg_temp_new_i32();
6746
6747             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
6748             if (is_u) {
6749                 gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6750             } else {
6751                 gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6752             }
6753             if (is_scalar) {
6754                 write_fp_sreg(s, rd, tcg_op);
6755             } else {
6756                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6757             }
6758             tcg_temp_free_i32(tcg_op);
6759         }
6760         if (!is_q && !is_scalar) {
6761             clear_vec_high(s, rd);
6762         }
6763     }
6764
6765     tcg_temp_free_ptr(tcg_fpstatus);
6766     tcg_temp_free_i32(tcg_shift);
6767     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6768     tcg_temp_free_i32(tcg_rmode);
6769 }
6770
6771 /* C3.6.9 AdvSIMD scalar shift by immediate
6772  *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
6773  * +-----+---+-------------+------+------+--------+---+------+------+
6774  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
6775  * +-----+---+-------------+------+------+--------+---+------+------+
6776  *
6777  * This is the scalar version so it works on a fixed sized registers
6778  */
6779 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
6780 {
6781     int rd = extract32(insn, 0, 5);
6782     int rn = extract32(insn, 5, 5);
6783     int opcode = extract32(insn, 11, 5);
6784     int immb = extract32(insn, 16, 3);
6785     int immh = extract32(insn, 19, 4);
6786     bool is_u = extract32(insn, 29, 1);
6787
6788     if (immh == 0) {
6789         unallocated_encoding(s);
6790         return;
6791     }
6792
6793     switch (opcode) {
6794     case 0x08: /* SRI */
6795         if (!is_u) {
6796             unallocated_encoding(s);
6797             return;
6798         }
6799         /* fall through */
6800     case 0x00: /* SSHR / USHR */
6801     case 0x02: /* SSRA / USRA */
6802     case 0x04: /* SRSHR / URSHR */
6803     case 0x06: /* SRSRA / URSRA */
6804         handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
6805         break;
6806     case 0x0a: /* SHL / SLI */
6807         handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
6808         break;
6809     case 0x1c: /* SCVTF, UCVTF */
6810         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
6811                                      opcode, rn, rd);
6812         break;
6813     case 0x10: /* SQSHRUN, SQSHRUN2 */
6814     case 0x11: /* SQRSHRUN, SQRSHRUN2 */
6815         if (!is_u) {
6816             unallocated_encoding(s);
6817             return;
6818         }
6819         handle_vec_simd_sqshrn(s, true, false, false, true,
6820                                immh, immb, opcode, rn, rd);
6821         break;
6822     case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
6823     case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
6824         handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
6825                                immh, immb, opcode, rn, rd);
6826         break;
6827     case 0xc: /* SQSHLU */
6828         if (!is_u) {
6829             unallocated_encoding(s);
6830             return;
6831         }
6832         handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
6833         break;
6834     case 0xe: /* SQSHL, UQSHL */
6835         handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
6836         break;
6837     case 0x1f: /* FCVTZS, FCVTZU */
6838         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
6839         break;
6840     default:
6841         unallocated_encoding(s);
6842         break;
6843     }
6844 }
6845
6846 /* C3.6.10 AdvSIMD scalar three different
6847  *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
6848  * +-----+---+-----------+------+---+------+--------+-----+------+------+
6849  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
6850  * +-----+---+-----------+------+---+------+--------+-----+------+------+
6851  */
6852 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
6853 {
6854     bool is_u = extract32(insn, 29, 1);
6855     int size = extract32(insn, 22, 2);
6856     int opcode = extract32(insn, 12, 4);
6857     int rm = extract32(insn, 16, 5);
6858     int rn = extract32(insn, 5, 5);
6859     int rd = extract32(insn, 0, 5);
6860
6861     if (is_u) {
6862         unallocated_encoding(s);
6863         return;
6864     }
6865
6866     switch (opcode) {
6867     case 0x9: /* SQDMLAL, SQDMLAL2 */
6868     case 0xb: /* SQDMLSL, SQDMLSL2 */
6869     case 0xd: /* SQDMULL, SQDMULL2 */
6870         if (size == 0 || size == 3) {
6871             unallocated_encoding(s);
6872             return;
6873         }
6874         break;
6875     default:
6876         unallocated_encoding(s);
6877         return;
6878     }
6879
6880     if (!fp_access_check(s)) {
6881         return;
6882     }
6883
6884     if (size == 2) {
6885         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6886         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6887         TCGv_i64 tcg_res = tcg_temp_new_i64();
6888
6889         read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
6890         read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
6891
6892         tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
6893         gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
6894
6895         switch (opcode) {
6896         case 0xd: /* SQDMULL, SQDMULL2 */
6897             break;
6898         case 0xb: /* SQDMLSL, SQDMLSL2 */
6899             tcg_gen_neg_i64(tcg_res, tcg_res);
6900             /* fall through */
6901         case 0x9: /* SQDMLAL, SQDMLAL2 */
6902             read_vec_element(s, tcg_op1, rd, 0, MO_64);
6903             gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
6904                                               tcg_res, tcg_op1);
6905             break;
6906         default:
6907             g_assert_not_reached();
6908         }
6909
6910         write_fp_dreg(s, rd, tcg_res);
6911
6912         tcg_temp_free_i64(tcg_op1);
6913         tcg_temp_free_i64(tcg_op2);
6914         tcg_temp_free_i64(tcg_res);
6915     } else {
6916         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
6917         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
6918         TCGv_i64 tcg_res = tcg_temp_new_i64();
6919
6920         read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
6921         read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
6922
6923         gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
6924         gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
6925
6926         switch (opcode) {
6927         case 0xd: /* SQDMULL, SQDMULL2 */
6928             break;
6929         case 0xb: /* SQDMLSL, SQDMLSL2 */
6930             gen_helper_neon_negl_u32(tcg_res, tcg_res);
6931             /* fall through */
6932         case 0x9: /* SQDMLAL, SQDMLAL2 */
6933         {
6934             TCGv_i64 tcg_op3 = tcg_temp_new_i64();
6935             read_vec_element(s, tcg_op3, rd, 0, MO_32);
6936             gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
6937                                               tcg_res, tcg_op3);
6938             tcg_temp_free_i64(tcg_op3);
6939             break;
6940         }
6941         default:
6942             g_assert_not_reached();
6943         }
6944
6945         tcg_gen_ext32u_i64(tcg_res, tcg_res);
6946         write_fp_dreg(s, rd, tcg_res);
6947
6948         tcg_temp_free_i32(tcg_op1);
6949         tcg_temp_free_i32(tcg_op2);
6950         tcg_temp_free_i64(tcg_res);
6951     }
6952 }
6953
6954 static void handle_3same_64(DisasContext *s, int opcode, bool u,
6955                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
6956 {
6957     /* Handle 64x64->64 opcodes which are shared between the scalar
6958      * and vector 3-same groups. We cover every opcode where size == 3
6959      * is valid in either the three-reg-same (integer, not pairwise)
6960      * or scalar-three-reg-same groups. (Some opcodes are not yet
6961      * implemented.)
6962      */
6963     TCGCond cond;
6964
6965     switch (opcode) {
6966     case 0x1: /* SQADD */
6967         if (u) {
6968             gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6969         } else {
6970             gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6971         }
6972         break;
6973     case 0x5: /* SQSUB */
6974         if (u) {
6975             gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6976         } else {
6977             gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
6978         }
6979         break;
6980     case 0x6: /* CMGT, CMHI */
6981         /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
6982          * We implement this using setcond (test) and then negating.
6983          */
6984         cond = u ? TCG_COND_GTU : TCG_COND_GT;
6985     do_cmop:
6986         tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
6987         tcg_gen_neg_i64(tcg_rd, tcg_rd);
6988         break;
6989     case 0x7: /* CMGE, CMHS */
6990         cond = u ? TCG_COND_GEU : TCG_COND_GE;
6991         goto do_cmop;
6992     case 0x11: /* CMTST, CMEQ */
6993         if (u) {
6994             cond = TCG_COND_EQ;
6995             goto do_cmop;
6996         }
6997         /* CMTST : test is "if (X & Y != 0)". */
6998         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
6999         tcg_gen_setcondi_i64(TCG_COND_NE, tcg_rd, tcg_rd, 0);
7000         tcg_gen_neg_i64(tcg_rd, tcg_rd);
7001         break;
7002     case 0x8: /* SSHL, USHL */
7003         if (u) {
7004             gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
7005         } else {
7006             gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
7007         }
7008         break;
7009     case 0x9: /* SQSHL, UQSHL */
7010         if (u) {
7011             gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7012         } else {
7013             gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7014         }
7015         break;
7016     case 0xa: /* SRSHL, URSHL */
7017         if (u) {
7018             gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
7019         } else {
7020             gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
7021         }
7022         break;
7023     case 0xb: /* SQRSHL, UQRSHL */
7024         if (u) {
7025             gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7026         } else {
7027             gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7028         }
7029         break;
7030     case 0x10: /* ADD, SUB */
7031         if (u) {
7032             tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
7033         } else {
7034             tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
7035         }
7036         break;
7037     default:
7038         g_assert_not_reached();
7039     }
7040 }
7041
7042 /* Handle the 3-same-operands float operations; shared by the scalar
7043  * and vector encodings. The caller must filter out any encodings
7044  * not allocated for the encoding it is dealing with.
7045  */
7046 static void handle_3same_float(DisasContext *s, int size, int elements,
7047                                int fpopcode, int rd, int rn, int rm)
7048 {
7049     int pass;
7050     TCGv_ptr fpst = get_fpstatus_ptr();
7051
7052     for (pass = 0; pass < elements; pass++) {
7053         if (size) {
7054             /* Double */
7055             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7056             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7057             TCGv_i64 tcg_res = tcg_temp_new_i64();
7058
7059             read_vec_element(s, tcg_op1, rn, pass, MO_64);
7060             read_vec_element(s, tcg_op2, rm, pass, MO_64);
7061
7062             switch (fpopcode) {
7063             case 0x39: /* FMLS */
7064                 /* As usual for ARM, separate negation for fused multiply-add */
7065                 gen_helper_vfp_negd(tcg_op1, tcg_op1);
7066                 /* fall through */
7067             case 0x19: /* FMLA */
7068                 read_vec_element(s, tcg_res, rd, pass, MO_64);
7069                 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
7070                                        tcg_res, fpst);
7071                 break;
7072             case 0x18: /* FMAXNM */
7073                 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7074                 break;
7075             case 0x1a: /* FADD */
7076                 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
7077                 break;
7078             case 0x1b: /* FMULX */
7079                 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
7080                 break;
7081             case 0x1c: /* FCMEQ */
7082                 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7083                 break;
7084             case 0x1e: /* FMAX */
7085                 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
7086                 break;
7087             case 0x1f: /* FRECPS */
7088                 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7089                 break;
7090             case 0x38: /* FMINNM */
7091                 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7092                 break;
7093             case 0x3a: /* FSUB */
7094                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7095                 break;
7096             case 0x3e: /* FMIN */
7097                 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
7098                 break;
7099             case 0x3f: /* FRSQRTS */
7100                 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7101                 break;
7102             case 0x5b: /* FMUL */
7103                 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
7104                 break;
7105             case 0x5c: /* FCMGE */
7106                 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7107                 break;
7108             case 0x5d: /* FACGE */
7109                 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7110                 break;
7111             case 0x5f: /* FDIV */
7112                 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
7113                 break;
7114             case 0x7a: /* FABD */
7115                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7116                 gen_helper_vfp_absd(tcg_res, tcg_res);
7117                 break;
7118             case 0x7c: /* FCMGT */
7119                 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7120                 break;
7121             case 0x7d: /* FACGT */
7122                 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7123                 break;
7124             default:
7125                 g_assert_not_reached();
7126             }
7127
7128             write_vec_element(s, tcg_res, rd, pass, MO_64);
7129
7130             tcg_temp_free_i64(tcg_res);
7131             tcg_temp_free_i64(tcg_op1);
7132             tcg_temp_free_i64(tcg_op2);
7133         } else {
7134             /* Single */
7135             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7136             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7137             TCGv_i32 tcg_res = tcg_temp_new_i32();
7138
7139             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
7140             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
7141
7142             switch (fpopcode) {
7143             case 0x39: /* FMLS */
7144                 /* As usual for ARM, separate negation for fused multiply-add */
7145                 gen_helper_vfp_negs(tcg_op1, tcg_op1);
7146                 /* fall through */
7147             case 0x19: /* FMLA */
7148                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7149                 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
7150                                        tcg_res, fpst);
7151                 break;
7152             case 0x1a: /* FADD */
7153                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
7154                 break;
7155             case 0x1b: /* FMULX */
7156                 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
7157                 break;
7158             case 0x1c: /* FCMEQ */
7159                 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7160                 break;
7161             case 0x1e: /* FMAX */
7162                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
7163                 break;
7164             case 0x1f: /* FRECPS */
7165                 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7166                 break;
7167             case 0x18: /* FMAXNM */
7168                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
7169                 break;
7170             case 0x38: /* FMINNM */
7171                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
7172                 break;
7173             case 0x3a: /* FSUB */
7174                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7175                 break;
7176             case 0x3e: /* FMIN */
7177                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
7178                 break;
7179             case 0x3f: /* FRSQRTS */
7180                 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7181                 break;
7182             case 0x5b: /* FMUL */
7183                 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
7184                 break;
7185             case 0x5c: /* FCMGE */
7186                 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7187                 break;
7188             case 0x5d: /* FACGE */
7189                 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7190                 break;
7191             case 0x5f: /* FDIV */
7192                 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
7193                 break;
7194             case 0x7a: /* FABD */
7195                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7196                 gen_helper_vfp_abss(tcg_res, tcg_res);
7197                 break;
7198             case 0x7c: /* FCMGT */
7199                 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7200                 break;
7201             case 0x7d: /* FACGT */
7202                 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7203                 break;
7204             default:
7205                 g_assert_not_reached();
7206             }
7207
7208             if (elements == 1) {
7209                 /* scalar single so clear high part */
7210                 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7211
7212                 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
7213                 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
7214                 tcg_temp_free_i64(tcg_tmp);
7215             } else {
7216                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7217             }
7218
7219             tcg_temp_free_i32(tcg_res);
7220             tcg_temp_free_i32(tcg_op1);
7221             tcg_temp_free_i32(tcg_op2);
7222         }
7223     }
7224
7225     tcg_temp_free_ptr(fpst);
7226
7227     if ((elements << size) < 4) {
7228         /* scalar, or non-quad vector op */
7229         clear_vec_high(s, rd);
7230     }
7231 }
7232
7233 /* C3.6.11 AdvSIMD scalar three same
7234  *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
7235  * +-----+---+-----------+------+---+------+--------+---+------+------+
7236  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
7237  * +-----+---+-----------+------+---+------+--------+---+------+------+
7238  */
7239 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
7240 {
7241     int rd = extract32(insn, 0, 5);
7242     int rn = extract32(insn, 5, 5);
7243     int opcode = extract32(insn, 11, 5);
7244     int rm = extract32(insn, 16, 5);
7245     int size = extract32(insn, 22, 2);
7246     bool u = extract32(insn, 29, 1);
7247     TCGv_i64 tcg_rd;
7248
7249     if (opcode >= 0x18) {
7250         /* Floating point: U, size[1] and opcode indicate operation */
7251         int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
7252         switch (fpopcode) {
7253         case 0x1b: /* FMULX */
7254         case 0x1f: /* FRECPS */
7255         case 0x3f: /* FRSQRTS */
7256         case 0x5d: /* FACGE */
7257         case 0x7d: /* FACGT */
7258         case 0x1c: /* FCMEQ */
7259         case 0x5c: /* FCMGE */
7260         case 0x7c: /* FCMGT */
7261         case 0x7a: /* FABD */
7262             break;
7263         default:
7264             unallocated_encoding(s);
7265             return;
7266         }
7267
7268         if (!fp_access_check(s)) {
7269             return;
7270         }
7271
7272         handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
7273         return;
7274     }
7275
7276     switch (opcode) {
7277     case 0x1: /* SQADD, UQADD */
7278     case 0x5: /* SQSUB, UQSUB */
7279     case 0x9: /* SQSHL, UQSHL */
7280     case 0xb: /* SQRSHL, UQRSHL */
7281         break;
7282     case 0x8: /* SSHL, USHL */
7283     case 0xa: /* SRSHL, URSHL */
7284     case 0x6: /* CMGT, CMHI */
7285     case 0x7: /* CMGE, CMHS */
7286     case 0x11: /* CMTST, CMEQ */
7287     case 0x10: /* ADD, SUB (vector) */
7288         if (size != 3) {
7289             unallocated_encoding(s);
7290             return;
7291         }
7292         break;
7293     case 0x16: /* SQDMULH, SQRDMULH (vector) */
7294         if (size != 1 && size != 2) {
7295             unallocated_encoding(s);
7296             return;
7297         }
7298         break;
7299     default:
7300         unallocated_encoding(s);
7301         return;
7302     }
7303
7304     if (!fp_access_check(s)) {
7305         return;
7306     }
7307
7308     tcg_rd = tcg_temp_new_i64();
7309
7310     if (size == 3) {
7311         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
7312         TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
7313
7314         handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
7315         tcg_temp_free_i64(tcg_rn);
7316         tcg_temp_free_i64(tcg_rm);
7317     } else {
7318         /* Do a single operation on the lowest element in the vector.
7319          * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
7320          * no side effects for all these operations.
7321          * OPTME: special-purpose helpers would avoid doing some
7322          * unnecessary work in the helper for the 8 and 16 bit cases.
7323          */
7324         NeonGenTwoOpEnvFn *genenvfn;
7325         TCGv_i32 tcg_rn = tcg_temp_new_i32();
7326         TCGv_i32 tcg_rm = tcg_temp_new_i32();
7327         TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
7328
7329         read_vec_element_i32(s, tcg_rn, rn, 0, size);
7330         read_vec_element_i32(s, tcg_rm, rm, 0, size);
7331
7332         switch (opcode) {
7333         case 0x1: /* SQADD, UQADD */
7334         {
7335             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7336                 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
7337                 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
7338                 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
7339             };
7340             genenvfn = fns[size][u];
7341             break;
7342         }
7343         case 0x5: /* SQSUB, UQSUB */
7344         {
7345             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7346                 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
7347                 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
7348                 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
7349             };
7350             genenvfn = fns[size][u];
7351             break;
7352         }
7353         case 0x9: /* SQSHL, UQSHL */
7354         {
7355             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7356                 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
7357                 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
7358                 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
7359             };
7360             genenvfn = fns[size][u];
7361             break;
7362         }
7363         case 0xb: /* SQRSHL, UQRSHL */
7364         {
7365             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7366                 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
7367                 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
7368                 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
7369             };
7370             genenvfn = fns[size][u];
7371             break;
7372         }
7373         case 0x16: /* SQDMULH, SQRDMULH */
7374         {
7375             static NeonGenTwoOpEnvFn * const fns[2][2] = {
7376                 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
7377                 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
7378             };
7379             assert(size == 1 || size == 2);
7380             genenvfn = fns[size - 1][u];
7381             break;
7382         }
7383         default:
7384             g_assert_not_reached();
7385         }
7386
7387         genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
7388         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
7389         tcg_temp_free_i32(tcg_rd32);
7390         tcg_temp_free_i32(tcg_rn);
7391         tcg_temp_free_i32(tcg_rm);
7392     }
7393
7394     write_fp_dreg(s, rd, tcg_rd);
7395
7396     tcg_temp_free_i64(tcg_rd);
7397 }
7398
7399 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
7400                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
7401                             TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
7402 {
7403     /* Handle 64->64 opcodes which are shared between the scalar and
7404      * vector 2-reg-misc groups. We cover every integer opcode where size == 3
7405      * is valid in either group and also the double-precision fp ops.
7406      * The caller only need provide tcg_rmode and tcg_fpstatus if the op
7407      * requires them.
7408      */
7409     TCGCond cond;
7410
7411     switch (opcode) {
7412     case 0x4: /* CLS, CLZ */
7413         if (u) {
7414             gen_helper_clz64(tcg_rd, tcg_rn);
7415         } else {
7416             gen_helper_cls64(tcg_rd, tcg_rn);
7417         }
7418         break;
7419     case 0x5: /* NOT */
7420         /* This opcode is shared with CNT and RBIT but we have earlier
7421          * enforced that size == 3 if and only if this is the NOT insn.
7422          */
7423         tcg_gen_not_i64(tcg_rd, tcg_rn);
7424         break;
7425     case 0x7: /* SQABS, SQNEG */
7426         if (u) {
7427             gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
7428         } else {
7429             gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
7430         }
7431         break;
7432     case 0xa: /* CMLT */
7433         /* 64 bit integer comparison against zero, result is
7434          * test ? (2^64 - 1) : 0. We implement via setcond(!test) and
7435          * subtracting 1.
7436          */
7437         cond = TCG_COND_LT;
7438     do_cmop:
7439         tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
7440         tcg_gen_neg_i64(tcg_rd, tcg_rd);
7441         break;
7442     case 0x8: /* CMGT, CMGE */
7443         cond = u ? TCG_COND_GE : TCG_COND_GT;
7444         goto do_cmop;
7445     case 0x9: /* CMEQ, CMLE */
7446         cond = u ? TCG_COND_LE : TCG_COND_EQ;
7447         goto do_cmop;
7448     case 0xb: /* ABS, NEG */
7449         if (u) {
7450             tcg_gen_neg_i64(tcg_rd, tcg_rn);
7451         } else {
7452             TCGv_i64 tcg_zero = tcg_const_i64(0);
7453             tcg_gen_neg_i64(tcg_rd, tcg_rn);
7454             tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
7455                                 tcg_rn, tcg_rd);
7456             tcg_temp_free_i64(tcg_zero);
7457         }
7458         break;
7459     case 0x2f: /* FABS */
7460         gen_helper_vfp_absd(tcg_rd, tcg_rn);
7461         break;
7462     case 0x6f: /* FNEG */
7463         gen_helper_vfp_negd(tcg_rd, tcg_rn);
7464         break;
7465     case 0x7f: /* FSQRT */
7466         gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
7467         break;
7468     case 0x1a: /* FCVTNS */
7469     case 0x1b: /* FCVTMS */
7470     case 0x1c: /* FCVTAS */
7471     case 0x3a: /* FCVTPS */
7472     case 0x3b: /* FCVTZS */
7473     {
7474         TCGv_i32 tcg_shift = tcg_const_i32(0);
7475         gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7476         tcg_temp_free_i32(tcg_shift);
7477         break;
7478     }
7479     case 0x5a: /* FCVTNU */
7480     case 0x5b: /* FCVTMU */
7481     case 0x5c: /* FCVTAU */
7482     case 0x7a: /* FCVTPU */
7483     case 0x7b: /* FCVTZU */
7484     {
7485         TCGv_i32 tcg_shift = tcg_const_i32(0);
7486         gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7487         tcg_temp_free_i32(tcg_shift);
7488         break;
7489     }
7490     case 0x18: /* FRINTN */
7491     case 0x19: /* FRINTM */
7492     case 0x38: /* FRINTP */
7493     case 0x39: /* FRINTZ */
7494     case 0x58: /* FRINTA */
7495     case 0x79: /* FRINTI */
7496         gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
7497         break;
7498     case 0x59: /* FRINTX */
7499         gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
7500         break;
7501     default:
7502         g_assert_not_reached();
7503     }
7504 }
7505
7506 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
7507                                    bool is_scalar, bool is_u, bool is_q,
7508                                    int size, int rn, int rd)
7509 {
7510     bool is_double = (size == 3);
7511     TCGv_ptr fpst;
7512
7513     if (!fp_access_check(s)) {
7514         return;
7515     }
7516
7517     fpst = get_fpstatus_ptr();
7518
7519     if (is_double) {
7520         TCGv_i64 tcg_op = tcg_temp_new_i64();
7521         TCGv_i64 tcg_zero = tcg_const_i64(0);
7522         TCGv_i64 tcg_res = tcg_temp_new_i64();
7523         NeonGenTwoDoubleOPFn *genfn;
7524         bool swap = false;
7525         int pass;
7526
7527         switch (opcode) {
7528         case 0x2e: /* FCMLT (zero) */
7529             swap = true;
7530             /* fallthrough */
7531         case 0x2c: /* FCMGT (zero) */
7532             genfn = gen_helper_neon_cgt_f64;
7533             break;
7534         case 0x2d: /* FCMEQ (zero) */
7535             genfn = gen_helper_neon_ceq_f64;
7536             break;
7537         case 0x6d: /* FCMLE (zero) */
7538             swap = true;
7539             /* fall through */
7540         case 0x6c: /* FCMGE (zero) */
7541             genfn = gen_helper_neon_cge_f64;
7542             break;
7543         default:
7544             g_assert_not_reached();
7545         }
7546
7547         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7548             read_vec_element(s, tcg_op, rn, pass, MO_64);
7549             if (swap) {
7550                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
7551             } else {
7552                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
7553             }
7554             write_vec_element(s, tcg_res, rd, pass, MO_64);
7555         }
7556         if (is_scalar) {
7557             clear_vec_high(s, rd);
7558         }
7559
7560         tcg_temp_free_i64(tcg_res);
7561         tcg_temp_free_i64(tcg_zero);
7562         tcg_temp_free_i64(tcg_op);
7563     } else {
7564         TCGv_i32 tcg_op = tcg_temp_new_i32();
7565         TCGv_i32 tcg_zero = tcg_const_i32(0);
7566         TCGv_i32 tcg_res = tcg_temp_new_i32();
7567         NeonGenTwoSingleOPFn *genfn;
7568         bool swap = false;
7569         int pass, maxpasses;
7570
7571         switch (opcode) {
7572         case 0x2e: /* FCMLT (zero) */
7573             swap = true;
7574             /* fall through */
7575         case 0x2c: /* FCMGT (zero) */
7576             genfn = gen_helper_neon_cgt_f32;
7577             break;
7578         case 0x2d: /* FCMEQ (zero) */
7579             genfn = gen_helper_neon_ceq_f32;
7580             break;
7581         case 0x6d: /* FCMLE (zero) */
7582             swap = true;
7583             /* fall through */
7584         case 0x6c: /* FCMGE (zero) */
7585             genfn = gen_helper_neon_cge_f32;
7586             break;
7587         default:
7588             g_assert_not_reached();
7589         }
7590
7591         if (is_scalar) {
7592             maxpasses = 1;
7593         } else {
7594             maxpasses = is_q ? 4 : 2;
7595         }
7596
7597         for (pass = 0; pass < maxpasses; pass++) {
7598             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7599             if (swap) {
7600                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
7601             } else {
7602                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
7603             }
7604             if (is_scalar) {
7605                 write_fp_sreg(s, rd, tcg_res);
7606             } else {
7607                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7608             }
7609         }
7610         tcg_temp_free_i32(tcg_res);
7611         tcg_temp_free_i32(tcg_zero);
7612         tcg_temp_free_i32(tcg_op);
7613         if (!is_q && !is_scalar) {
7614             clear_vec_high(s, rd);
7615         }
7616     }
7617
7618     tcg_temp_free_ptr(fpst);
7619 }
7620
7621 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
7622                                     bool is_scalar, bool is_u, bool is_q,
7623                                     int size, int rn, int rd)
7624 {
7625     bool is_double = (size == 3);
7626     TCGv_ptr fpst = get_fpstatus_ptr();
7627
7628     if (is_double) {
7629         TCGv_i64 tcg_op = tcg_temp_new_i64();
7630         TCGv_i64 tcg_res = tcg_temp_new_i64();
7631         int pass;
7632
7633         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7634             read_vec_element(s, tcg_op, rn, pass, MO_64);
7635             switch (opcode) {
7636             case 0x3d: /* FRECPE */
7637                 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
7638                 break;
7639             case 0x3f: /* FRECPX */
7640                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
7641                 break;
7642             case 0x7d: /* FRSQRTE */
7643                 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
7644                 break;
7645             default:
7646                 g_assert_not_reached();
7647             }
7648             write_vec_element(s, tcg_res, rd, pass, MO_64);
7649         }
7650         if (is_scalar) {
7651             clear_vec_high(s, rd);
7652         }
7653
7654         tcg_temp_free_i64(tcg_res);
7655         tcg_temp_free_i64(tcg_op);
7656     } else {
7657         TCGv_i32 tcg_op = tcg_temp_new_i32();
7658         TCGv_i32 tcg_res = tcg_temp_new_i32();
7659         int pass, maxpasses;
7660
7661         if (is_scalar) {
7662             maxpasses = 1;
7663         } else {
7664             maxpasses = is_q ? 4 : 2;
7665         }
7666
7667         for (pass = 0; pass < maxpasses; pass++) {
7668             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7669
7670             switch (opcode) {
7671             case 0x3c: /* URECPE */
7672                 gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
7673                 break;
7674             case 0x3d: /* FRECPE */
7675                 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
7676                 break;
7677             case 0x3f: /* FRECPX */
7678                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
7679                 break;
7680             case 0x7d: /* FRSQRTE */
7681                 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
7682                 break;
7683             default:
7684                 g_assert_not_reached();
7685             }
7686
7687             if (is_scalar) {
7688                 write_fp_sreg(s, rd, tcg_res);
7689             } else {
7690                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7691             }
7692         }
7693         tcg_temp_free_i32(tcg_res);
7694         tcg_temp_free_i32(tcg_op);
7695         if (!is_q && !is_scalar) {
7696             clear_vec_high(s, rd);
7697         }
7698     }
7699     tcg_temp_free_ptr(fpst);
7700 }
7701
7702 static void handle_2misc_narrow(DisasContext *s, bool scalar,
7703                                 int opcode, bool u, bool is_q,
7704                                 int size, int rn, int rd)
7705 {
7706     /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
7707      * in the source becomes a size element in the destination).
7708      */
7709     int pass;
7710     TCGv_i32 tcg_res[2];
7711     int destelt = is_q ? 2 : 0;
7712     int passes = scalar ? 1 : 2;
7713
7714     if (scalar) {
7715         tcg_res[1] = tcg_const_i32(0);
7716     }
7717
7718     for (pass = 0; pass < passes; pass++) {
7719         TCGv_i64 tcg_op = tcg_temp_new_i64();
7720         NeonGenNarrowFn *genfn = NULL;
7721         NeonGenNarrowEnvFn *genenvfn = NULL;
7722
7723         if (scalar) {
7724             read_vec_element(s, tcg_op, rn, pass, size + 1);
7725         } else {
7726             read_vec_element(s, tcg_op, rn, pass, MO_64);
7727         }
7728         tcg_res[pass] = tcg_temp_new_i32();
7729
7730         switch (opcode) {
7731         case 0x12: /* XTN, SQXTUN */
7732         {
7733             static NeonGenNarrowFn * const xtnfns[3] = {
7734                 gen_helper_neon_narrow_u8,
7735                 gen_helper_neon_narrow_u16,
7736                 tcg_gen_extrl_i64_i32,
7737             };
7738             static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
7739                 gen_helper_neon_unarrow_sat8,
7740                 gen_helper_neon_unarrow_sat16,
7741                 gen_helper_neon_unarrow_sat32,
7742             };
7743             if (u) {
7744                 genenvfn = sqxtunfns[size];
7745             } else {
7746                 genfn = xtnfns[size];
7747             }
7748             break;
7749         }
7750         case 0x14: /* SQXTN, UQXTN */
7751         {
7752             static NeonGenNarrowEnvFn * const fns[3][2] = {
7753                 { gen_helper_neon_narrow_sat_s8,
7754                   gen_helper_neon_narrow_sat_u8 },
7755                 { gen_helper_neon_narrow_sat_s16,
7756                   gen_helper_neon_narrow_sat_u16 },
7757                 { gen_helper_neon_narrow_sat_s32,
7758                   gen_helper_neon_narrow_sat_u32 },
7759             };
7760             genenvfn = fns[size][u];
7761             break;
7762         }
7763         case 0x16: /* FCVTN, FCVTN2 */
7764             /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
7765             if (size == 2) {
7766                 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
7767             } else {
7768                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
7769                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
7770                 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
7771                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, cpu_env);
7772                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, cpu_env);
7773                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
7774                 tcg_temp_free_i32(tcg_lo);
7775                 tcg_temp_free_i32(tcg_hi);
7776             }
7777             break;
7778         case 0x56:  /* FCVTXN, FCVTXN2 */
7779             /* 64 bit to 32 bit float conversion
7780              * with von Neumann rounding (round to odd)
7781              */
7782             assert(size == 2);
7783             gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
7784             break;
7785         default:
7786             g_assert_not_reached();
7787         }
7788
7789         if (genfn) {
7790             genfn(tcg_res[pass], tcg_op);
7791         } else if (genenvfn) {
7792             genenvfn(tcg_res[pass], cpu_env, tcg_op);
7793         }
7794
7795         tcg_temp_free_i64(tcg_op);
7796     }
7797
7798     for (pass = 0; pass < 2; pass++) {
7799         write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
7800         tcg_temp_free_i32(tcg_res[pass]);
7801     }
7802     if (!is_q) {
7803         clear_vec_high(s, rd);
7804     }
7805 }
7806
7807 /* Remaining saturating accumulating ops */
7808 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
7809                                 bool is_q, int size, int rn, int rd)
7810 {
7811     bool is_double = (size == 3);
7812
7813     if (is_double) {
7814         TCGv_i64 tcg_rn = tcg_temp_new_i64();
7815         TCGv_i64 tcg_rd = tcg_temp_new_i64();
7816         int pass;
7817
7818         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7819             read_vec_element(s, tcg_rn, rn, pass, MO_64);
7820             read_vec_element(s, tcg_rd, rd, pass, MO_64);
7821
7822             if (is_u) { /* USQADD */
7823                 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7824             } else { /* SUQADD */
7825                 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7826             }
7827             write_vec_element(s, tcg_rd, rd, pass, MO_64);
7828         }
7829         if (is_scalar) {
7830             clear_vec_high(s, rd);
7831         }
7832
7833         tcg_temp_free_i64(tcg_rd);
7834         tcg_temp_free_i64(tcg_rn);
7835     } else {
7836         TCGv_i32 tcg_rn = tcg_temp_new_i32();
7837         TCGv_i32 tcg_rd = tcg_temp_new_i32();
7838         int pass, maxpasses;
7839
7840         if (is_scalar) {
7841             maxpasses = 1;
7842         } else {
7843             maxpasses = is_q ? 4 : 2;
7844         }
7845
7846         for (pass = 0; pass < maxpasses; pass++) {
7847             if (is_scalar) {
7848                 read_vec_element_i32(s, tcg_rn, rn, pass, size);
7849                 read_vec_element_i32(s, tcg_rd, rd, pass, size);
7850             } else {
7851                 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
7852                 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
7853             }
7854
7855             if (is_u) { /* USQADD */
7856                 switch (size) {
7857                 case 0:
7858                     gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7859                     break;
7860                 case 1:
7861                     gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7862                     break;
7863                 case 2:
7864                     gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7865                     break;
7866                 default:
7867                     g_assert_not_reached();
7868                 }
7869             } else { /* SUQADD */
7870                 switch (size) {
7871                 case 0:
7872                     gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7873                     break;
7874                 case 1:
7875                     gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7876                     break;
7877                 case 2:
7878                     gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7879                     break;
7880                 default:
7881                     g_assert_not_reached();
7882                 }
7883             }
7884
7885             if (is_scalar) {
7886                 TCGv_i64 tcg_zero = tcg_const_i64(0);
7887                 write_vec_element(s, tcg_zero, rd, 0, MO_64);
7888                 tcg_temp_free_i64(tcg_zero);
7889             }
7890             write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
7891         }
7892
7893         if (!is_q) {
7894             clear_vec_high(s, rd);
7895         }
7896
7897         tcg_temp_free_i32(tcg_rd);
7898         tcg_temp_free_i32(tcg_rn);
7899     }
7900 }
7901
7902 /* C3.6.12 AdvSIMD scalar two reg misc
7903  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
7904  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7905  * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
7906  * +-----+---+-----------+------+-----------+--------+-----+------+------+
7907  */
7908 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
7909 {
7910     int rd = extract32(insn, 0, 5);
7911     int rn = extract32(insn, 5, 5);
7912     int opcode = extract32(insn, 12, 5);
7913     int size = extract32(insn, 22, 2);
7914     bool u = extract32(insn, 29, 1);
7915     bool is_fcvt = false;
7916     int rmode;
7917     TCGv_i32 tcg_rmode;
7918     TCGv_ptr tcg_fpstatus;
7919
7920     switch (opcode) {
7921     case 0x3: /* USQADD / SUQADD*/
7922         if (!fp_access_check(s)) {
7923             return;
7924         }
7925         handle_2misc_satacc(s, true, u, false, size, rn, rd);
7926         return;
7927     case 0x7: /* SQABS / SQNEG */
7928         break;
7929     case 0xa: /* CMLT */
7930         if (u) {
7931             unallocated_encoding(s);
7932             return;
7933         }
7934         /* fall through */
7935     case 0x8: /* CMGT, CMGE */
7936     case 0x9: /* CMEQ, CMLE */
7937     case 0xb: /* ABS, NEG */
7938         if (size != 3) {
7939             unallocated_encoding(s);
7940             return;
7941         }
7942         break;
7943     case 0x12: /* SQXTUN */
7944         if (!u) {
7945             unallocated_encoding(s);
7946             return;
7947         }
7948         /* fall through */
7949     case 0x14: /* SQXTN, UQXTN */
7950         if (size == 3) {
7951             unallocated_encoding(s);
7952             return;
7953         }
7954         if (!fp_access_check(s)) {
7955             return;
7956         }
7957         handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
7958         return;
7959     case 0xc ... 0xf:
7960     case 0x16 ... 0x1d:
7961     case 0x1f:
7962         /* Floating point: U, size[1] and opcode indicate operation;
7963          * size[0] indicates single or double precision.
7964          */
7965         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
7966         size = extract32(size, 0, 1) ? 3 : 2;
7967         switch (opcode) {
7968         case 0x2c: /* FCMGT (zero) */
7969         case 0x2d: /* FCMEQ (zero) */
7970         case 0x2e: /* FCMLT (zero) */
7971         case 0x6c: /* FCMGE (zero) */
7972         case 0x6d: /* FCMLE (zero) */
7973             handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
7974             return;
7975         case 0x1d: /* SCVTF */
7976         case 0x5d: /* UCVTF */
7977         {
7978             bool is_signed = (opcode == 0x1d);
7979             if (!fp_access_check(s)) {
7980                 return;
7981             }
7982             handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
7983             return;
7984         }
7985         case 0x3d: /* FRECPE */
7986         case 0x3f: /* FRECPX */
7987         case 0x7d: /* FRSQRTE */
7988             if (!fp_access_check(s)) {
7989                 return;
7990             }
7991             handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
7992             return;
7993         case 0x1a: /* FCVTNS */
7994         case 0x1b: /* FCVTMS */
7995         case 0x3a: /* FCVTPS */
7996         case 0x3b: /* FCVTZS */
7997         case 0x5a: /* FCVTNU */
7998         case 0x5b: /* FCVTMU */
7999         case 0x7a: /* FCVTPU */
8000         case 0x7b: /* FCVTZU */
8001             is_fcvt = true;
8002             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
8003             break;
8004         case 0x1c: /* FCVTAS */
8005         case 0x5c: /* FCVTAU */
8006             /* TIEAWAY doesn't fit in the usual rounding mode encoding */
8007             is_fcvt = true;
8008             rmode = FPROUNDING_TIEAWAY;
8009             break;
8010         case 0x56: /* FCVTXN, FCVTXN2 */
8011             if (size == 2) {
8012                 unallocated_encoding(s);
8013                 return;
8014             }
8015             if (!fp_access_check(s)) {
8016                 return;
8017             }
8018             handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
8019             return;
8020         default:
8021             unallocated_encoding(s);
8022             return;
8023         }
8024         break;
8025     default:
8026         unallocated_encoding(s);
8027         return;
8028     }
8029
8030     if (!fp_access_check(s)) {
8031         return;
8032     }
8033
8034     if (is_fcvt) {
8035         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
8036         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8037         tcg_fpstatus = get_fpstatus_ptr();
8038     } else {
8039         TCGV_UNUSED_I32(tcg_rmode);
8040         TCGV_UNUSED_PTR(tcg_fpstatus);
8041     }
8042
8043     if (size == 3) {
8044         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
8045         TCGv_i64 tcg_rd = tcg_temp_new_i64();
8046
8047         handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
8048         write_fp_dreg(s, rd, tcg_rd);
8049         tcg_temp_free_i64(tcg_rd);
8050         tcg_temp_free_i64(tcg_rn);
8051     } else {
8052         TCGv_i32 tcg_rn = tcg_temp_new_i32();
8053         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8054
8055         read_vec_element_i32(s, tcg_rn, rn, 0, size);
8056
8057         switch (opcode) {
8058         case 0x7: /* SQABS, SQNEG */
8059         {
8060             NeonGenOneOpEnvFn *genfn;
8061             static NeonGenOneOpEnvFn * const fns[3][2] = {
8062                 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
8063                 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
8064                 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
8065             };
8066             genfn = fns[size][u];
8067             genfn(tcg_rd, cpu_env, tcg_rn);
8068             break;
8069         }
8070         case 0x1a: /* FCVTNS */
8071         case 0x1b: /* FCVTMS */
8072         case 0x1c: /* FCVTAS */
8073         case 0x3a: /* FCVTPS */
8074         case 0x3b: /* FCVTZS */
8075         {
8076             TCGv_i32 tcg_shift = tcg_const_i32(0);
8077             gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8078             tcg_temp_free_i32(tcg_shift);
8079             break;
8080         }
8081         case 0x5a: /* FCVTNU */
8082         case 0x5b: /* FCVTMU */
8083         case 0x5c: /* FCVTAU */
8084         case 0x7a: /* FCVTPU */
8085         case 0x7b: /* FCVTZU */
8086         {
8087             TCGv_i32 tcg_shift = tcg_const_i32(0);
8088             gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8089             tcg_temp_free_i32(tcg_shift);
8090             break;
8091         }
8092         default:
8093             g_assert_not_reached();
8094         }
8095
8096         write_fp_sreg(s, rd, tcg_rd);
8097         tcg_temp_free_i32(tcg_rd);
8098         tcg_temp_free_i32(tcg_rn);
8099     }
8100
8101     if (is_fcvt) {
8102         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8103         tcg_temp_free_i32(tcg_rmode);
8104         tcg_temp_free_ptr(tcg_fpstatus);
8105     }
8106 }
8107
8108 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
8109 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
8110                                  int immh, int immb, int opcode, int rn, int rd)
8111 {
8112     int size = 32 - clz32(immh) - 1;
8113     int immhb = immh << 3 | immb;
8114     int shift = 2 * (8 << size) - immhb;
8115     bool accumulate = false;
8116     bool round = false;
8117     bool insert = false;
8118     int dsize = is_q ? 128 : 64;
8119     int esize = 8 << size;
8120     int elements = dsize/esize;
8121     TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
8122     TCGv_i64 tcg_rn = new_tmp_a64(s);
8123     TCGv_i64 tcg_rd = new_tmp_a64(s);
8124     TCGv_i64 tcg_round;
8125     int i;
8126
8127     if (extract32(immh, 3, 1) && !is_q) {
8128         unallocated_encoding(s);
8129         return;
8130     }
8131
8132     if (size > 3 && !is_q) {
8133         unallocated_encoding(s);
8134         return;
8135     }
8136
8137     if (!fp_access_check(s)) {
8138         return;
8139     }
8140
8141     switch (opcode) {
8142     case 0x02: /* SSRA / USRA (accumulate) */
8143         accumulate = true;
8144         break;
8145     case 0x04: /* SRSHR / URSHR (rounding) */
8146         round = true;
8147         break;
8148     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8149         accumulate = round = true;
8150         break;
8151     case 0x08: /* SRI */
8152         insert = true;
8153         break;
8154     }
8155
8156     if (round) {
8157         uint64_t round_const = 1ULL << (shift - 1);
8158         tcg_round = tcg_const_i64(round_const);
8159     } else {
8160         TCGV_UNUSED_I64(tcg_round);
8161     }
8162
8163     for (i = 0; i < elements; i++) {
8164         read_vec_element(s, tcg_rn, rn, i, memop);
8165         if (accumulate || insert) {
8166             read_vec_element(s, tcg_rd, rd, i, memop);
8167         }
8168
8169         if (insert) {
8170             handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
8171         } else {
8172             handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8173                                     accumulate, is_u, size, shift);
8174         }
8175
8176         write_vec_element(s, tcg_rd, rd, i, size);
8177     }
8178
8179     if (!is_q) {
8180         clear_vec_high(s, rd);
8181     }
8182
8183     if (round) {
8184         tcg_temp_free_i64(tcg_round);
8185     }
8186 }
8187
8188 /* SHL/SLI - Vector shift left */
8189 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
8190                                 int immh, int immb, int opcode, int rn, int rd)
8191 {
8192     int size = 32 - clz32(immh) - 1;
8193     int immhb = immh << 3 | immb;
8194     int shift = immhb - (8 << size);
8195     int dsize = is_q ? 128 : 64;
8196     int esize = 8 << size;
8197     int elements = dsize/esize;
8198     TCGv_i64 tcg_rn = new_tmp_a64(s);
8199     TCGv_i64 tcg_rd = new_tmp_a64(s);
8200     int i;
8201
8202     if (extract32(immh, 3, 1) && !is_q) {
8203         unallocated_encoding(s);
8204         return;
8205     }
8206
8207     if (size > 3 && !is_q) {
8208         unallocated_encoding(s);
8209         return;
8210     }
8211
8212     if (!fp_access_check(s)) {
8213         return;
8214     }
8215
8216     for (i = 0; i < elements; i++) {
8217         read_vec_element(s, tcg_rn, rn, i, size);
8218         if (insert) {
8219             read_vec_element(s, tcg_rd, rd, i, size);
8220         }
8221
8222         handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
8223
8224         write_vec_element(s, tcg_rd, rd, i, size);
8225     }
8226
8227     if (!is_q) {
8228         clear_vec_high(s, rd);
8229     }
8230 }
8231
8232 /* USHLL/SHLL - Vector shift left with widening */
8233 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
8234                                  int immh, int immb, int opcode, int rn, int rd)
8235 {
8236     int size = 32 - clz32(immh) - 1;
8237     int immhb = immh << 3 | immb;
8238     int shift = immhb - (8 << size);
8239     int dsize = 64;
8240     int esize = 8 << size;
8241     int elements = dsize/esize;
8242     TCGv_i64 tcg_rn = new_tmp_a64(s);
8243     TCGv_i64 tcg_rd = new_tmp_a64(s);
8244     int i;
8245
8246     if (size >= 3) {
8247         unallocated_encoding(s);
8248         return;
8249     }
8250
8251     if (!fp_access_check(s)) {
8252         return;
8253     }
8254
8255     /* For the LL variants the store is larger than the load,
8256      * so if rd == rn we would overwrite parts of our input.
8257      * So load everything right now and use shifts in the main loop.
8258      */
8259     read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
8260
8261     for (i = 0; i < elements; i++) {
8262         tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
8263         ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
8264         tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
8265         write_vec_element(s, tcg_rd, rd, i, size + 1);
8266     }
8267 }
8268
8269 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
8270 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
8271                                  int immh, int immb, int opcode, int rn, int rd)
8272 {
8273     int immhb = immh << 3 | immb;
8274     int size = 32 - clz32(immh) - 1;
8275     int dsize = 64;
8276     int esize = 8 << size;
8277     int elements = dsize/esize;
8278     int shift = (2 * esize) - immhb;
8279     bool round = extract32(opcode, 0, 1);
8280     TCGv_i64 tcg_rn, tcg_rd, tcg_final;
8281     TCGv_i64 tcg_round;
8282     int i;
8283
8284     if (extract32(immh, 3, 1)) {
8285         unallocated_encoding(s);
8286         return;
8287     }
8288
8289     if (!fp_access_check(s)) {
8290         return;
8291     }
8292
8293     tcg_rn = tcg_temp_new_i64();
8294     tcg_rd = tcg_temp_new_i64();
8295     tcg_final = tcg_temp_new_i64();
8296     read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
8297
8298     if (round) {
8299         uint64_t round_const = 1ULL << (shift - 1);
8300         tcg_round = tcg_const_i64(round_const);
8301     } else {
8302         TCGV_UNUSED_I64(tcg_round);
8303     }
8304
8305     for (i = 0; i < elements; i++) {
8306         read_vec_element(s, tcg_rn, rn, i, size+1);
8307         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8308                                 false, true, size+1, shift);
8309
8310         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8311     }
8312
8313     if (!is_q) {
8314         clear_vec_high(s, rd);
8315         write_vec_element(s, tcg_final, rd, 0, MO_64);
8316     } else {
8317         write_vec_element(s, tcg_final, rd, 1, MO_64);
8318     }
8319
8320     if (round) {
8321         tcg_temp_free_i64(tcg_round);
8322     }
8323     tcg_temp_free_i64(tcg_rn);
8324     tcg_temp_free_i64(tcg_rd);
8325     tcg_temp_free_i64(tcg_final);
8326     return;
8327 }
8328
8329
8330 /* C3.6.14 AdvSIMD shift by immediate
8331  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
8332  * +---+---+---+-------------+------+------+--------+---+------+------+
8333  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
8334  * +---+---+---+-------------+------+------+--------+---+------+------+
8335  */
8336 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
8337 {
8338     int rd = extract32(insn, 0, 5);
8339     int rn = extract32(insn, 5, 5);
8340     int opcode = extract32(insn, 11, 5);
8341     int immb = extract32(insn, 16, 3);
8342     int immh = extract32(insn, 19, 4);
8343     bool is_u = extract32(insn, 29, 1);
8344     bool is_q = extract32(insn, 30, 1);
8345
8346     switch (opcode) {
8347     case 0x08: /* SRI */
8348         if (!is_u) {
8349             unallocated_encoding(s);
8350             return;
8351         }
8352         /* fall through */
8353     case 0x00: /* SSHR / USHR */
8354     case 0x02: /* SSRA / USRA (accumulate) */
8355     case 0x04: /* SRSHR / URSHR (rounding) */
8356     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8357         handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
8358         break;
8359     case 0x0a: /* SHL / SLI */
8360         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8361         break;
8362     case 0x10: /* SHRN */
8363     case 0x11: /* RSHRN / SQRSHRUN */
8364         if (is_u) {
8365             handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
8366                                    opcode, rn, rd);
8367         } else {
8368             handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
8369         }
8370         break;
8371     case 0x12: /* SQSHRN / UQSHRN */
8372     case 0x13: /* SQRSHRN / UQRSHRN */
8373         handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
8374                                opcode, rn, rd);
8375         break;
8376     case 0x14: /* SSHLL / USHLL */
8377         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8378         break;
8379     case 0x1c: /* SCVTF / UCVTF */
8380         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
8381                                      opcode, rn, rd);
8382         break;
8383     case 0xc: /* SQSHLU */
8384         if (!is_u) {
8385             unallocated_encoding(s);
8386             return;
8387         }
8388         handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
8389         break;
8390     case 0xe: /* SQSHL, UQSHL */
8391         handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
8392         break;
8393     case 0x1f: /* FCVTZS/ FCVTZU */
8394         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
8395         return;
8396     default:
8397         unallocated_encoding(s);
8398         return;
8399     }
8400 }
8401
8402 /* Generate code to do a "long" addition or subtraction, ie one done in
8403  * TCGv_i64 on vector lanes twice the width specified by size.
8404  */
8405 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
8406                           TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
8407 {
8408     static NeonGenTwo64OpFn * const fns[3][2] = {
8409         { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
8410         { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
8411         { tcg_gen_add_i64, tcg_gen_sub_i64 },
8412     };
8413     NeonGenTwo64OpFn *genfn;
8414     assert(size < 3);
8415
8416     genfn = fns[size][is_sub];
8417     genfn(tcg_res, tcg_op1, tcg_op2);
8418 }
8419
8420 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
8421                                 int opcode, int rd, int rn, int rm)
8422 {
8423     /* 3-reg-different widening insns: 64 x 64 -> 128 */
8424     TCGv_i64 tcg_res[2];
8425     int pass, accop;
8426
8427     tcg_res[0] = tcg_temp_new_i64();
8428     tcg_res[1] = tcg_temp_new_i64();
8429
8430     /* Does this op do an adding accumulate, a subtracting accumulate,
8431      * or no accumulate at all?
8432      */
8433     switch (opcode) {
8434     case 5:
8435     case 8:
8436     case 9:
8437         accop = 1;
8438         break;
8439     case 10:
8440     case 11:
8441         accop = -1;
8442         break;
8443     default:
8444         accop = 0;
8445         break;
8446     }
8447
8448     if (accop != 0) {
8449         read_vec_element(s, tcg_res[0], rd, 0, MO_64);
8450         read_vec_element(s, tcg_res[1], rd, 1, MO_64);
8451     }
8452
8453     /* size == 2 means two 32x32->64 operations; this is worth special
8454      * casing because we can generally handle it inline.
8455      */
8456     if (size == 2) {
8457         for (pass = 0; pass < 2; pass++) {
8458             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8459             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8460             TCGv_i64 tcg_passres;
8461             TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
8462
8463             int elt = pass + is_q * 2;
8464
8465             read_vec_element(s, tcg_op1, rn, elt, memop);
8466             read_vec_element(s, tcg_op2, rm, elt, memop);
8467
8468             if (accop == 0) {
8469                 tcg_passres = tcg_res[pass];
8470             } else {
8471                 tcg_passres = tcg_temp_new_i64();
8472             }
8473
8474             switch (opcode) {
8475             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8476                 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
8477                 break;
8478             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8479                 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
8480                 break;
8481             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8482             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8483             {
8484                 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
8485                 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
8486
8487                 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
8488                 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
8489                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
8490                                     tcg_passres,
8491                                     tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
8492                 tcg_temp_free_i64(tcg_tmp1);
8493                 tcg_temp_free_i64(tcg_tmp2);
8494                 break;
8495             }
8496             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8497             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8498             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8499                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
8500                 break;
8501             case 9: /* SQDMLAL, SQDMLAL2 */
8502             case 11: /* SQDMLSL, SQDMLSL2 */
8503             case 13: /* SQDMULL, SQDMULL2 */
8504                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
8505                 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
8506                                                   tcg_passres, tcg_passres);
8507                 break;
8508             default:
8509                 g_assert_not_reached();
8510             }
8511
8512             if (opcode == 9 || opcode == 11) {
8513                 /* saturating accumulate ops */
8514                 if (accop < 0) {
8515                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
8516                 }
8517                 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
8518                                                   tcg_res[pass], tcg_passres);
8519             } else if (accop > 0) {
8520                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
8521             } else if (accop < 0) {
8522                 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
8523             }
8524
8525             if (accop != 0) {
8526                 tcg_temp_free_i64(tcg_passres);
8527             }
8528
8529             tcg_temp_free_i64(tcg_op1);
8530             tcg_temp_free_i64(tcg_op2);
8531         }
8532     } else {
8533         /* size 0 or 1, generally helper functions */
8534         for (pass = 0; pass < 2; pass++) {
8535             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8536             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8537             TCGv_i64 tcg_passres;
8538             int elt = pass + is_q * 2;
8539
8540             read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
8541             read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
8542
8543             if (accop == 0) {
8544                 tcg_passres = tcg_res[pass];
8545             } else {
8546                 tcg_passres = tcg_temp_new_i64();
8547             }
8548
8549             switch (opcode) {
8550             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8551             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8552             {
8553                 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
8554                 static NeonGenWidenFn * const widenfns[2][2] = {
8555                     { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8556                     { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8557                 };
8558                 NeonGenWidenFn *widenfn = widenfns[size][is_u];
8559
8560                 widenfn(tcg_op2_64, tcg_op2);
8561                 widenfn(tcg_passres, tcg_op1);
8562                 gen_neon_addl(size, (opcode == 2), tcg_passres,
8563                               tcg_passres, tcg_op2_64);
8564                 tcg_temp_free_i64(tcg_op2_64);
8565                 break;
8566             }
8567             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8568             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8569                 if (size == 0) {
8570                     if (is_u) {
8571                         gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
8572                     } else {
8573                         gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
8574                     }
8575                 } else {
8576                     if (is_u) {
8577                         gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
8578                     } else {
8579                         gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
8580                     }
8581                 }
8582                 break;
8583             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8584             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8585             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8586                 if (size == 0) {
8587                     if (is_u) {
8588                         gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
8589                     } else {
8590                         gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
8591                     }
8592                 } else {
8593                     if (is_u) {
8594                         gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
8595                     } else {
8596                         gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
8597                     }
8598                 }
8599                 break;
8600             case 9: /* SQDMLAL, SQDMLAL2 */
8601             case 11: /* SQDMLSL, SQDMLSL2 */
8602             case 13: /* SQDMULL, SQDMULL2 */
8603                 assert(size == 1);
8604                 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
8605                 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
8606                                                   tcg_passres, tcg_passres);
8607                 break;
8608             case 14: /* PMULL */
8609                 assert(size == 0);
8610                 gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
8611                 break;
8612             default:
8613                 g_assert_not_reached();
8614             }
8615             tcg_temp_free_i32(tcg_op1);
8616             tcg_temp_free_i32(tcg_op2);
8617
8618             if (accop != 0) {
8619                 if (opcode == 9 || opcode == 11) {
8620                     /* saturating accumulate ops */
8621                     if (accop < 0) {
8622                         gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
8623                     }
8624                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
8625                                                       tcg_res[pass],
8626                                                       tcg_passres);
8627                 } else {
8628                     gen_neon_addl(size, (accop < 0), tcg_res[pass],
8629                                   tcg_res[pass], tcg_passres);
8630                 }
8631                 tcg_temp_free_i64(tcg_passres);
8632             }
8633         }
8634     }
8635
8636     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
8637     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
8638     tcg_temp_free_i64(tcg_res[0]);
8639     tcg_temp_free_i64(tcg_res[1]);
8640 }
8641
8642 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
8643                             int opcode, int rd, int rn, int rm)
8644 {
8645     TCGv_i64 tcg_res[2];
8646     int part = is_q ? 2 : 0;
8647     int pass;
8648
8649     for (pass = 0; pass < 2; pass++) {
8650         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8651         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8652         TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
8653         static NeonGenWidenFn * const widenfns[3][2] = {
8654             { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8655             { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8656             { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
8657         };
8658         NeonGenWidenFn *widenfn = widenfns[size][is_u];
8659
8660         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8661         read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
8662         widenfn(tcg_op2_wide, tcg_op2);
8663         tcg_temp_free_i32(tcg_op2);
8664         tcg_res[pass] = tcg_temp_new_i64();
8665         gen_neon_addl(size, (opcode == 3),
8666                       tcg_res[pass], tcg_op1, tcg_op2_wide);
8667         tcg_temp_free_i64(tcg_op1);
8668         tcg_temp_free_i64(tcg_op2_wide);
8669     }
8670
8671     for (pass = 0; pass < 2; pass++) {
8672         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8673         tcg_temp_free_i64(tcg_res[pass]);
8674     }
8675 }
8676
8677 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
8678 {
8679     tcg_gen_addi_i64(in, in, 1U << 31);
8680     tcg_gen_extrh_i64_i32(res, in);
8681 }
8682
8683 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
8684                                  int opcode, int rd, int rn, int rm)
8685 {
8686     TCGv_i32 tcg_res[2];
8687     int part = is_q ? 2 : 0;
8688     int pass;
8689
8690     for (pass = 0; pass < 2; pass++) {
8691         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8692         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8693         TCGv_i64 tcg_wideres = tcg_temp_new_i64();
8694         static NeonGenNarrowFn * const narrowfns[3][2] = {
8695             { gen_helper_neon_narrow_high_u8,
8696               gen_helper_neon_narrow_round_high_u8 },
8697             { gen_helper_neon_narrow_high_u16,
8698               gen_helper_neon_narrow_round_high_u16 },
8699             { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
8700         };
8701         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
8702
8703         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8704         read_vec_element(s, tcg_op2, rm, pass, MO_64);
8705
8706         gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
8707
8708         tcg_temp_free_i64(tcg_op1);
8709         tcg_temp_free_i64(tcg_op2);
8710
8711         tcg_res[pass] = tcg_temp_new_i32();
8712         gennarrow(tcg_res[pass], tcg_wideres);
8713         tcg_temp_free_i64(tcg_wideres);
8714     }
8715
8716     for (pass = 0; pass < 2; pass++) {
8717         write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
8718         tcg_temp_free_i32(tcg_res[pass]);
8719     }
8720     if (!is_q) {
8721         clear_vec_high(s, rd);
8722     }
8723 }
8724
8725 static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
8726 {
8727     /* PMULL of 64 x 64 -> 128 is an odd special case because it
8728      * is the only three-reg-diff instruction which produces a
8729      * 128-bit wide result from a single operation. However since
8730      * it's possible to calculate the two halves more or less
8731      * separately we just use two helper calls.
8732      */
8733     TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8734     TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8735     TCGv_i64 tcg_res = tcg_temp_new_i64();
8736
8737     read_vec_element(s, tcg_op1, rn, is_q, MO_64);
8738     read_vec_element(s, tcg_op2, rm, is_q, MO_64);
8739     gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
8740     write_vec_element(s, tcg_res, rd, 0, MO_64);
8741     gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
8742     write_vec_element(s, tcg_res, rd, 1, MO_64);
8743
8744     tcg_temp_free_i64(tcg_op1);
8745     tcg_temp_free_i64(tcg_op2);
8746     tcg_temp_free_i64(tcg_res);
8747 }
8748
8749 /* C3.6.15 AdvSIMD three different
8750  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
8751  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8752  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
8753  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8754  */
8755 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
8756 {
8757     /* Instructions in this group fall into three basic classes
8758      * (in each case with the operation working on each element in
8759      * the input vectors):
8760      * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
8761      *     128 bit input)
8762      * (2) wide 64 x 128 -> 128
8763      * (3) narrowing 128 x 128 -> 64
8764      * Here we do initial decode, catch unallocated cases and
8765      * dispatch to separate functions for each class.
8766      */
8767     int is_q = extract32(insn, 30, 1);
8768     int is_u = extract32(insn, 29, 1);
8769     int size = extract32(insn, 22, 2);
8770     int opcode = extract32(insn, 12, 4);
8771     int rm = extract32(insn, 16, 5);
8772     int rn = extract32(insn, 5, 5);
8773     int rd = extract32(insn, 0, 5);
8774
8775     switch (opcode) {
8776     case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
8777     case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
8778         /* 64 x 128 -> 128 */
8779         if (size == 3) {
8780             unallocated_encoding(s);
8781             return;
8782         }
8783         if (!fp_access_check(s)) {
8784             return;
8785         }
8786         handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
8787         break;
8788     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
8789     case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
8790         /* 128 x 128 -> 64 */
8791         if (size == 3) {
8792             unallocated_encoding(s);
8793             return;
8794         }
8795         if (!fp_access_check(s)) {
8796             return;
8797         }
8798         handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
8799         break;
8800     case 14: /* PMULL, PMULL2 */
8801         if (is_u || size == 1 || size == 2) {
8802             unallocated_encoding(s);
8803             return;
8804         }
8805         if (size == 3) {
8806             if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
8807                 unallocated_encoding(s);
8808                 return;
8809             }
8810             if (!fp_access_check(s)) {
8811                 return;
8812             }
8813             handle_pmull_64(s, is_q, rd, rn, rm);
8814             return;
8815         }
8816         goto is_widening;
8817     case 9: /* SQDMLAL, SQDMLAL2 */
8818     case 11: /* SQDMLSL, SQDMLSL2 */
8819     case 13: /* SQDMULL, SQDMULL2 */
8820         if (is_u || size == 0) {
8821             unallocated_encoding(s);
8822             return;
8823         }
8824         /* fall through */
8825     case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8826     case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8827     case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8828     case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8829     case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8830     case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8831     case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
8832         /* 64 x 64 -> 128 */
8833         if (size == 3) {
8834             unallocated_encoding(s);
8835             return;
8836         }
8837     is_widening:
8838         if (!fp_access_check(s)) {
8839             return;
8840         }
8841
8842         handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
8843         break;
8844     default:
8845         /* opcode 15 not allocated */
8846         unallocated_encoding(s);
8847         break;
8848     }
8849 }
8850
8851 /* Logic op (opcode == 3) subgroup of C3.6.16. */
8852 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
8853 {
8854     int rd = extract32(insn, 0, 5);
8855     int rn = extract32(insn, 5, 5);
8856     int rm = extract32(insn, 16, 5);
8857     int size = extract32(insn, 22, 2);
8858     bool is_u = extract32(insn, 29, 1);
8859     bool is_q = extract32(insn, 30, 1);
8860     TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
8861     int pass;
8862
8863     if (!fp_access_check(s)) {
8864         return;
8865     }
8866
8867     tcg_op1 = tcg_temp_new_i64();
8868     tcg_op2 = tcg_temp_new_i64();
8869     tcg_res[0] = tcg_temp_new_i64();
8870     tcg_res[1] = tcg_temp_new_i64();
8871
8872     for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
8873         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8874         read_vec_element(s, tcg_op2, rm, pass, MO_64);
8875
8876         if (!is_u) {
8877             switch (size) {
8878             case 0: /* AND */
8879                 tcg_gen_and_i64(tcg_res[pass], tcg_op1, tcg_op2);
8880                 break;
8881             case 1: /* BIC */
8882                 tcg_gen_andc_i64(tcg_res[pass], tcg_op1, tcg_op2);
8883                 break;
8884             case 2: /* ORR */
8885                 tcg_gen_or_i64(tcg_res[pass], tcg_op1, tcg_op2);
8886                 break;
8887             case 3: /* ORN */
8888                 tcg_gen_orc_i64(tcg_res[pass], tcg_op1, tcg_op2);
8889                 break;
8890             }
8891         } else {
8892             if (size != 0) {
8893                 /* B* ops need res loaded to operate on */
8894                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8895             }
8896
8897             switch (size) {
8898             case 0: /* EOR */
8899                 tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
8900                 break;
8901             case 1: /* BSL bitwise select */
8902                 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_op2);
8903                 tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_res[pass]);
8904                 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op1);
8905                 break;
8906             case 2: /* BIT, bitwise insert if true */
8907                 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
8908                 tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_op2);
8909                 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
8910                 break;
8911             case 3: /* BIF, bitwise insert if false */
8912                 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
8913                 tcg_gen_andc_i64(tcg_op1, tcg_op1, tcg_op2);
8914                 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
8915                 break;
8916             }
8917         }
8918     }
8919
8920     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
8921     if (!is_q) {
8922         tcg_gen_movi_i64(tcg_res[1], 0);
8923     }
8924     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
8925
8926     tcg_temp_free_i64(tcg_op1);
8927     tcg_temp_free_i64(tcg_op2);
8928     tcg_temp_free_i64(tcg_res[0]);
8929     tcg_temp_free_i64(tcg_res[1]);
8930 }
8931
8932 /* Helper functions for 32 bit comparisons */
8933 static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8934 {
8935     tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
8936 }
8937
8938 static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8939 {
8940     tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
8941 }
8942
8943 static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8944 {
8945     tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
8946 }
8947
8948 static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
8949 {
8950     tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
8951 }
8952
8953 /* Pairwise op subgroup of C3.6.16.
8954  *
8955  * This is called directly or via the handle_3same_float for float pairwise
8956  * operations where the opcode and size are calculated differently.
8957  */
8958 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
8959                                    int size, int rn, int rm, int rd)
8960 {
8961     TCGv_ptr fpst;
8962     int pass;
8963
8964     /* Floating point operations need fpst */
8965     if (opcode >= 0x58) {
8966         fpst = get_fpstatus_ptr();
8967     } else {
8968         TCGV_UNUSED_PTR(fpst);
8969     }
8970
8971     if (!fp_access_check(s)) {
8972         return;
8973     }
8974
8975     /* These operations work on the concatenated rm:rn, with each pair of
8976      * adjacent elements being operated on to produce an element in the result.
8977      */
8978     if (size == 3) {
8979         TCGv_i64 tcg_res[2];
8980
8981         for (pass = 0; pass < 2; pass++) {
8982             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8983             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8984             int passreg = (pass == 0) ? rn : rm;
8985
8986             read_vec_element(s, tcg_op1, passreg, 0, MO_64);
8987             read_vec_element(s, tcg_op2, passreg, 1, MO_64);
8988             tcg_res[pass] = tcg_temp_new_i64();
8989
8990             switch (opcode) {
8991             case 0x17: /* ADDP */
8992                 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
8993                 break;
8994             case 0x58: /* FMAXNMP */
8995                 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
8996                 break;
8997             case 0x5a: /* FADDP */
8998                 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
8999                 break;
9000             case 0x5e: /* FMAXP */
9001                 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9002                 break;
9003             case 0x78: /* FMINNMP */
9004                 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9005                 break;
9006             case 0x7e: /* FMINP */
9007                 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9008                 break;
9009             default:
9010                 g_assert_not_reached();
9011             }
9012
9013             tcg_temp_free_i64(tcg_op1);
9014             tcg_temp_free_i64(tcg_op2);
9015         }
9016
9017         for (pass = 0; pass < 2; pass++) {
9018             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9019             tcg_temp_free_i64(tcg_res[pass]);
9020         }
9021     } else {
9022         int maxpass = is_q ? 4 : 2;
9023         TCGv_i32 tcg_res[4];
9024
9025         for (pass = 0; pass < maxpass; pass++) {
9026             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9027             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9028             NeonGenTwoOpFn *genfn = NULL;
9029             int passreg = pass < (maxpass / 2) ? rn : rm;
9030             int passelt = (is_q && (pass & 1)) ? 2 : 0;
9031
9032             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
9033             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
9034             tcg_res[pass] = tcg_temp_new_i32();
9035
9036             switch (opcode) {
9037             case 0x17: /* ADDP */
9038             {
9039                 static NeonGenTwoOpFn * const fns[3] = {
9040                     gen_helper_neon_padd_u8,
9041                     gen_helper_neon_padd_u16,
9042                     tcg_gen_add_i32,
9043                 };
9044                 genfn = fns[size];
9045                 break;
9046             }
9047             case 0x14: /* SMAXP, UMAXP */
9048             {
9049                 static NeonGenTwoOpFn * const fns[3][2] = {
9050                     { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
9051                     { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
9052                     { gen_max_s32, gen_max_u32 },
9053                 };
9054                 genfn = fns[size][u];
9055                 break;
9056             }
9057             case 0x15: /* SMINP, UMINP */
9058             {
9059                 static NeonGenTwoOpFn * const fns[3][2] = {
9060                     { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
9061                     { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
9062                     { gen_min_s32, gen_min_u32 },
9063                 };
9064                 genfn = fns[size][u];
9065                 break;
9066             }
9067             /* The FP operations are all on single floats (32 bit) */
9068             case 0x58: /* FMAXNMP */
9069                 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9070                 break;
9071             case 0x5a: /* FADDP */
9072                 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9073                 break;
9074             case 0x5e: /* FMAXP */
9075                 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9076                 break;
9077             case 0x78: /* FMINNMP */
9078                 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9079                 break;
9080             case 0x7e: /* FMINP */
9081                 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9082                 break;
9083             default:
9084                 g_assert_not_reached();
9085             }
9086
9087             /* FP ops called directly, otherwise call now */
9088             if (genfn) {
9089                 genfn(tcg_res[pass], tcg_op1, tcg_op2);
9090             }
9091
9092             tcg_temp_free_i32(tcg_op1);
9093             tcg_temp_free_i32(tcg_op2);
9094         }
9095
9096         for (pass = 0; pass < maxpass; pass++) {
9097             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9098             tcg_temp_free_i32(tcg_res[pass]);
9099         }
9100         if (!is_q) {
9101             clear_vec_high(s, rd);
9102         }
9103     }
9104
9105     if (!TCGV_IS_UNUSED_PTR(fpst)) {
9106         tcg_temp_free_ptr(fpst);
9107     }
9108 }
9109
9110 /* Floating point op subgroup of C3.6.16. */
9111 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
9112 {
9113     /* For floating point ops, the U, size[1] and opcode bits
9114      * together indicate the operation. size[0] indicates single
9115      * or double.
9116      */
9117     int fpopcode = extract32(insn, 11, 5)
9118         | (extract32(insn, 23, 1) << 5)
9119         | (extract32(insn, 29, 1) << 6);
9120     int is_q = extract32(insn, 30, 1);
9121     int size = extract32(insn, 22, 1);
9122     int rm = extract32(insn, 16, 5);
9123     int rn = extract32(insn, 5, 5);
9124     int rd = extract32(insn, 0, 5);
9125
9126     int datasize = is_q ? 128 : 64;
9127     int esize = 32 << size;
9128     int elements = datasize / esize;
9129
9130     if (size == 1 && !is_q) {
9131         unallocated_encoding(s);
9132         return;
9133     }
9134
9135     switch (fpopcode) {
9136     case 0x58: /* FMAXNMP */
9137     case 0x5a: /* FADDP */
9138     case 0x5e: /* FMAXP */
9139     case 0x78: /* FMINNMP */
9140     case 0x7e: /* FMINP */
9141         if (size && !is_q) {
9142             unallocated_encoding(s);
9143             return;
9144         }
9145         handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
9146                                rn, rm, rd);
9147         return;
9148     case 0x1b: /* FMULX */
9149     case 0x1f: /* FRECPS */
9150     case 0x3f: /* FRSQRTS */
9151     case 0x5d: /* FACGE */
9152     case 0x7d: /* FACGT */
9153     case 0x19: /* FMLA */
9154     case 0x39: /* FMLS */
9155     case 0x18: /* FMAXNM */
9156     case 0x1a: /* FADD */
9157     case 0x1c: /* FCMEQ */
9158     case 0x1e: /* FMAX */
9159     case 0x38: /* FMINNM */
9160     case 0x3a: /* FSUB */
9161     case 0x3e: /* FMIN */
9162     case 0x5b: /* FMUL */
9163     case 0x5c: /* FCMGE */
9164     case 0x5f: /* FDIV */
9165     case 0x7a: /* FABD */
9166     case 0x7c: /* FCMGT */
9167         if (!fp_access_check(s)) {
9168             return;
9169         }
9170
9171         handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
9172         return;
9173     default:
9174         unallocated_encoding(s);
9175         return;
9176     }
9177 }
9178
9179 /* Integer op subgroup of C3.6.16. */
9180 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
9181 {
9182     int is_q = extract32(insn, 30, 1);
9183     int u = extract32(insn, 29, 1);
9184     int size = extract32(insn, 22, 2);
9185     int opcode = extract32(insn, 11, 5);
9186     int rm = extract32(insn, 16, 5);
9187     int rn = extract32(insn, 5, 5);
9188     int rd = extract32(insn, 0, 5);
9189     int pass;
9190
9191     switch (opcode) {
9192     case 0x13: /* MUL, PMUL */
9193         if (u && size != 0) {
9194             unallocated_encoding(s);
9195             return;
9196         }
9197         /* fall through */
9198     case 0x0: /* SHADD, UHADD */
9199     case 0x2: /* SRHADD, URHADD */
9200     case 0x4: /* SHSUB, UHSUB */
9201     case 0xc: /* SMAX, UMAX */
9202     case 0xd: /* SMIN, UMIN */
9203     case 0xe: /* SABD, UABD */
9204     case 0xf: /* SABA, UABA */
9205     case 0x12: /* MLA, MLS */
9206         if (size == 3) {
9207             unallocated_encoding(s);
9208             return;
9209         }
9210         break;
9211     case 0x16: /* SQDMULH, SQRDMULH */
9212         if (size == 0 || size == 3) {
9213             unallocated_encoding(s);
9214             return;
9215         }
9216         break;
9217     default:
9218         if (size == 3 && !is_q) {
9219             unallocated_encoding(s);
9220             return;
9221         }
9222         break;
9223     }
9224
9225     if (!fp_access_check(s)) {
9226         return;
9227     }
9228
9229     if (size == 3) {
9230         assert(is_q);
9231         for (pass = 0; pass < 2; pass++) {
9232             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9233             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9234             TCGv_i64 tcg_res = tcg_temp_new_i64();
9235
9236             read_vec_element(s, tcg_op1, rn, pass, MO_64);
9237             read_vec_element(s, tcg_op2, rm, pass, MO_64);
9238
9239             handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
9240
9241             write_vec_element(s, tcg_res, rd, pass, MO_64);
9242
9243             tcg_temp_free_i64(tcg_res);
9244             tcg_temp_free_i64(tcg_op1);
9245             tcg_temp_free_i64(tcg_op2);
9246         }
9247     } else {
9248         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
9249             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9250             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9251             TCGv_i32 tcg_res = tcg_temp_new_i32();
9252             NeonGenTwoOpFn *genfn = NULL;
9253             NeonGenTwoOpEnvFn *genenvfn = NULL;
9254
9255             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
9256             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
9257
9258             switch (opcode) {
9259             case 0x0: /* SHADD, UHADD */
9260             {
9261                 static NeonGenTwoOpFn * const fns[3][2] = {
9262                     { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
9263                     { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
9264                     { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
9265                 };
9266                 genfn = fns[size][u];
9267                 break;
9268             }
9269             case 0x1: /* SQADD, UQADD */
9270             {
9271                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9272                     { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9273                     { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9274                     { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9275                 };
9276                 genenvfn = fns[size][u];
9277                 break;
9278             }
9279             case 0x2: /* SRHADD, URHADD */
9280             {
9281                 static NeonGenTwoOpFn * const fns[3][2] = {
9282                     { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
9283                     { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
9284                     { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
9285                 };
9286                 genfn = fns[size][u];
9287                 break;
9288             }
9289             case 0x4: /* SHSUB, UHSUB */
9290             {
9291                 static NeonGenTwoOpFn * const fns[3][2] = {
9292                     { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
9293                     { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
9294                     { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
9295                 };
9296                 genfn = fns[size][u];
9297                 break;
9298             }
9299             case 0x5: /* SQSUB, UQSUB */
9300             {
9301                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9302                     { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9303                     { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9304                     { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9305                 };
9306                 genenvfn = fns[size][u];
9307                 break;
9308             }
9309             case 0x6: /* CMGT, CMHI */
9310             {
9311                 static NeonGenTwoOpFn * const fns[3][2] = {
9312                     { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_u8 },
9313                     { gen_helper_neon_cgt_s16, gen_helper_neon_cgt_u16 },
9314                     { gen_helper_neon_cgt_s32, gen_helper_neon_cgt_u32 },
9315                 };
9316                 genfn = fns[size][u];
9317                 break;
9318             }
9319             case 0x7: /* CMGE, CMHS */
9320             {
9321                 static NeonGenTwoOpFn * const fns[3][2] = {
9322                     { gen_helper_neon_cge_s8, gen_helper_neon_cge_u8 },
9323                     { gen_helper_neon_cge_s16, gen_helper_neon_cge_u16 },
9324                     { gen_helper_neon_cge_s32, gen_helper_neon_cge_u32 },
9325                 };
9326                 genfn = fns[size][u];
9327                 break;
9328             }
9329             case 0x8: /* SSHL, USHL */
9330             {
9331                 static NeonGenTwoOpFn * const fns[3][2] = {
9332                     { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
9333                     { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
9334                     { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
9335                 };
9336                 genfn = fns[size][u];
9337                 break;
9338             }
9339             case 0x9: /* SQSHL, UQSHL */
9340             {
9341                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9342                     { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9343                     { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9344                     { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9345                 };
9346                 genenvfn = fns[size][u];
9347                 break;
9348             }
9349             case 0xa: /* SRSHL, URSHL */
9350             {
9351                 static NeonGenTwoOpFn * const fns[3][2] = {
9352                     { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
9353                     { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
9354                     { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
9355                 };
9356                 genfn = fns[size][u];
9357                 break;
9358             }
9359             case 0xb: /* SQRSHL, UQRSHL */
9360             {
9361                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9362                     { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9363                     { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9364                     { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9365                 };
9366                 genenvfn = fns[size][u];
9367                 break;
9368             }
9369             case 0xc: /* SMAX, UMAX */
9370             {
9371                 static NeonGenTwoOpFn * const fns[3][2] = {
9372                     { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
9373                     { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
9374                     { gen_max_s32, gen_max_u32 },
9375                 };
9376                 genfn = fns[size][u];
9377                 break;
9378             }
9379
9380             case 0xd: /* SMIN, UMIN */
9381             {
9382                 static NeonGenTwoOpFn * const fns[3][2] = {
9383                     { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
9384                     { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
9385                     { gen_min_s32, gen_min_u32 },
9386                 };
9387                 genfn = fns[size][u];
9388                 break;
9389             }
9390             case 0xe: /* SABD, UABD */
9391             case 0xf: /* SABA, UABA */
9392             {
9393                 static NeonGenTwoOpFn * const fns[3][2] = {
9394                     { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
9395                     { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
9396                     { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
9397                 };
9398                 genfn = fns[size][u];
9399                 break;
9400             }
9401             case 0x10: /* ADD, SUB */
9402             {
9403                 static NeonGenTwoOpFn * const fns[3][2] = {
9404                     { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9405                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9406                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
9407                 };
9408                 genfn = fns[size][u];
9409                 break;
9410             }
9411             case 0x11: /* CMTST, CMEQ */
9412             {
9413                 static NeonGenTwoOpFn * const fns[3][2] = {
9414                     { gen_helper_neon_tst_u8, gen_helper_neon_ceq_u8 },
9415                     { gen_helper_neon_tst_u16, gen_helper_neon_ceq_u16 },
9416                     { gen_helper_neon_tst_u32, gen_helper_neon_ceq_u32 },
9417                 };
9418                 genfn = fns[size][u];
9419                 break;
9420             }
9421             case 0x13: /* MUL, PMUL */
9422                 if (u) {
9423                     /* PMUL */
9424                     assert(size == 0);
9425                     genfn = gen_helper_neon_mul_p8;
9426                     break;
9427                 }
9428                 /* fall through : MUL */
9429             case 0x12: /* MLA, MLS */
9430             {
9431                 static NeonGenTwoOpFn * const fns[3] = {
9432                     gen_helper_neon_mul_u8,
9433                     gen_helper_neon_mul_u16,
9434                     tcg_gen_mul_i32,
9435                 };
9436                 genfn = fns[size];
9437                 break;
9438             }
9439             case 0x16: /* SQDMULH, SQRDMULH */
9440             {
9441                 static NeonGenTwoOpEnvFn * const fns[2][2] = {
9442                     { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9443                     { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9444                 };
9445                 assert(size == 1 || size == 2);
9446                 genenvfn = fns[size - 1][u];
9447                 break;
9448             }
9449             default:
9450                 g_assert_not_reached();
9451             }
9452
9453             if (genenvfn) {
9454                 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
9455             } else {
9456                 genfn(tcg_res, tcg_op1, tcg_op2);
9457             }
9458
9459             if (opcode == 0xf || opcode == 0x12) {
9460                 /* SABA, UABA, MLA, MLS: accumulating ops */
9461                 static NeonGenTwoOpFn * const fns[3][2] = {
9462                     { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9463                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9464                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
9465                 };
9466                 bool is_sub = (opcode == 0x12 && u); /* MLS */
9467
9468                 genfn = fns[size][is_sub];
9469                 read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
9470                 genfn(tcg_res, tcg_op1, tcg_res);
9471             }
9472
9473             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9474
9475             tcg_temp_free_i32(tcg_res);
9476             tcg_temp_free_i32(tcg_op1);
9477             tcg_temp_free_i32(tcg_op2);
9478         }
9479     }
9480
9481     if (!is_q) {
9482         clear_vec_high(s, rd);
9483     }
9484 }
9485
9486 /* C3.6.16 AdvSIMD three same
9487  *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
9488  * +---+---+---+-----------+------+---+------+--------+---+------+------+
9489  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
9490  * +---+---+---+-----------+------+---+------+--------+---+------+------+
9491  */
9492 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
9493 {
9494     int opcode = extract32(insn, 11, 5);
9495
9496     switch (opcode) {
9497     case 0x3: /* logic ops */
9498         disas_simd_3same_logic(s, insn);
9499         break;
9500     case 0x17: /* ADDP */
9501     case 0x14: /* SMAXP, UMAXP */
9502     case 0x15: /* SMINP, UMINP */
9503     {
9504         /* Pairwise operations */
9505         int is_q = extract32(insn, 30, 1);
9506         int u = extract32(insn, 29, 1);
9507         int size = extract32(insn, 22, 2);
9508         int rm = extract32(insn, 16, 5);
9509         int rn = extract32(insn, 5, 5);
9510         int rd = extract32(insn, 0, 5);
9511         if (opcode == 0x17) {
9512             if (u || (size == 3 && !is_q)) {
9513                 unallocated_encoding(s);
9514                 return;
9515             }
9516         } else {
9517             if (size == 3) {
9518                 unallocated_encoding(s);
9519                 return;
9520             }
9521         }
9522         handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
9523         break;
9524     }
9525     case 0x18 ... 0x31:
9526         /* floating point ops, sz[1] and U are part of opcode */
9527         disas_simd_3same_float(s, insn);
9528         break;
9529     default:
9530         disas_simd_3same_int(s, insn);
9531         break;
9532     }
9533 }
9534
9535 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
9536                                   int size, int rn, int rd)
9537 {
9538     /* Handle 2-reg-misc ops which are widening (so each size element
9539      * in the source becomes a 2*size element in the destination.
9540      * The only instruction like this is FCVTL.
9541      */
9542     int pass;
9543
9544     if (size == 3) {
9545         /* 32 -> 64 bit fp conversion */
9546         TCGv_i64 tcg_res[2];
9547         int srcelt = is_q ? 2 : 0;
9548
9549         for (pass = 0; pass < 2; pass++) {
9550             TCGv_i32 tcg_op = tcg_temp_new_i32();
9551             tcg_res[pass] = tcg_temp_new_i64();
9552
9553             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
9554             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
9555             tcg_temp_free_i32(tcg_op);
9556         }
9557         for (pass = 0; pass < 2; pass++) {
9558             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9559             tcg_temp_free_i64(tcg_res[pass]);
9560         }
9561     } else {
9562         /* 16 -> 32 bit fp conversion */
9563         int srcelt = is_q ? 4 : 0;
9564         TCGv_i32 tcg_res[4];
9565
9566         for (pass = 0; pass < 4; pass++) {
9567             tcg_res[pass] = tcg_temp_new_i32();
9568
9569             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
9570             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
9571                                            cpu_env);
9572         }
9573         for (pass = 0; pass < 4; pass++) {
9574             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9575             tcg_temp_free_i32(tcg_res[pass]);
9576         }
9577     }
9578 }
9579
9580 static void handle_rev(DisasContext *s, int opcode, bool u,
9581                        bool is_q, int size, int rn, int rd)
9582 {
9583     int op = (opcode << 1) | u;
9584     int opsz = op + size;
9585     int grp_size = 3 - opsz;
9586     int dsize = is_q ? 128 : 64;
9587     int i;
9588
9589     if (opsz >= 3) {
9590         unallocated_encoding(s);
9591         return;
9592     }
9593
9594     if (!fp_access_check(s)) {
9595         return;
9596     }
9597
9598     if (size == 0) {
9599         /* Special case bytes, use bswap op on each group of elements */
9600         int groups = dsize / (8 << grp_size);
9601
9602         for (i = 0; i < groups; i++) {
9603             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9604
9605             read_vec_element(s, tcg_tmp, rn, i, grp_size);
9606             switch (grp_size) {
9607             case MO_16:
9608                 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
9609                 break;
9610             case MO_32:
9611                 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
9612                 break;
9613             case MO_64:
9614                 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
9615                 break;
9616             default:
9617                 g_assert_not_reached();
9618             }
9619             write_vec_element(s, tcg_tmp, rd, i, grp_size);
9620             tcg_temp_free_i64(tcg_tmp);
9621         }
9622         if (!is_q) {
9623             clear_vec_high(s, rd);
9624         }
9625     } else {
9626         int revmask = (1 << grp_size) - 1;
9627         int esize = 8 << size;
9628         int elements = dsize / esize;
9629         TCGv_i64 tcg_rn = tcg_temp_new_i64();
9630         TCGv_i64 tcg_rd = tcg_const_i64(0);
9631         TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
9632
9633         for (i = 0; i < elements; i++) {
9634             int e_rev = (i & 0xf) ^ revmask;
9635             int off = e_rev * esize;
9636             read_vec_element(s, tcg_rn, rn, i, size);
9637             if (off >= 64) {
9638                 tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
9639                                     tcg_rn, off - 64, esize);
9640             } else {
9641                 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
9642             }
9643         }
9644         write_vec_element(s, tcg_rd, rd, 0, MO_64);
9645         write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
9646
9647         tcg_temp_free_i64(tcg_rd_hi);
9648         tcg_temp_free_i64(tcg_rd);
9649         tcg_temp_free_i64(tcg_rn);
9650     }
9651 }
9652
9653 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
9654                                   bool is_q, int size, int rn, int rd)
9655 {
9656     /* Implement the pairwise operations from 2-misc:
9657      * SADDLP, UADDLP, SADALP, UADALP.
9658      * These all add pairs of elements in the input to produce a
9659      * double-width result element in the output (possibly accumulating).
9660      */
9661     bool accum = (opcode == 0x6);
9662     int maxpass = is_q ? 2 : 1;
9663     int pass;
9664     TCGv_i64 tcg_res[2];
9665
9666     if (size == 2) {
9667         /* 32 + 32 -> 64 op */
9668         TCGMemOp memop = size + (u ? 0 : MO_SIGN);
9669
9670         for (pass = 0; pass < maxpass; pass++) {
9671             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9672             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9673
9674             tcg_res[pass] = tcg_temp_new_i64();
9675
9676             read_vec_element(s, tcg_op1, rn, pass * 2, memop);
9677             read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
9678             tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9679             if (accum) {
9680                 read_vec_element(s, tcg_op1, rd, pass, MO_64);
9681                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9682             }
9683
9684             tcg_temp_free_i64(tcg_op1);
9685             tcg_temp_free_i64(tcg_op2);
9686         }
9687     } else {
9688         for (pass = 0; pass < maxpass; pass++) {
9689             TCGv_i64 tcg_op = tcg_temp_new_i64();
9690             NeonGenOneOpFn *genfn;
9691             static NeonGenOneOpFn * const fns[2][2] = {
9692                 { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
9693                 { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
9694             };
9695
9696             genfn = fns[size][u];
9697
9698             tcg_res[pass] = tcg_temp_new_i64();
9699
9700             read_vec_element(s, tcg_op, rn, pass, MO_64);
9701             genfn(tcg_res[pass], tcg_op);
9702
9703             if (accum) {
9704                 read_vec_element(s, tcg_op, rd, pass, MO_64);
9705                 if (size == 0) {
9706                     gen_helper_neon_addl_u16(tcg_res[pass],
9707                                              tcg_res[pass], tcg_op);
9708                 } else {
9709                     gen_helper_neon_addl_u32(tcg_res[pass],
9710                                              tcg_res[pass], tcg_op);
9711                 }
9712             }
9713             tcg_temp_free_i64(tcg_op);
9714         }
9715     }
9716     if (!is_q) {
9717         tcg_res[1] = tcg_const_i64(0);
9718     }
9719     for (pass = 0; pass < 2; pass++) {
9720         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9721         tcg_temp_free_i64(tcg_res[pass]);
9722     }
9723 }
9724
9725 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
9726 {
9727     /* Implement SHLL and SHLL2.
          *
          * Emits TCG ops for two passes.  Each pass reads one 32-bit chunk of
          * register rn, widens it to 64 bits with widenfns[size], and shifts
          * the widened value left by (8 << size) bits, i.e. by 8, 16 or 32
          * for size 0, 1 or 2.  The two 64-bit results are then written to
          * 64-bit elements 0 and 1 of register rd.
          *
          * is_q selects which 32-bit chunks of rn are read: 2 and 3 when
          * set, 0 and 1 otherwise.
          * size indexes the three-entry widenfns table, so it must be 0, 1
          * or 2; the caller visible in this file rejects size == 3 and does
          * the fp_access_check() before calling.
          *
          * Both results are held in tcg_res[] until every read of rn has been
          * emitted, and only then written to rd -- presumably so that the
          * result is still correct when rd and rn are the same register.
          */
9728     int pass;
9729     int part = is_q ? 2 : 0;
9730     TCGv_i64 tcg_res[2];
9731
9732     for (pass = 0; pass < 2; pass++) {
9733         static NeonGenWidenFn * const widenfns[3] = {
9734             gen_helper_neon_widen_u8,
9735             gen_helper_neon_widen_u16,
9736             tcg_gen_extu_i32_i64,
9737         };
9738         NeonGenWidenFn *widenfn = widenfns[size];
9739         TCGv_i32 tcg_op = tcg_temp_new_i32();
9740
9741         read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
9742         tcg_res[pass] = tcg_temp_new_i64();
9743         widenfn(tcg_res[pass], tcg_op);
9744         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
9745
9746         tcg_temp_free_i32(tcg_op);
9747     }
9748
9749     for (pass = 0; pass < 2; pass++) {
9750         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9751         tcg_temp_free_i64(tcg_res[pass]);
9752     }
9753 }
9754
9755 /* C3.6.17 AdvSIMD two reg misc
9756  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
9757  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
9758  * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
9759  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
      *
      * The decode switch below validates the size/U/Q combination for each
      * opcode.  Opcodes that have a dedicated handler routine call it and
      * return; invalid combinations call unallocated_encoding() and return.
      * The remaining opcodes break out of the switch and are emitted by the
      * shared loops at the end of the function: one 64-bit element per pass
      * when size == 3, otherwise one 32-bit chunk of the vector per pass.
9760  */
9761 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
9762 {
9763     int size = extract32(insn, 22, 2);
9764     int opcode = extract32(insn, 12, 5);
9765     bool u = extract32(insn, 29, 1);
9766     bool is_q = extract32(insn, 30, 1);
9767     int rn = extract32(insn, 5, 5);
9768     int rd = extract32(insn, 0, 5);
         /* The next three are set by the floating point cases of the decode
          * switch and consumed after it: need_fpstatus requests a float
          * status pointer, need_rmode requests a set_rmode helper call with
          * rmode before the element loops and a second one after them.
          */
9769     bool need_fpstatus = false;
9770     bool need_rmode = false;
9771     int rmode = -1;
9772     TCGv_i32 tcg_rmode;
9773     TCGv_ptr tcg_fpstatus;
9774
9775     switch (opcode) {
9776     case 0x0: /* REV64, REV32 */
9777     case 0x1: /* REV16 */
9778         handle_rev(s, opcode, u, is_q, size, rn, rd);
9779         return;
9780     case 0x5: /* CNT, NOT, RBIT */
9781         if (u && size == 0) {
9782             /* NOT: adjust size so we can use the 64-bits-at-a-time loop. */
9783             size = 3;
9784             break;
9785         } else if (u && size == 1) {
9786             /* RBIT */
9787             break;
9788         } else if (!u && size == 0) {
9789             /* CNT */
9790             break;
9791         }
9792         unallocated_encoding(s);
9793         return;
9794     case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
9795     case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
9796         if (size == 3) {
9797             unallocated_encoding(s);
9798             return;
9799         }
9800         if (!fp_access_check(s)) {
9801             return;
9802         }
9803
9804         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
9805         return;
9806     case 0x4: /* CLS, CLZ */
9807         if (size == 3) {
9808             unallocated_encoding(s);
9809             return;
9810         }
9811         break;
9812     case 0x2: /* SADDLP, UADDLP */
9813     case 0x6: /* SADALP, UADALP */
9814         if (size == 3) {
9815             unallocated_encoding(s);
9816             return;
9817         }
9818         if (!fp_access_check(s)) {
9819             return;
9820         }
9821         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
9822         return;
9823     case 0x13: /* SHLL, SHLL2 */
9824         if (u == 0 || size == 3) {
9825             unallocated_encoding(s);
9826             return;
9827         }
9828         if (!fp_access_check(s)) {
9829             return;
9830         }
9831         handle_shll(s, is_q, size, rn, rd);
9832         return;
9833     case 0xa: /* CMLT */
9834         if (u == 1) {
9835             unallocated_encoding(s);
9836             return;
9837         }
9838         /* fall through */
9839     case 0x8: /* CMGT, CMGE */
9840     case 0x9: /* CMEQ, CMLE */
9841     case 0xb: /* ABS, NEG */
9842         if (size == 3 && !is_q) {
9843             unallocated_encoding(s);
9844             return;
9845         }
9846         break;
9847     case 0x3: /* SUQADD, USQADD */
9848         if (size == 3 && !is_q) {
9849             unallocated_encoding(s);
9850             return;
9851         }
9852         if (!fp_access_check(s)) {
9853             return;
9854         }
9855         handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
9856         return;
9857     case 0x7: /* SQABS, SQNEG */
9858         if (size == 3 && !is_q) {
9859             unallocated_encoding(s);
9860             return;
9861         }
9862         break;
9863     case 0xc ... 0xf:
9864     case 0x16 ... 0x1d:
9865     case 0x1f:
9866     {
9867         /* Floating point: U, size[1] and opcode indicate operation;
9868          * size[0] indicates single or double precision.
              *
              * After the OR below, opcode is a 7-bit value: bits [4:0] are the
              * original opcode field, bit 5 is size[1] and bit 6 is U.  The
              * case labels of the nested switch and of the element loops
              * further down use this combined value.  size is rewritten to 3
              * for double precision and 2 for single precision.
9869          */
9870         int is_double = extract32(size, 0, 1);
9871         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
9872         size = is_double ? 3 : 2;
9873         switch (opcode) {
9874         case 0x2f: /* FABS */
9875         case 0x6f: /* FNEG */
9876             if (size == 3 && !is_q) {
9877                 unallocated_encoding(s);
9878                 return;
9879             }
9880             break;
9881         case 0x1d: /* SCVTF */
9882         case 0x5d: /* UCVTF */
9883         {
9884             bool is_signed = (opcode == 0x1d) ? true : false;
9885             int elements = is_double ? 2 : is_q ? 4 : 2;
9886             if (is_double && !is_q) {
9887                 unallocated_encoding(s);
9888                 return;
9889             }
9890             if (!fp_access_check(s)) {
9891                 return;
9892             }
9893             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
9894             return;
9895         }
9896         case 0x2c: /* FCMGT (zero) */
9897         case 0x2d: /* FCMEQ (zero) */
9898         case 0x2e: /* FCMLT (zero) */
9899         case 0x6c: /* FCMGE (zero) */
9900         case 0x6d: /* FCMLE (zero) */
9901             if (size == 3 && !is_q) {
9902                 unallocated_encoding(s);
9903                 return;
9904             }
                 /* NOTE(review): unlike the other early-return cases in this
                  * function, no fp_access_check() is made here before
                  * dispatching; presumably handle_2misc_fcmp_zero() performs
                  * the check itself -- confirm against its definition.
                  */
9905             handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
9906             return;
9907         case 0x7f: /* FSQRT */
9908             if (size == 3 && !is_q) {
9909                 unallocated_encoding(s);
9910                 return;
9911             }
9912             break;
9913         case 0x1a: /* FCVTNS */
9914         case 0x1b: /* FCVTMS */
9915         case 0x3a: /* FCVTPS */
9916         case 0x3b: /* FCVTZS */
9917         case 0x5a: /* FCVTNU */
9918         case 0x5b: /* FCVTMU */
9919         case 0x7a: /* FCVTPU */
9920         case 0x7b: /* FCVTZU */
9921             need_fpstatus = true;
9922             need_rmode = true;
                 /* Rounding mode selector: bit 0 is taken from bit 5 of the
                  * combined opcode (size[1]) and bit 1 from opcode bit 0.  It
                  * is converted with arm_rmode_to_sf() after the switch.
                  */
9923             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
9924             if (size == 3 && !is_q) {
9925                 unallocated_encoding(s);
9926                 return;
9927             }
9928             break;
9929         case 0x5c: /* FCVTAU */
9930         case 0x1c: /* FCVTAS */
9931             need_fpstatus = true;
9932             need_rmode = true;
9933             rmode = FPROUNDING_TIEAWAY;
9934             if (size == 3 && !is_q) {
9935                 unallocated_encoding(s);
9936                 return;
9937             }
9938             break;
9939         case 0x3c: /* URECPE */
9940             if (size == 3) {
9941                 unallocated_encoding(s);
9942                 return;
9943             }
9944             /* fall through */
9945         case 0x3d: /* FRECPE */
9946         case 0x7d: /* FRSQRTE */
9947             if (size == 3 && !is_q) {
9948                 unallocated_encoding(s);
9949                 return;
9950             }
9951             if (!fp_access_check(s)) {
9952                 return;
9953             }
9954             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
9955             return;
9956         case 0x56: /* FCVTXN, FCVTXN2 */
9957             if (size == 2) {
9958                 unallocated_encoding(s);
9959                 return;
9960             }
9961             /* fall through */
9962         case 0x16: /* FCVTN, FCVTN2 */
9963             /* handle_2misc_narrow does a 2*size -> size operation, but these
9964              * instructions encode the source size rather than dest size.
9965              */
9966             if (!fp_access_check(s)) {
9967                 return;
9968             }
9969             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
9970             return;
9971         case 0x17: /* FCVTL, FCVTL2 */
9972             if (!fp_access_check(s)) {
9973                 return;
9974             }
9975             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
9976             return;
9977         case 0x18: /* FRINTN */
9978         case 0x19: /* FRINTM */
9979         case 0x38: /* FRINTP */
9980         case 0x39: /* FRINTZ */
9981             need_rmode = true;
                 /* Same selector construction as for the FCVT* cases above. */
9982             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
9983             /* fall through */
9984         case 0x59: /* FRINTX */
9985         case 0x79: /* FRINTI */
9986             need_fpstatus = true;
9987             if (size == 3 && !is_q) {
9988                 unallocated_encoding(s);
9989                 return;
9990             }
9991             break;
9992         case 0x58: /* FRINTA */
9993             need_rmode = true;
9994             rmode = FPROUNDING_TIEAWAY;
9995             need_fpstatus = true;
9996             if (size == 3 && !is_q) {
9997                 unallocated_encoding(s);
9998                 return;
9999             }
10000             break;
10001         case 0x7c: /* URSQRTE */
10002             if (size == 3) {
10003                 unallocated_encoding(s);
10004                 return;
10005             }
10006             need_fpstatus = true;
10007             break;
10008         default:
10009             unallocated_encoding(s);
10010             return;
10011         }
10012         break;
10013     }
10014     default:
10015         unallocated_encoding(s);
10016         return;
10017     }
10018
          /* Common path for every opcode that broke out of the switch above;
           * nothing is emitted if the access check fails.
           */
10019     if (!fp_access_check(s)) {
10020         return;
10021     }
10022
10023     if (need_fpstatus) {
10024         tcg_fpstatus = get_fpstatus_ptr();
10025     } else {
10026         TCGV_UNUSED_PTR(tcg_fpstatus);
10027     }
10028     if (need_rmode) {
              /* tcg_rmode is passed as both the result and the operand of the
               * helper, and the same temporary is handed to
               * gen_helper_set_rmode again at the end of the function --
               * presumably the helper returns the previous mode so that the
               * second call restores it (confirm against the helper).
               */
10029         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
10030         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10031     } else {
10032         TCGV_UNUSED_I32(tcg_rmode);
10033     }
10034
10035     if (size == 3) {
10036         /* All 64-bit element operations can be shared with scalar 2misc */
10037         int pass;
10038
10039         for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
10040             TCGv_i64 tcg_op = tcg_temp_new_i64();
10041             TCGv_i64 tcg_res = tcg_temp_new_i64();
10042
10043             read_vec_element(s, tcg_op, rn, pass, MO_64);
10044
10045             handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
10046                             tcg_rmode, tcg_fpstatus);
10047
10048             write_vec_element(s, tcg_res, rd, pass, MO_64);
10049
10050             tcg_temp_free_i64(tcg_res);
10051             tcg_temp_free_i64(tcg_op);
10052         }
10053     } else {
              /* size is 0, 1 or 2 here: one 32-bit chunk of the vector per
               * pass, 4 passes for the Q form and 2 otherwise.
               */
10054         int pass;
10055
10056         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
10057             TCGv_i32 tcg_op = tcg_temp_new_i32();
10058             TCGv_i32 tcg_res = tcg_temp_new_i32();
10059             TCGCond cond;
10060
10061             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
10062
10063             if (size == 2) {
10064                 /* Special cases for 32 bit elements */
10065                 switch (opcode) {
10066                 case 0xa: /* CMLT */
10067                     /* 32 bit integer comparison against zero, result is
10068                      * test ? (2^32 - 1) : 0. We implement via setcond(test)
10069                      * and inverting.
10070                      */
10071                     cond = TCG_COND_LT;
                      /* Also entered by goto from cases 0x8 and 0x9 below,
                       * after they have chosen their own cond.
                       */
10072                 do_cmop:
10073                     tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
10074                     tcg_gen_neg_i32(tcg_res, tcg_res);
10075                     break;
10076                 case 0x8: /* CMGT, CMGE */
10077                     cond = u ? TCG_COND_GE : TCG_COND_GT;
10078                     goto do_cmop;
10079                 case 0x9: /* CMEQ, CMLE */
10080                     cond = u ? TCG_COND_LE : TCG_COND_EQ;
10081                     goto do_cmop;
10082                 case 0x4: /* CLS, CLZ */
10083                     if (u) {
10084                         gen_helper_clz32(tcg_res, tcg_op);
10085                     } else {
10086                         gen_helper_cls32(tcg_res, tcg_op);
10087                     }
10088                     break;
10089                 case 0x7: /* SQABS, SQNEG */
10090                     if (u) {
10091                         gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
10092                     } else {
10093                         gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
10094                     }
10095                     break;
10096                 case 0xb: /* ABS, NEG */
10097                     if (u) {
10098                         tcg_gen_neg_i32(tcg_res, tcg_op);
10099                     } else {
10100                         TCGv_i32 tcg_zero = tcg_const_i32(0);
10101                         tcg_gen_neg_i32(tcg_res, tcg_op);
                              /* Keep tcg_op when it is > 0, otherwise use the
                               * negated value computed just above.
                               */
10102                         tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
10103                                             tcg_zero, tcg_op, tcg_res);
10104                         tcg_temp_free_i32(tcg_zero);
10105                     }
10106                     break;
10107                 case 0x2f: /* FABS */
10108                     gen_helper_vfp_abss(tcg_res, tcg_op);
10109                     break;
10110                 case 0x6f: /* FNEG */
10111                     gen_helper_vfp_negs(tcg_res, tcg_op);
10112                     break;
10113                 case 0x7f: /* FSQRT */
10114                     gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
10115                     break;
10116                 case 0x1a: /* FCVTNS */
10117                 case 0x1b: /* FCVTMS */
10118                 case 0x1c: /* FCVTAS */
10119                 case 0x3a: /* FCVTPS */
10120                 case 0x3b: /* FCVTZS */
10121                 {
10122                     TCGv_i32 tcg_shift = tcg_const_i32(0);
10123                     gen_helper_vfp_tosls(tcg_res, tcg_op,
10124                                          tcg_shift, tcg_fpstatus);
10125                     tcg_temp_free_i32(tcg_shift);
10126                     break;
10127                 }
10128                 case 0x5a: /* FCVTNU */
10129                 case 0x5b: /* FCVTMU */
10130                 case 0x5c: /* FCVTAU */
10131                 case 0x7a: /* FCVTPU */
10132                 case 0x7b: /* FCVTZU */
10133                 {
10134                     TCGv_i32 tcg_shift = tcg_const_i32(0);
10135                     gen_helper_vfp_touls(tcg_res, tcg_op,
10136                                          tcg_shift, tcg_fpstatus);
10137                     tcg_temp_free_i32(tcg_shift);
10138                     break;
10139                 }
10140                 case 0x18: /* FRINTN */
10141                 case 0x19: /* FRINTM */
10142                 case 0x38: /* FRINTP */
10143                 case 0x39: /* FRINTZ */
10144                 case 0x58: /* FRINTA */
10145                 case 0x79: /* FRINTI */
10146                     gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
10147                     break;
10148                 case 0x59: /* FRINTX */
10149                     gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
10150                     break;
10151                 case 0x7c: /* URSQRTE */
10152                     gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
10153                     break;
10154                 default:
10155                     g_assert_not_reached();
10156                 }
10157             } else {
10158                 /* Use helpers for 8 and 16 bit elements */
10159                 switch (opcode) {
10160                 case 0x5: /* CNT, RBIT */
10161                     /* For these two insns size is part of the opcode specifier
10162                      * (handled earlier); they always operate on byte elements.
10163                      */
10164                     if (u) {
10165                         gen_helper_neon_rbit_u8(tcg_res, tcg_op);
10166                     } else {
10167                         gen_helper_neon_cnt_u8(tcg_res, tcg_op);
10168                     }
10169                     break;
10170                 case 0x7: /* SQABS, SQNEG */
10171                 {
                          /* size is 0 or 1 in this branch, so it selects the
                           * 8 or 16 bit row; u selects the abs or neg column.
                           */
10172                     NeonGenOneOpEnvFn *genfn;
10173                     static NeonGenOneOpEnvFn * const fns[2][2] = {
10174                         { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
10175                         { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
10176                     };
10177                     genfn = fns[size][u];
10178                     genfn(tcg_res, cpu_env, tcg_op);
10179                     break;
10180                 }
10181                 case 0x8: /* CMGT, CMGE */
10182                 case 0x9: /* CMEQ, CMLE */
10183                 case 0xa: /* CMLT */
10184                 {
10185                     static NeonGenTwoOpFn * const fns[3][2] = {
10186                         { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
10187                         { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
10188                         { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
10189                     };
10190                     NeonGenTwoOpFn *genfn;
10191                     int comp;
10192                     bool reverse;
10193                     TCGv_i32 tcg_zero = tcg_const_i32(0);
10194
10195                     /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
10196                     comp = (opcode - 0x8) * 2 + u;
10197                     /* ...but LE, LT are implemented as reverse GE, GT:
                           * comp 3 (CMLE) becomes row 1 (cge) and comp 4 (CMLT)
                           * becomes row 0 (cgt), with the operand and the zero
                           * swapped in the call below.
                           */
10198                     reverse = (comp > 2);
10199                     if (reverse) {
10200                         comp = 4 - comp;
10201                     }
10202                     genfn = fns[comp][size];
10203                     if (reverse) {
10204                         genfn(tcg_res, tcg_zero, tcg_op);
10205                     } else {
10206                         genfn(tcg_res, tcg_op, tcg_zero);
10207                     }
10208                     tcg_temp_free_i32(tcg_zero);
10209                     break;
10210                 }
10211                 case 0xb: /* ABS, NEG */
10212                     if (u) {
                              /* NEG is emitted as 0 - op with the sub helper. */
10213                         TCGv_i32 tcg_zero = tcg_const_i32(0);
10214                         if (size) {
10215                             gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
10216                         } else {
10217                             gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
10218                         }
10219                         tcg_temp_free_i32(tcg_zero);
10220                     } else {
10221                         if (size) {
10222                             gen_helper_neon_abs_s16(tcg_res, tcg_op);
10223                         } else {
10224                             gen_helper_neon_abs_s8(tcg_res, tcg_op);
10225                         }
10226                     }
10227                     break;
10228                 case 0x4: /* CLS, CLZ */
10229                     if (u) {
10230                         if (size == 0) {
10231                             gen_helper_neon_clz_u8(tcg_res, tcg_op);
10232                         } else {
10233                             gen_helper_neon_clz_u16(tcg_res, tcg_op);
10234                         }
10235                     } else {
10236                         if (size == 0) {
10237                             gen_helper_neon_cls_s8(tcg_res, tcg_op);
10238                         } else {
10239                             gen_helper_neon_cls_s16(tcg_res, tcg_op);
10240                         }
10241                     }
10242                     break;
10243                 default:
10244                     g_assert_not_reached();
10245                 }
10246             }
10247
10248             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10249
10250             tcg_temp_free_i32(tcg_res);
10251             tcg_temp_free_i32(tcg_op);
10252         }
10253     }
          /* For the non-Q form the loops above wrote only the low 64 bits of
           * rd (one 64-bit pass or two 32-bit passes); clear_vec_high() is
           * then called for the rest -- presumably zeroing it, going by its
           * name (its definition is not visible here).
           */
10254     if (!is_q) {
10255         clear_vec_high(s, rd);
10256     }
10257
10258     if (need_rmode) {
              /* Second set_rmode call, pairing with the one made before the
               * element loops.
               */
10259         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
10260         tcg_temp_free_i32(tcg_rmode);
10261     }
10262     if (need_fpstatus) {
10263         tcg_temp_free_ptr(tcg_fpstatus);
10264     }
10265 }
10266
10267 /* C3.6.13 AdvSIMD scalar x indexed element
10268  *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10269  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10270  * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10271  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10272  * C3.6.18 AdvSIMD vector x indexed element
10273  *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10274  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10275  * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10276  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10277  */
10278 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
10279 {
10280     /* This encoding has two kinds of instruction:
10281      *  normal, where we perform elt x idxelt => elt for each
10282      *     element in the vector
10283      *  long, where we perform elt x idxelt and generate a result of
10284      *     double the width of the input element
10285      * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
10286      */
10287     bool is_scalar = extract32(insn, 28, 1);
10288     bool is_q = extract32(insn, 30, 1);
10289     bool u = extract32(insn, 29, 1);
10290     int size = extract32(insn, 22, 2);
10291     int l = extract32(insn, 21, 1);
10292     int m = extract32(insn, 20, 1);
10293     /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
10294     int rm = extract32(insn, 16, 4);
10295     int opcode = extract32(insn, 12, 4);
10296     int h = extract32(insn, 11, 1);
10297     int rn = extract32(insn, 5, 5);
10298     int rd = extract32(insn, 0, 5);
10299     bool is_long = false;
10300     bool is_fp = false;
10301     int index;
10302     TCGv_ptr fpst;
10303
10304     switch (opcode) {
10305     case 0x0: /* MLA */
10306     case 0x4: /* MLS */
10307         if (!u || is_scalar) {
10308             unallocated_encoding(s);
10309             return;
10310         }
10311         break;
10312     case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10313     case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10314     case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
10315         if (is_scalar) {
10316             unallocated_encoding(s);
10317             return;
10318         }
10319         is_long = true;
10320         break;
10321     case 0x3: /* SQDMLAL, SQDMLAL2 */
10322     case 0x7: /* SQDMLSL, SQDMLSL2 */
10323     case 0xb: /* SQDMULL, SQDMULL2 */
10324         is_long = true;
10325         /* fall through */
10326     case 0xc: /* SQDMULH */
10327     case 0xd: /* SQRDMULH */
10328         if (u) {
10329             unallocated_encoding(s);
10330             return;
10331         }
10332         break;
10333     case 0x8: /* MUL */
10334         if (u || is_scalar) {
10335             unallocated_encoding(s);
10336             return;
10337         }
10338         break;
10339     case 0x1: /* FMLA */
10340     case 0x5: /* FMLS */
10341         if (u) {
10342             unallocated_encoding(s);
10343             return;
10344         }
10345         /* fall through */
10346     case 0x9: /* FMUL, FMULX */
10347         if (!extract32(size, 1, 1)) {
10348             unallocated_encoding(s);
10349             return;
10350         }
10351         is_fp = true;
10352         break;
10353     default:
10354         unallocated_encoding(s);
10355         return;
10356     }
10357
10358     if (is_fp) {
10359         /* low bit of size indicates single/double */
10360         size = extract32(size, 0, 1) ? 3 : 2;
10361         if (size == 2) {
10362             index = h << 1 | l;
10363         } else {
10364             if (l || !is_q) {
10365                 unallocated_encoding(s);
10366                 return;
10367             }
10368             index = h;
10369         }
10370         rm |= (m << 4);
10371     } else {
10372         switch (size) {
10373         case 1:
10374             index = h << 2 | l << 1 | m;
10375             break;
10376         case 2:
10377             index = h << 1 | l;
10378             rm |= (m << 4);
10379             break;
10380         default:
10381             unallocated_encoding(s);
10382             return;
10383         }
10384     }
10385
10386     if (!fp_access_check(s)) {
10387         return;
10388     }
10389
10390     if (is_fp) {
10391         fpst = get_fpstatus_ptr();
10392     } else {
10393         TCGV_UNUSED_PTR(fpst);
10394     }
10395
10396     if (size == 3) {
10397         TCGv_i64 tcg_idx = tcg_temp_new_i64();
10398         int pass;
10399
10400         assert(is_fp && is_q && !is_long);
10401
10402         read_vec_element(s, tcg_idx, rm, index, MO_64);
10403
10404         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10405             TCGv_i64 tcg_op = tcg_temp_new_i64();
10406             TCGv_i64 tcg_res = tcg_temp_new_i64();
10407
10408             read_vec_element(s, tcg_op, rn, pass, MO_64);
10409
10410             switch (opcode) {
10411             case 0x5: /* FMLS */
10412                 /* As usual for ARM, separate negation for fused multiply-add */
10413                 gen_helper_vfp_negd(tcg_op, tcg_op);
10414                 /* fall through */
10415             case 0x1: /* FMLA */
10416                 read_vec_element(s, tcg_res, rd, pass, MO_64);
10417                 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10418                 break;
10419             case 0x9: /* FMUL, FMULX */
10420                 if (u) {
10421                     gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
10422                 } else {
10423                     gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
10424                 }
10425                 break;
10426             default:
10427                 g_assert_not_reached();
10428             }
10429
10430             write_vec_element(s, tcg_res, rd, pass, MO_64);
10431             tcg_temp_free_i64(tcg_op);
10432             tcg_temp_free_i64(tcg_res);
10433         }
10434
10435         if (is_scalar) {
10436             clear_vec_high(s, rd);
10437         }
10438
10439         tcg_temp_free_i64(tcg_idx);
10440     } else if (!is_long) {
10441         /* 32 bit floating point, or 16 or 32 bit integer.
10442          * For the 16 bit scalar case we use the usual Neon helpers and
10443          * rely on the fact that 0 op 0 == 0 with no side effects.
10444          */
10445         TCGv_i32 tcg_idx = tcg_temp_new_i32();
10446         int pass, maxpasses;
10447
10448         if (is_scalar) {
10449             maxpasses = 1;
10450         } else {
10451             maxpasses = is_q ? 4 : 2;
10452         }
10453
10454         read_vec_element_i32(s, tcg_idx, rm, index, size);
10455
10456         if (size == 1 && !is_scalar) {
10457             /* The simplest way to handle the 16x16 indexed ops is to duplicate
10458              * the index into both halves of the 32 bit tcg_idx and then use
10459              * the usual Neon helpers.
10460              */
10461             tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10462         }
10463
10464         for (pass = 0; pass < maxpasses; pass++) {
10465             TCGv_i32 tcg_op = tcg_temp_new_i32();
10466             TCGv_i32 tcg_res = tcg_temp_new_i32();
10467
10468             read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
10469
10470             switch (opcode) {
10471             case 0x0: /* MLA */
10472             case 0x4: /* MLS */
10473             case 0x8: /* MUL */
10474             {
10475                 static NeonGenTwoOpFn * const fns[2][2] = {
10476                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
10477                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
10478                 };
10479                 NeonGenTwoOpFn *genfn;
10480                 bool is_sub = opcode == 0x4;
10481
10482                 if (size == 1) {
10483                     gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
10484                 } else {
10485                     tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
10486                 }
10487                 if (opcode == 0x8) {
10488                     break;
10489                 }
10490                 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
10491                 genfn = fns[size - 1][is_sub];
10492                 genfn(tcg_res, tcg_op, tcg_res);
10493                 break;
10494             }
10495             case 0x5: /* FMLS */
10496                 /* As usual for ARM, separate negation for fused multiply-add */
10497                 gen_helper_vfp_negs(tcg_op, tcg_op);
10498                 /* fall through */
10499             case 0x1: /* FMLA */
10500                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10501                 gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10502                 break;
10503             case 0x9: /* FMUL, FMULX */
10504                 if (u) {
10505                     gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
10506                 } else {
10507                     gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
10508                 }
10509                 break;
10510             case 0xc: /* SQDMULH */
10511                 if (size == 1) {
10512                     gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
10513                                                tcg_op, tcg_idx);
10514                 } else {
10515                     gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
10516                                                tcg_op, tcg_idx);
10517                 }
10518                 break;
10519             case 0xd: /* SQRDMULH */
10520                 if (size == 1) {
10521                     gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
10522                                                 tcg_op, tcg_idx);
10523                 } else {
10524                     gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
10525                                                 tcg_op, tcg_idx);
10526                 }
10527                 break;
10528             default:
10529                 g_assert_not_reached();
10530             }
10531
10532             if (is_scalar) {
10533                 write_fp_sreg(s, rd, tcg_res);
10534             } else {
10535                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10536             }
10537
10538             tcg_temp_free_i32(tcg_op);
10539             tcg_temp_free_i32(tcg_res);
10540         }
10541
10542         tcg_temp_free_i32(tcg_idx);
10543
10544         if (!is_q) {
10545             clear_vec_high(s, rd);
10546         }
10547     } else {
10548         /* long ops: 16x16->32 or 32x32->64 */
10549         TCGv_i64 tcg_res[2];
10550         int pass;
10551         bool satop = extract32(opcode, 0, 1);
10552         TCGMemOp memop = MO_32;
10553
10554         if (satop || !u) {
10555             memop |= MO_SIGN;
10556         }
10557
10558         if (size == 2) {
10559             TCGv_i64 tcg_idx = tcg_temp_new_i64();
10560
10561             read_vec_element(s, tcg_idx, rm, index, memop);
10562
10563             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10564                 TCGv_i64 tcg_op = tcg_temp_new_i64();
10565                 TCGv_i64 tcg_passres;
10566                 int passelt;
10567
10568                 if (is_scalar) {
10569                     passelt = 0;
10570                 } else {
10571                     passelt = pass + (is_q * 2);
10572                 }
10573
10574                 read_vec_element(s, tcg_op, rn, passelt, memop);
10575
10576                 tcg_res[pass] = tcg_temp_new_i64();
10577
10578                 if (opcode == 0xa || opcode == 0xb) {
10579                     /* Non-accumulating ops */
10580                     tcg_passres = tcg_res[pass];
10581                 } else {
10582                     tcg_passres = tcg_temp_new_i64();
10583                 }
10584
10585                 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
10586                 tcg_temp_free_i64(tcg_op);
10587
10588                 if (satop) {
10589                     /* saturating, doubling */
10590                     gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
10591                                                       tcg_passres, tcg_passres);
10592                 }
10593
10594                 if (opcode == 0xa || opcode == 0xb) {
10595                     continue;
10596                 }
10597
10598                 /* Accumulating op: handle accumulate step */
10599                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10600
10601                 switch (opcode) {
10602                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10603                     tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10604                     break;
10605                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10606                     tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10607                     break;
10608                 case 0x7: /* SQDMLSL, SQDMLSL2 */
10609                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
10610                     /* fall through */
10611                 case 0x3: /* SQDMLAL, SQDMLAL2 */
10612                     gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
10613                                                       tcg_res[pass],
10614                                                       tcg_passres);
10615                     break;
10616                 default:
10617                     g_assert_not_reached();
10618                 }
10619                 tcg_temp_free_i64(tcg_passres);
10620             }
10621             tcg_temp_free_i64(tcg_idx);
10622
10623             if (is_scalar) {
10624                 clear_vec_high(s, rd);
10625             }
10626         } else {
10627             TCGv_i32 tcg_idx = tcg_temp_new_i32();
10628
10629             assert(size == 1);
10630             read_vec_element_i32(s, tcg_idx, rm, index, size);
10631
10632             if (!is_scalar) {
10633                 /* The simplest way to handle the 16x16 indexed ops is to
10634                  * duplicate the index into both halves of the 32 bit tcg_idx
10635                  * and then use the usual Neon helpers.
10636                  */
10637                 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10638             }
10639
10640             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10641                 TCGv_i32 tcg_op = tcg_temp_new_i32();
10642                 TCGv_i64 tcg_passres;
10643
10644                 if (is_scalar) {
10645                     read_vec_element_i32(s, tcg_op, rn, pass, size);
10646                 } else {
10647                     read_vec_element_i32(s, tcg_op, rn,
10648                                          pass + (is_q * 2), MO_32);
10649                 }
10650
10651                 tcg_res[pass] = tcg_temp_new_i64();
10652
10653                 if (opcode == 0xa || opcode == 0xb) {
10654                     /* Non-accumulating ops */
10655                     tcg_passres = tcg_res[pass];
10656                 } else {
10657                     tcg_passres = tcg_temp_new_i64();
10658                 }
10659
10660                 if (memop & MO_SIGN) {
10661                     gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
10662                 } else {
10663                     gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
10664                 }
10665                 if (satop) {
10666                     gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
10667                                                       tcg_passres, tcg_passres);
10668                 }
10669                 tcg_temp_free_i32(tcg_op);
10670
10671                 if (opcode == 0xa || opcode == 0xb) {
10672                     continue;
10673                 }
10674
10675                 /* Accumulating op: handle accumulate step */
10676                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10677
10678                 switch (opcode) {
10679                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10680                     gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
10681                                              tcg_passres);
10682                     break;
10683                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10684                     gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
10685                                              tcg_passres);
10686                     break;
10687                 case 0x7: /* SQDMLSL, SQDMLSL2 */
10688                     gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10689                     /* fall through */
10690                 case 0x3: /* SQDMLAL, SQDMLAL2 */
10691                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
10692                                                       tcg_res[pass],
10693                                                       tcg_passres);
10694                     break;
10695                 default:
10696                     g_assert_not_reached();
10697                 }
10698                 tcg_temp_free_i64(tcg_passres);
10699             }
10700             tcg_temp_free_i32(tcg_idx);
10701
10702             if (is_scalar) {
10703                 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
10704             }
10705         }
10706
10707         if (is_scalar) {
10708             tcg_res[1] = tcg_const_i64(0);
10709         }
10710
10711         for (pass = 0; pass < 2; pass++) {
10712             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10713             tcg_temp_free_i64(tcg_res[pass]);
10714         }
10715     }
10716
10717     if (!TCGV_IS_UNUSED_PTR(fpst)) {
10718         tcg_temp_free_ptr(fpst);
10719     }
10720 }
10721
10722 /* C3.6.19 Crypto AES
10723  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
10724  * +-----------------+------+-----------+--------+-----+------+------+
10725  * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10726  * +-----------------+------+-----------+--------+-----+------+------+
       *
       * Decodes the size, opcode, Rn and Rd fields of insn and emits a call
       * to the selected crypto helper.  AESE and AESD share one helper, as
       * do AESMC and AESIMC; the 'decrypt' value (1 for AESD/AESIMC, 0
       * otherwise) is passed to the helper to tell the two apart.
       *
       * The encoding is treated as unallocated when the CPU lacks
       * ARM_FEATURE_V8_AES, when size is non-zero, or when opcode is not
       * one of 0x4..0x7.
10727  */
10728 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
10729 {
10730     int size = extract32(insn, 22, 2);
10731     int opcode = extract32(insn, 12, 5);
10732     int rn = extract32(insn, 5, 5);
10733     int rd = extract32(insn, 0, 5);
10734     int decrypt;
10735     TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_decrypt;
10736     CryptoThreeOpEnvFn *genfn;
10737
          /* Feature missing or non-zero size field: nothing is allocated here */
10738     if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
10739         || size != 0) {
10740         unallocated_encoding(s);
10741         return;
10742     }
10743
          /* Choose the helper and the decrypt flag from the opcode field */
10744     switch (opcode) {
10745     case 0x4: /* AESE */
10746         decrypt = 0;
10747         genfn = gen_helper_crypto_aese;
10748         break;
10749     case 0x6: /* AESMC */
10750         decrypt = 0;
10751         genfn = gen_helper_crypto_aesmc;
10752         break;
10753     case 0x5: /* AESD */
10754         decrypt = 1;
10755         genfn = gen_helper_crypto_aese;
10756         break;
10757     case 0x7: /* AESIMC */
10758         decrypt = 1;
10759         genfn = gen_helper_crypto_aesmc;
10760         break;
10761     default:
10762         unallocated_encoding(s);
10763         return;
10764     }
10765
10766     /* Note that we convert the Vx register indexes into the
10767      * index within the vfp.regs[] array, so we can share the
10768      * helper with the AArch32 instructions.
10769      */
10770     tcg_rd_regno = tcg_const_i32(rd << 1);
10771     tcg_rn_regno = tcg_const_i32(rn << 1);
10772     tcg_decrypt = tcg_const_i32(decrypt);
10773
          /* The decrypt flag travels to the helper as its last operand */
10774     genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_decrypt);
10775
          /* Release the three constant temporaries created above */
10776     tcg_temp_free_i32(tcg_rd_regno);
10777     tcg_temp_free_i32(tcg_rn_regno);
10778     tcg_temp_free_i32(tcg_decrypt);
10779 }
10780
10781 /* C3.6.20 Crypto three-reg SHA
10782  *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
10783  * +-----------------+------+---+------+---+--------+-----+------+------+
10784  * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
10785  * +-----------------+------+---+------+---+--------+-----+------+------+
       *
       * size must be 0.  Opcodes 0-3 (SHA1C, SHA1P, SHA1M, SHA1SU0) require
       * ARM_FEATURE_V8_SHA1 and all go through gen_helper_crypto_sha1_3reg,
       * which is given the opcode as an extra operand.  Opcodes 4-6 (SHA256H,
       * SHA256H2, SHA256SU1) require ARM_FEATURE_V8_SHA256 and each have a
       * dedicated helper.  Any other opcode, a non-zero size, or a missing
       * feature results in unallocated_encoding().
10786  */
10787 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
10788 {
10789     int size = extract32(insn, 22, 2);
10790     int opcode = extract32(insn, 12, 3);
10791     int rm = extract32(insn, 16, 5);
10792     int rn = extract32(insn, 5, 5);
10793     int rd = extract32(insn, 0, 5);
10794     CryptoThreeOpEnvFn *genfn;
10795     TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_rm_regno;
          /* Default for the SHA256 cases; the SHA1 cases override it below */
10796     int feature = ARM_FEATURE_V8_SHA256;
10797
10798     if (size != 0) {
10799         unallocated_encoding(s);
10800         return;
10801     }
10802
10803     switch (opcode) {
10804     case 0: /* SHA1C */
10805     case 1: /* SHA1P */
10806     case 2: /* SHA1M */
10807     case 3: /* SHA1SU0 */
              /* NULL marks the SHA1 group, which uses the shared helper below */
10808         genfn = NULL;
10809         feature = ARM_FEATURE_V8_SHA1;
10810         break;
10811     case 4: /* SHA256H */
10812         genfn = gen_helper_crypto_sha256h;
10813         break;
10814     case 5: /* SHA256H2 */
10815         genfn = gen_helper_crypto_sha256h2;
10816         break;
10817     case 6: /* SHA256SU1 */
10818         genfn = gen_helper_crypto_sha256su1;
10819         break;
10820     default:
10821         unallocated_encoding(s);
10822         return;
10823     }
10824
          /* The feature check comes after opcode decode because it depends on it */
10825     if (!arm_dc_feature(s, feature)) {
10826         unallocated_encoding(s);
10827         return;
10828     }
10829
          /* Register numbers are doubled before being passed to the helpers,
           * the same conversion disas_crypto_aes applies for vfp.regs[] indexes.
           */
10830     tcg_rd_regno = tcg_const_i32(rd << 1);
10831     tcg_rn_regno = tcg_const_i32(rn << 1);
10832     tcg_rm_regno = tcg_const_i32(rm << 1);
10833
10834     if (genfn) {
10835         genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_rm_regno);
10836     } else {
              /* SHA1 group: one helper, told which operation via the opcode */
10837         TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
10838
10839         gen_helper_crypto_sha1_3reg(cpu_env, tcg_rd_regno,
10840                                     tcg_rn_regno, tcg_rm_regno, tcg_opcode);
10841         tcg_temp_free_i32(tcg_opcode);
10842     }
10843
10844     tcg_temp_free_i32(tcg_rd_regno);
10845     tcg_temp_free_i32(tcg_rn_regno);
10846     tcg_temp_free_i32(tcg_rm_regno);
10847 }
10848
10849 /* C3.6.21 Crypto two-reg SHA
10850  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
10851  * +-----------------+------+-----------+--------+-----+------+------+
10852  * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10853  * +-----------------+------+-----------+--------+-----+------+------+
       *
       * size must be 0.  Opcode 0 (SHA1H) and 1 (SHA1SU1) require
       * ARM_FEATURE_V8_SHA1; opcode 2 (SHA256SU0) requires
       * ARM_FEATURE_V8_SHA256.  Each opcode has its own two-operand helper.
       * Any other opcode, a non-zero size, or a missing feature results in
       * unallocated_encoding().
10854  */
10855 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
10856 {
10857     int size = extract32(insn, 22, 2);
10858     int opcode = extract32(insn, 12, 5);
10859     int rn = extract32(insn, 5, 5);
10860     int rd = extract32(insn, 0, 5);
10861     CryptoTwoOpEnvFn *genfn;
          /* Assigned by every switch case that reaches the feature check */
10862     int feature;
10863     TCGv_i32 tcg_rd_regno, tcg_rn_regno;
10864
10865     if (size != 0) {
10866         unallocated_encoding(s);
10867         return;
10868     }
10869
10870     switch (opcode) {
10871     case 0: /* SHA1H */
10872         feature = ARM_FEATURE_V8_SHA1;
10873         genfn = gen_helper_crypto_sha1h;
10874         break;
10875     case 1: /* SHA1SU1 */
10876         feature = ARM_FEATURE_V8_SHA1;
10877         genfn = gen_helper_crypto_sha1su1;
10878         break;
10879     case 2: /* SHA256SU0 */
10880         feature = ARM_FEATURE_V8_SHA256;
10881         genfn = gen_helper_crypto_sha256su0;
10882         break;
10883     default:
10884         unallocated_encoding(s);
10885         return;
10886     }
10887
10888     if (!arm_dc_feature(s, feature)) {
10889         unallocated_encoding(s);
10890         return;
10891     }
10892
          /* Register numbers are doubled before being passed to the helper,
           * the same conversion disas_crypto_aes applies for vfp.regs[] indexes.
           */
10893     tcg_rd_regno = tcg_const_i32(rd << 1);
10894     tcg_rn_regno = tcg_const_i32(rn << 1);
10895
10896     genfn(cpu_env, tcg_rd_regno, tcg_rn_regno);
10897
10898     tcg_temp_free_i32(tcg_rd_regno);
10899     tcg_temp_free_i32(tcg_rn_regno);
10900 }
10901
10902 /* C3.6 Data processing - SIMD, inc Crypto
10903  *
10904  * As the decode gets a little complex we are using a table based
10905  * approach for this part of the decode.
       *
       * Each entry is { pattern, mask, fn }.  Entry order matters: where one
       * decode is a subset of another (see simd_mod_imm below) the narrower
       * entry has to come first.  disas_simd_indexed is listed twice, once
       * for the vector form and once for the scalar form.  The final entry,
       * with a zero pattern, zero mask and NULL fn, terminates the table.
       *
       * NOTE(review): presumably lookup_disas_fn() returns the fn of the
       * first entry for which (insn & mask) == pattern - confirm against
       * its definition, which is not in this part of the file.
10906  */
10907 static const AArch64DecodeTable data_proc_simd[] = {
10908     /* pattern  ,  mask     ,  fn                        */
10909     { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
10910     { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
10911     { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
10912     { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
10913     { 0x0e000400, 0x9fe08400, disas_simd_copy },
10914     { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
10915     /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
10916     { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
10917     { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
10918     { 0x0e000000, 0xbf208c00, disas_simd_tb },
10919     { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
10920     { 0x2e000000, 0xbf208400, disas_simd_ext },
10921     { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
10922     { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
10923     { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
10924     { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
10925     { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
10926     { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
10927     { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
          /* The crypto entries use a mask whose top byte is 0xff, i.e. they
           * constrain every one of bits [31:24].
           */
10928     { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
10929     { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
10930     { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
10931     { 0x00000000, 0x00000000, NULL }
10932 };
10933
      /* Decode one SIMD (including crypto) instruction.
       *
       * Looks insn up in the data_proc_simd[] table and calls the handler
       * that lookup_disas_fn() returns; when it returns NULL the encoding is
       * reported through unallocated_encoding().
       */
10934 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
10935 {
10936     /* Note that this is called with all non-FP cases from
10937      * table C3-6 so it must UNDEF for entries not specifically
10938      * allocated to instructions in that table.
10939      */
10940     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
10941     if (fn) {
10942         fn(s, insn);
10943     } else {
10944         unallocated_encoding(s);
10945     }
10946 }
10947
10948 /* C3.6 Data processing - SIMD and floating point
       *
       * Routes insn to disas_data_proc_fp() when bit 28 is set and bit 30 is
       * clear; every other encoding is handed to disas_data_proc_simd().
       */
10949 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
10950 {
10951     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
10952         disas_data_proc_fp(s, insn);
10953     } else {
10954         /* SIMD, including crypto */
10955         disas_data_proc_simd(s, insn);
10956     }
10957 }
10958
10959 /* C3.1 A64 instruction index by encoding
       *
       * Translates a single instruction.  The 32-bit word at s->pc is fetched
       * with arm_ldl_code() and saved in s->insn, and s->pc is advanced by 4
       * before any decoding, so the handlers run with s->pc already past the
       * current instruction.  s->fp_access_checked is reset for each
       * instruction.  Bits [28:25] of the word select the top-level decode
       * group; afterwards free_tmp_a64() is called.
       */
10960 static void disas_a64_insn(CPUARMState *env, DisasContext *s)
10961 {
10962     uint32_t insn;
10963
10964     insn = arm_ldl_code(env, s->pc, s->bswap_code);
10965     s->insn = insn;
10966     s->pc += 4;
10967
10968     s->fp_access_checked = false;
10969
10970     switch (extract32(insn, 25, 4)) {
10971     case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
10972         unallocated_encoding(s);
10973         break;
10974     case 0x8: case 0x9: /* Data processing - immediate */
10975         disas_data_proc_imm(s, insn);
10976         break;
10977     case 0xa: case 0xb: /* Branch, exception generation and system insns */
10978         disas_b_exc_sys(s, insn);
10979         break;
10980     case 0x4:
10981     case 0x6:
10982     case 0xc:
10983     case 0xe:      /* Loads and stores */
10984         disas_ldst(s, insn);
10985         break;
10986     case 0x5:
10987     case 0xd:      /* Data processing - register */
10988         disas_data_proc_reg(s, insn);
10989         break;
10990     case 0x7:
10991     case 0xf:      /* Data processing - SIMD and floating point */
10992         disas_data_proc_simd_fp(s, insn);
10993         break;
10994     default:
10995         assert(FALSE); /* all 16 cases should be handled above */
10996         break;
10997     }
10998
10999     /* if we allocated any temporaries, free them here */
11000     free_tmp_a64(s);
11001 }
11002
      /* Translate one TranslationBlock of A64 code starting at tb->pc.
       *
       * A DisasContext is initialised from tb->flags, the CPU state and its
       * feature set; instructions are then translated one at a time with
       * disas_a64_insn() until the loop condition below ends the block, and
       * finally the block epilogue selected by dc->is_jmp is emitted.
       *
       * cpu:       CPU whose env, cp_regs and features are read
       * tb:        block being translated; tb->pc and tb->cflags are read
       * search_pc: when true the tcg_ctx.gen_opc_pc/instr_start/icount arrays
       *            are filled in as ops are generated; when false tb->size
       *            and tb->icount are written at the end instead
       */
11003 void gen_intermediate_code_internal_a64(ARMCPU *cpu,
11004                                         TranslationBlock *tb,
11005                                         bool search_pc)
11006 {
11007     CPUState *cs = CPU(cpu);
11008     CPUARMState *env = &cpu->env;
11009     DisasContext dc1, *dc = &dc1;
11010     int j, lj;
11011     target_ulong pc_start;
11012     target_ulong next_page_start;
11013     int num_insns;
11014     int max_insns;
11015
11016     pc_start = tb->pc;
11017
11018     dc->tb = tb;
11019
11020     dc->is_jmp = DISAS_NEXT;
11021     dc->pc = pc_start;
11022     dc->singlestep_enabled = cs->singlestep_enabled;
11023     dc->condjmp = 0;
11024
11025     dc->aarch64 = 1;
11026     /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
11027      * there is no secure EL1, so we route exceptions to EL3.
11028      */
11029     dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
11030                                !arm_el_is_aa64(env, 3);
          /* The next four fields get fixed values here rather than values
           * derived from tb->flags.
           */
11031     dc->thumb = 0;
11032     dc->bswap_code = 0;
11033     dc->condexec_mask = 0;
11034     dc->condexec_cond = 0;
11035     dc->mmu_idx = ARM_TBFLAG_MMUIDX(tb->flags);
11036     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
11037 #if !defined(CONFIG_USER_ONLY)
11038     dc->user = (dc->current_el == 0);
11039 #endif
11040     dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(tb->flags);
11041     dc->vec_len = 0;
11042     dc->vec_stride = 0;
11043     dc->cp_regs = cpu->cp_regs;
11044     dc->features = env->features;
11045
11046     /* Single step state. The code-generation logic here is:
11047      *  SS_ACTIVE == 0:
11048      *   generate code with no special handling for single-stepping (except
11049      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
11050      *   this happens anyway because those changes are all system register or
11051      *   PSTATE writes).
11052      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
11053      *   emit code for one insn
11054      *   emit code to clear PSTATE.SS
11055      *   emit code to generate software step exception for completed step
11056      *   end TB (as usual for having generated an exception)
11057      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
11058      *   emit code to generate a software step exception
11059      *   end the TB
11060      */
11061     dc->ss_active = ARM_TBFLAG_SS_ACTIVE(tb->flags);
11062     dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(tb->flags);
11063     dc->is_ldex = false;
11064     dc->ss_same_el = (arm_debug_target_el(env) == dc->current_el);
11065
11066     init_tmp_a64_array(dc);
11067
          /* Start of the page after the one holding pc_start; the loop stops
           * once dc->pc reaches it.
           */
11068     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
          /* lj is the last gen_opc_* index written (only used with search_pc) */
11069     lj = -1;
11070     num_insns = 0;
          /* Instruction budget: the CF_COUNT_MASK bits of tb->cflags, with a
           * zero count falling back to CF_COUNT_MASK, clamped to TCG_MAX_INSNS.
           */
11071     max_insns = tb->cflags & CF_COUNT_MASK;
11072     if (max_insns == 0) {
11073         max_insns = CF_COUNT_MASK;
11074     }
11075     if (max_insns > TCG_MAX_INSNS) {
11076         max_insns = TCG_MAX_INSNS;
11077     }
11078
11079     gen_tb_start(tb);
11080
11081     tcg_clear_temp_count();
11082
11083     do {
11084         if (search_pc) {
                  /* Zero the instr_start markers for ops generated since the
                   * previous instruction, then record this instruction's pc,
                   * start marker and instruction count at the current op index.
                   */
11085             j = tcg_op_buf_count();
11086             if (lj < j) {
11087                 lj++;
11088                 while (lj < j) {
11089                     tcg_ctx.gen_opc_instr_start[lj++] = 0;
11090                 }
11091             }
11092             tcg_ctx.gen_opc_pc[lj] = dc->pc;
11093             tcg_ctx.gen_opc_instr_start[lj] = 1;
11094             tcg_ctx.gen_opc_icount[lj] = num_insns;
11095         }
11096         tcg_gen_insn_start(dc->pc, 0);
11097         num_insns++;
11098
11099         if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
11100             CPUBreakpoint *bp;
11101             QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
11102                 if (bp->pc == dc->pc) {
11103                     gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
11104                     /* Advance PC so that clearing the breakpoint will
11105                        invalidate this TB.  */
                          /* NOTE(review): this advances by 2 although
                           * disas_a64_insn() consumes instructions in 4-byte
                           * units; the advance makes tb->size (dc->pc -
                           * pc_start, set below) non-zero.  Confirm whether 4
                           * is intended.
                           */
11106                     dc->pc += 2;
11107                     goto done_generating;
11108                 }
11109             }
11110         }
11111
              /* Only the final permitted instruction of a CF_LAST_IO block is
               * preceded by gen_io_start().
               */
11112         if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
11113             gen_io_start();
11114         }
11115
11116         if (dc->ss_active && !dc->pstate_ss) {
11117             /* Singlestep state is Active-pending.
11118              * If we're in this state at the start of a TB then either
11119              *  a) we just took an exception to an EL which is being debugged
11120              *     and this is the first insn in the exception handler
11121              *  b) debug exceptions were masked and we just unmasked them
11122              *     without changing EL (eg by clearing PSTATE.D)
11123              * In either case we're going to take a swstep exception in the
11124              * "did not step an insn" case, and so the syndrome ISV and EX
11125              * bits should be zero.
11126              */
11127             assert(num_insns == 1);
11128             gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
11129                           default_exception_el(dc));
11130             dc->is_jmp = DISAS_EXC;
11131             break;
11132         }
11133
11134         disas_a64_insn(env, dc);
11135
              /* Diagnostic only: report, but do not stop on, leaked temporaries */
11136         if (tcg_check_temp_count()) {
11137             fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n",
11138                     dc->pc);
11139         }
11140
11141         /* Translation stops when a conditional branch is encountered.
11142          * Otherwise the subsequent code could get translated several times.
11143          * Also stop translation when a page boundary is reached.  This
11144          * ensures prefetch aborts occur at the right place.
11145          */
11146     } while (!dc->is_jmp && !tcg_op_buf_full() &&
11147              !cs->singlestep_enabled &&
11148              !singlestep &&
11149              !dc->ss_active &&
11150              dc->pc < next_page_start &&
11151              num_insns < max_insns);
11152
          /* Counterpart of the gen_io_start() call inside the loop */
11153     if (tb->cflags & CF_LAST_IO) {
11154         gen_io_end();
11155     }
11156
11157     if (unlikely(cs->singlestep_enabled || dc->ss_active)
11158         && dc->is_jmp != DISAS_EXC) {
11159         /* Note that this means single stepping WFI doesn't halt the CPU.
11160          * For conditional branch insns this is harmless unreachable code as
11161          * gen_goto_tb() has already handled emitting the debug exception
11162          * (and thus a tb-jump is not possible when singlestepping).
11163          */
11164         assert(dc->is_jmp != DISAS_TB_JUMP);
11165         if (dc->is_jmp != DISAS_JUMP) {
11166             gen_a64_set_pc_im(dc->pc);
11167         }
              /* cs->singlestep_enabled raises EXCP_DEBUG; otherwise
               * dc->ss_active is set and the step-complete exception is
               * generated.
               */
11168         if (cs->singlestep_enabled) {
11169             gen_exception_internal(EXCP_DEBUG);
11170         } else {
11171             gen_step_complete_exception(dc);
11172         }
11173     } else {
              /* Not single-stepping (or an exception was already generated):
               * emit the block exit that matches dc->is_jmp.
               */
11174         switch (dc->is_jmp) {
11175         case DISAS_NEXT:
11176             gen_goto_tb(dc, 1, dc->pc);
11177             break;
11178         default:
11179         case DISAS_UPDATE:
11180             gen_a64_set_pc_im(dc->pc);
11181             /* fall through */
11182         case DISAS_JUMP:
11183             /* indicate that the hash table must be used to find the next TB */
11184             tcg_gen_exit_tb(0);
11185             break;
11186         case DISAS_TB_JUMP:
11187         case DISAS_EXC:
11188         case DISAS_SWI:
                  /* nothing further is emitted for these states */
11189             break;
11190         case DISAS_WFE:
11191             gen_a64_set_pc_im(dc->pc);
11192             gen_helper_wfe(cpu_env);
11193             break;
11194         case DISAS_YIELD:
11195             gen_a64_set_pc_im(dc->pc);
11196             gen_helper_yield(cpu_env);
11197             break;
11198         case DISAS_WFI:
11199             /* This is a special case because we don't want to just halt the CPU
11200              * if trying to debug across a WFI.
11201              */
11202             gen_a64_set_pc_im(dc->pc);
11203             gen_helper_wfi(cpu_env);
11204             /* The helper doesn't necessarily throw an exception, but we
11205              * must go back to the main loop to check for interrupts anyway.
11206              */
11207             tcg_gen_exit_tb(0);
11208             break;
11209         }
11210     }
11211
      /* The breakpoint path above jumps straight here, skipping the epilogue */
11212 done_generating:
11213     gen_tb_end(tb, num_insns);
11214
11215 #ifdef DEBUG_DISAS
11216     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
11217         qemu_log("----------------\n");
11218         qemu_log("IN: %s\n", lookup_symbol(pc_start));
11219         log_target_disas(cs, pc_start, dc->pc - pc_start,
11220                          4 | (dc->bswap_code << 1));
11221         qemu_log("\n");
11222     }
11223 #endif
11224     if (search_pc) {
              /* Zero the instr_start markers from the entry after the last
               * recorded instruction up to and including index j.
               */
11225         j = tcg_op_buf_count();
11226         lj++;
11227         while (lj <= j) {
11228             tcg_ctx.gen_opc_instr_start[lj++] = 0;
11229         }
11230     } else {
              /* Record the guest byte span and instruction count of the block */
11231         tb->size = dc->pc - pc_start;
11232         tb->icount = num_insns;
11233     }
11234 }