target-arm: Stop underdecoding ARM946 PRBS registers

[qemu.git] / target-arm / translate-a64.c
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c

index a96ee4aa4b4657c86973815dc1eaac144325026f..f6500e5181184320d9dcff7cc10208d47589124c 100644 (file)
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -77,6 +77,8 @@ typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
  typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
  typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
  typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
+typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
+typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
  
  /* initialize TCG globals.  */
  void a64_translate_init(void)
@@ -1175,7 +1177,12 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
                                                 crn, crm, op0, op1, op2));
  
      if (!ri) {
-        /* Unknown register */
+        /* Unknown register; this might be a guest error or a QEMU
+         * unimplemented feature.
+         */
+        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
+                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
+                      isread ? "read" : "write", op0, op1, crn, crm, op2);
          unallocated_encoding(s);
          return;
      }
@@ -5838,7 +5845,100 @@ static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
   */
  static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
  {
-    unsupported_encoding(s, insn);
+    bool is_u = extract32(insn, 29, 1);
+    int size = extract32(insn, 22, 2);
+    int opcode = extract32(insn, 12, 4);
+    int rm = extract32(insn, 16, 5);
+    int rn = extract32(insn, 5, 5);
+    int rd = extract32(insn, 0, 5);
+    /* No U=1 encoding is accepted here: all are treated as unallocated */
+    if (is_u) {
+        unallocated_encoding(s);
+        return;
+    }
+    /* Only opcodes 0x9/0xb/0xd are accepted, and only with size 1 or 2 */
+    switch (opcode) {
+    case 0x9: /* SQDMLAL, SQDMLAL2 */
+    case 0xb: /* SQDMLSL, SQDMLSL2 */
+    case 0xd: /* SQDMULL, SQDMULL2 */
+        if (size == 0 || size == 3) {
+            unallocated_encoding(s);
+            return;
+        }
+        break;
+    default:
+        unallocated_encoding(s);
+        return;
+    }
+
+    if (size == 2) {
+        TCGv_i64 tcg_op1 = tcg_temp_new_i64();
+        TCGv_i64 tcg_op2 = tcg_temp_new_i64();
+        TCGv_i64 tcg_res = tcg_temp_new_i64();
+        /* size 2: sign-extended 32-bit inputs (element 0), 64-bit result */
+        read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
+        read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
+        /* Multiply, then add the product to itself (the doubling step) */
+        tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
+        gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
+        /* 0xb negates the product, then shares the accumulate with 0x9 */
+        switch (opcode) {
+        case 0xd: /* SQDMULL, SQDMULL2 */
+            break;
+        case 0xb: /* SQDMLSL, SQDMLSL2 */
+            tcg_gen_neg_i64(tcg_res, tcg_res);
+            /* fall through */
+        case 0x9: /* SQDMLAL, SQDMLAL2 */
+            read_vec_element(s, tcg_op1, rd, 0, MO_64);
+            gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
+                                              tcg_res, tcg_op1);
+            break;
+        default:
+            g_assert_not_reached();
+        }
+
+        write_fp_dreg(s, rd, tcg_res);
+
+        tcg_temp_free_i64(tcg_op1);
+        tcg_temp_free_i64(tcg_op2);
+        tcg_temp_free_i64(tcg_res);
+    } else {
+        TCGv_i32 tcg_op1 = tcg_temp_new_i32();
+        TCGv_i32 tcg_op2 = tcg_temp_new_i32();
+        TCGv_i64 tcg_res = tcg_temp_new_i64();
+        /* size 1: 16-bit inputs (element 0), result kept in a 64-bit temp */
+        read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
+        read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
+        /* Multiply into tcg_res, then add tcg_res to itself to double it */
+        gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
+        gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
+        /* 0xb negates the product, then shares the accumulate with 0x9 */
+        switch (opcode) {
+        case 0xd: /* SQDMULL, SQDMULL2 */
+            break;
+        case 0xb: /* SQDMLSL, SQDMLSL2 */
+            gen_helper_neon_negl_u32(tcg_res, tcg_res);
+            /* fall through */
+        case 0x9: /* SQDMLAL, SQDMLAL2 */
+        {
+            TCGv_i64 tcg_op3 = tcg_temp_new_i64();
+            read_vec_element(s, tcg_op3, rd, 0, MO_32);
+            gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
+                                              tcg_res, tcg_op3);
+            tcg_temp_free_i64(tcg_op3);
+            break;
+        }
+        default:
+            g_assert_not_reached();
+        }
+        /* Zero-extend the low 32 bits before the 64-bit write to rd */
+        tcg_gen_ext32u_i64(tcg_res, tcg_res);
+        write_fp_dreg(s, rd, tcg_res);
+
+        tcg_temp_free_i32(tcg_op1);
+        tcg_temp_free_i32(tcg_op2);
+        tcg_temp_free_i64(tcg_res);
+    }
  }
  
  static void handle_3same_64(DisasContext *s, int opcode, bool u,
@@ -5950,15 +6050,33 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
              read_vec_element(s, tcg_op2, rm, pass, MO_64);
  
              switch (fpopcode) {
+            case 0x39: /* FMLS */
+                /* As usual for ARM, separate negation for fused multiply-add */
+                gen_helper_vfp_negd(tcg_op1, tcg_op1);
+                /* fall through */
+            case 0x19: /* FMLA */
+                read_vec_element(s, tcg_res, rd, pass, MO_64);
+                gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
+                                       tcg_res, fpst);
+                break;
              case 0x18: /* FMAXNM */
                  gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
                  break;
              case 0x1a: /* FADD */
                  gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
                  break;
+            case 0x1b: /* FMULX */
+                gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
+            case 0x1c: /* FCMEQ */
+                gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
              case 0x1e: /* FMAX */
                  gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
                  break;
+            case 0x1f: /* FRECPS */
+                gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
              case 0x38: /* FMINNM */
                  gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
                  break;
@@ -5968,9 +6086,18 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
              case 0x3e: /* FMIN */
                  gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
                  break;
+            case 0x3f: /* FRSQRTS */
+                gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
              case 0x5b: /* FMUL */
                  gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
                  break;
+            case 0x5c: /* FCMGE */
+                gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
+            case 0x5d: /* FACGE */
+                gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
              case 0x5f: /* FDIV */
                  gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
                  break;
@@ -5978,6 +6105,12 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
                  gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
                  gen_helper_vfp_absd(tcg_res, tcg_res);
                  break;
+            case 0x7c: /* FCMGT */
+                gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
+            case 0x7d: /* FACGT */
+                gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
              default:
                  g_assert_not_reached();
              }
@@ -5997,12 +6130,30 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
              read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
  
              switch (fpopcode) {
+            case 0x39: /* FMLS */
+                /* As usual for ARM, separate negation for fused multiply-add */
+                gen_helper_vfp_negs(tcg_op1, tcg_op1);
+                /* fall through */
+            case 0x19: /* FMLA */
+                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+                gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
+                                       tcg_res, fpst);
+                break;
              case 0x1a: /* FADD */
                  gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
                  break;
+            case 0x1b: /* FMULX */
+                gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
+            case 0x1c: /* FCMEQ */
+                gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
              case 0x1e: /* FMAX */
                  gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
                  break;
+            case 0x1f: /* FRECPS */
+                gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
              case 0x18: /* FMAXNM */
                  gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
                  break;
@@ -6015,9 +6166,18 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
              case 0x3e: /* FMIN */
                  gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
                  break;
+            case 0x3f: /* FRSQRTS */
+                gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
              case 0x5b: /* FMUL */
                  gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
                  break;
+            case 0x5c: /* FCMGE */
+                gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
+            case 0x5d: /* FACGE */
+                gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
              case 0x5f: /* FDIV */
                  gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
                  break;
@@ -6025,6 +6185,12 @@ static void handle_3same_float(DisasContext *s, int size, int elements,
                  gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
                  gen_helper_vfp_abss(tcg_res, tcg_res);
                  break;
+            case 0x7c: /* FCMGT */
+                gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
+            case 0x7d: /* FACGT */
+                gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
+                break;
              default:
                  g_assert_not_reached();
              }
@@ -6075,15 +6241,13 @@ static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
          int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
          switch (fpopcode) {
          case 0x1b: /* FMULX */
-        case 0x1c: /* FCMEQ */
          case 0x1f: /* FRECPS */
          case 0x3f: /* FRSQRTS */
-        case 0x5c: /* FCMGE */
          case 0x5d: /* FACGE */
-        case 0x7c: /* FCMGT */
          case 0x7d: /* FACGT */
-            unsupported_encoding(s, insn);
-            return;
+        case 0x1c: /* FCMEQ */
+        case 0x5c: /* FCMGE */
+        case 0x7c: /* FCMGT */
          case 0x7a: /* FABD */
              break;
          default:
@@ -6268,6 +6432,115 @@ static void handle_2misc_64(DisasContext *s, int opcode, bool u,
      }
  }
  
+static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
+                                   bool is_scalar, bool is_u, bool is_q,
+                                   int size, int rn, int rd)
+{
+    bool is_double = (size == 3);
+    TCGv_ptr fpst = get_fpstatus_ptr();
+    /* Compare each element of rn against zero, writing the result to rd */
+    if (is_double) {
+        TCGv_i64 tcg_op = tcg_temp_new_i64();
+        TCGv_i64 tcg_zero = tcg_const_i64(0);
+        TCGv_i64 tcg_res = tcg_temp_new_i64();
+        NeonGenTwoDoubleOPFn *genfn;
+        bool swap = false;
+        int pass;
+        /* Pick the comparison; 'swap' passes the zero as the first operand */
+        switch (opcode) {
+        case 0x2e: /* FCMLT (zero) */
+            swap = true;
+            /* fall through */
+        case 0x2c: /* FCMGT (zero) */
+            genfn = gen_helper_neon_cgt_f64;
+            break;
+        case 0x2d: /* FCMEQ (zero) */
+            genfn = gen_helper_neon_ceq_f64;
+            break;
+        case 0x6d: /* FCMLE (zero) */
+            swap = true;
+            /* fall through */
+        case 0x6c: /* FCMGE (zero) */
+            genfn = gen_helper_neon_cge_f64;
+            break;
+        default:
+            g_assert_not_reached();
+        }
+        /* Scalar: element 0 only; vector: both 64-bit elements */
+        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
+            read_vec_element(s, tcg_op, rn, pass, MO_64);
+            if (swap) {
+                genfn(tcg_res, tcg_zero, tcg_op, fpst);
+            } else {
+                genfn(tcg_res, tcg_op, tcg_zero, fpst);
+            }
+            write_vec_element(s, tcg_res, rd, pass, MO_64);
+        }
+        if (is_scalar) {
+            clear_vec_high(s, rd);
+        }
+
+        tcg_temp_free_i64(tcg_res);
+        tcg_temp_free_i64(tcg_zero);
+        tcg_temp_free_i64(tcg_op);
+    } else {
+        TCGv_i32 tcg_op = tcg_temp_new_i32();
+        TCGv_i32 tcg_zero = tcg_const_i32(0);
+        TCGv_i32 tcg_res = tcg_temp_new_i32();
+        NeonGenTwoSingleOPFn *genfn;
+        bool swap = false;
+        int pass, maxpasses;
+        /* Pick the comparison; 'swap' passes the zero as the first operand */
+        switch (opcode) {
+        case 0x2e: /* FCMLT (zero) */
+            swap = true;
+            /* fall through */
+        case 0x2c: /* FCMGT (zero) */
+            genfn = gen_helper_neon_cgt_f32;
+            break;
+        case 0x2d: /* FCMEQ (zero) */
+            genfn = gen_helper_neon_ceq_f32;
+            break;
+        case 0x6d: /* FCMLE (zero) */
+            swap = true;
+            /* fall through */
+        case 0x6c: /* FCMGE (zero) */
+            genfn = gen_helper_neon_cge_f32;
+            break;
+        default:
+            g_assert_not_reached();
+        }
+        /* Scalar: one 32-bit element; vector: 4 with Q set, otherwise 2 */
+        if (is_scalar) {
+            maxpasses = 1;
+        } else {
+            maxpasses = is_q ? 4 : 2;
+        }
+
+        for (pass = 0; pass < maxpasses; pass++) {
+            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
+            if (swap) {
+                genfn(tcg_res, tcg_zero, tcg_op, fpst);
+            } else {
+                genfn(tcg_res, tcg_op, tcg_zero, fpst);
+            }
+            if (is_scalar) {
+                write_fp_sreg(s, rd, tcg_res);
+            } else {
+                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+            }
+        }
+        tcg_temp_free_i32(tcg_res);
+        tcg_temp_free_i32(tcg_zero);
+        tcg_temp_free_i32(tcg_op);
+        if (!is_q && !is_scalar) {
+            clear_vec_high(s, rd);
+        }
+    }
+    /* NOTE(review): the is_u parameter is never read in this function */
+    tcg_temp_free_ptr(fpst);
+}
+
  /* C3.6.12 AdvSIMD scalar two reg misc
   *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
   * +-----+---+-----------+------+-----------+--------+-----+------+------+
@@ -6297,9 +6570,47 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
              return;
          }
          break;
+    case 0xc ... 0xf:
+    case 0x16 ... 0x1d:
+    case 0x1f:
+        /* Floating point: U, size[1] and opcode indicate operation;
+         * size[0] indicates single or double precision.
+         */
+        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
+        size = extract32(size, 0, 1) ? 3 : 2;
+        switch (opcode) {
+        case 0x2c: /* FCMGT (zero) */
+        case 0x2d: /* FCMEQ (zero) */
+        case 0x2e: /* FCMLT (zero) */
+        case 0x6c: /* FCMGE (zero) */
+        case 0x6d: /* FCMLE (zero) */
+            handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
+            return;
+        case 0x1a: /* FCVTNS */
+        case 0x1b: /* FCVTMS */
+        case 0x1c: /* FCVTAS */
+        case 0x1d: /* SCVTF */
+        case 0x3a: /* FCVTPS */
+        case 0x3b: /* FCVTZS */
+        case 0x3d: /* FRECPE */
+        case 0x3f: /* FRECPX */
+        case 0x56: /* FCVTXN, FCVTXN2 */
+        case 0x5a: /* FCVTNU */
+        case 0x5b: /* FCVTMU */
+        case 0x5c: /* FCVTAU */
+        case 0x5d: /* UCVTF */
+        case 0x7a: /* FCVTPU */
+        case 0x7b: /* FCVTZU */
+        case 0x7d: /* FRSQRTE */
+            unsupported_encoding(s, insn);
+            return;
+        default:
+            unallocated_encoding(s);
+            return;
+        }
+        break;
      default:
          /* Other categories of encoding in this class:
-         *  + floating point (single and double)
           *  + SUQADD/USQADD/SQABS/SQNEG : size 8, 16, 32 or 64
           *  + SQXTN/SQXTN2/SQXTUN/SQXTUN2/UQXTN/UQXTN2:
           *    narrowing saturate ops: size 64/32/16 -> 32/16/8
@@ -6322,17 +6633,6 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
      }
  }
  
-/* C3.6.13 AdvSIMD scalar x indexed element
- *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
- * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
- * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
- * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
- */
-static void disas_simd_scalar_indexed(DisasContext *s, uint32_t insn)
-{
-    unsupported_encoding(s, insn);
-}
-
  /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
  static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
                                   int immh, int immb, int opcode, int rn, int rd)
@@ -6856,39 +7156,22 @@ static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
      tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
  }
  
-/* Pairwise op subgroup of C3.6.16. */
-static void disas_simd_3same_pair(DisasContext *s, uint32_t insn)
+/* Pairwise op subgroup of C3.6.16.
+ *
+ * This is called directly or via the handle_3same_float for float pairwise
+ * operations where the opcode and size are calculated differently.
+ */
+static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
+                                   int size, int rn, int rm, int rd)
  {
-    int is_q = extract32(insn, 30, 1);
-    int u = extract32(insn, 29, 1);
-    int size = extract32(insn, 22, 2);
-    int opcode = extract32(insn, 11, 5);
-    int rm = extract32(insn, 16, 5);
-    int rn = extract32(insn, 5, 5);
-    int rd = extract32(insn, 0, 5);
+    TCGv_ptr fpst;
      int pass;
  
-    if (size == 3 && !is_q) {
-        unallocated_encoding(s);
-        return;
-    }
-
-    switch (opcode) {
-    case 0x14: /* SMAXP, UMAXP */
-    case 0x15: /* SMINP, UMINP */
-        if (size == 3) {
-            unallocated_encoding(s);
-            return;
-        }
-        break;
-    case 0x17:
-        if (u) {
-            unallocated_encoding(s);
-            return;
-        }
-        break;
-    default:
-        g_assert_not_reached();
+    /* Floating point operations need fpst */
+    if (opcode >= 0x58) {
+        fpst = get_fpstatus_ptr();
+    } else {
+        TCGV_UNUSED_PTR(fpst);
      }
  
      /* These operations work on the concatenated rm:rn, with each pair of
@@ -6906,9 +7189,28 @@ static void disas_simd_3same_pair(DisasContext *s, uint32_t insn)
              read_vec_element(s, tcg_op2, passreg, 1, MO_64);
              tcg_res[pass] = tcg_temp_new_i64();
  
-            /* The only 64 bit pairwise integer op is ADDP */
-            assert(opcode == 0x17);
-            tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
+            switch (opcode) {
+            case 0x17: /* ADDP */
+                tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
+                break;
+            case 0x58: /* FMAXNMP */
+                gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+                break;
+            case 0x5a: /* FADDP */
+                gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+                break;
+            case 0x5e: /* FMAXP */
+                gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+                break;
+            case 0x78: /* FMINNMP */
+                gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+                break;
+            case 0x7e: /* FMINP */
+                gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+                break;
+            default:
+                g_assert_not_reached();
+            }
  
              tcg_temp_free_i64(tcg_op1);
              tcg_temp_free_i64(tcg_op2);
@@ -6925,7 +7227,7 @@ static void disas_simd_3same_pair(DisasContext *s, uint32_t insn)
          for (pass = 0; pass < maxpass; pass++) {
              TCGv_i32 tcg_op1 = tcg_temp_new_i32();
              TCGv_i32 tcg_op2 = tcg_temp_new_i32();
-            NeonGenTwoOpFn *genfn;
+            NeonGenTwoOpFn *genfn = NULL;
              int passreg = pass < (maxpass / 2) ? rn : rm;
              int passelt = (is_q && (pass & 1)) ? 2 : 0;
  
@@ -6964,11 +7266,30 @@ static void disas_simd_3same_pair(DisasContext *s, uint32_t insn)
                  genfn = fns[size][u];
                  break;
              }
+            /* The FP operations are all on single floats (32 bit) */
+            case 0x58: /* FMAXNMP */
+                gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+                break;
+            case 0x5a: /* FADDP */
+                gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+                break;
+            case 0x5e: /* FMAXP */
+                gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+                break;
+            case 0x78: /* FMINNMP */
+                gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+                break;
+            case 0x7e: /* FMINP */
+                gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
+                break;
              default:
                  g_assert_not_reached();
              }
  
-            genfn(tcg_res[pass], tcg_op1, tcg_op2);
+            /* FP ops called directly, otherwise call now */
+            if (genfn) {
+                genfn(tcg_res[pass], tcg_op1, tcg_op2);
+            }
  
              tcg_temp_free_i32(tcg_op1);
              tcg_temp_free_i32(tcg_op2);
@@ -6982,6 +7303,10 @@ static void disas_simd_3same_pair(DisasContext *s, uint32_t insn)
              clear_vec_high(s, rd);
          }
      }
+
+    if (!TCGV_IS_UNUSED_PTR(fpst)) {
+        tcg_temp_free_ptr(fpst);
+    }
  }
  
  /* Floating point op subgroup of C3.6.16. */
@@ -7015,30 +7340,32 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
      case 0x5e: /* FMAXP */
      case 0x78: /* FMINNMP */
      case 0x7e: /* FMINP */
-        /* pairwise ops */
-        unsupported_encoding(s, insn);
+        if (size && !is_q) {
+            unallocated_encoding(s);
+            return;
+        }
+        handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
+                               rn, rm, rd);
          return;
      case 0x1b: /* FMULX */
-    case 0x1c: /* FCMEQ */
      case 0x1f: /* FRECPS */
      case 0x3f: /* FRSQRTS */
-    case 0x5c: /* FCMGE */
      case 0x5d: /* FACGE */
-    case 0x7c: /* FCMGT */
      case 0x7d: /* FACGT */
      case 0x19: /* FMLA */
      case 0x39: /* FMLS */
-        unsupported_encoding(s, insn);
-        return;
      case 0x18: /* FMAXNM */
      case 0x1a: /* FADD */
+    case 0x1c: /* FCMEQ */
      case 0x1e: /* FMAX */
      case 0x38: /* FMINNM */
      case 0x3a: /* FSUB */
      case 0x3e: /* FMIN */
      case 0x5b: /* FMUL */
+    case 0x5c: /* FCMGE */
      case 0x5f: /* FDIV */
      case 0x7a: /* FABD */
+    case 0x7c: /* FCMGT */
          handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
          return;
      default:
@@ -7366,9 +7693,28 @@ static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
      case 0x17: /* ADDP */
      case 0x14: /* SMAXP, UMAXP */
      case 0x15: /* SMINP, UMINP */
+    {
          /* Pairwise operations */
-        disas_simd_3same_pair(s, insn);
+        int is_q = extract32(insn, 30, 1);
+        int u = extract32(insn, 29, 1);
+        int size = extract32(insn, 22, 2);
+        int rm = extract32(insn, 16, 5);
+        int rn = extract32(insn, 5, 5);
+        int rd = extract32(insn, 0, 5);
+        if (opcode == 0x17) {
+            if (u || (size == 3 && !is_q)) {
+                unallocated_encoding(s);
+                return;
+            }
+        } else {
+            if (size == 3) {
+                unallocated_encoding(s);
+                return;
+            }
+        }
+        handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
          break;
+    }
      case 0x18 ... 0x31:
          /* floating point ops, sz[1] and U are part of opcode */
          disas_simd_3same_float(s, insn);
@@ -7618,6 +7964,17 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
                  return;
              }
              break;
+        case 0x2c: /* FCMGT (zero) */
+        case 0x2d: /* FCMEQ (zero) */
+        case 0x2e: /* FCMLT (zero) */
+        case 0x6c: /* FCMGE (zero) */
+        case 0x6d: /* FCMLE (zero) */
+            if (size == 3 && !is_q) {
+                unallocated_encoding(s);
+                return;
+            }
+            handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
+            return;
          case 0x16: /* FCVTN, FCVTN2 */
          case 0x17: /* FCVTL, FCVTL2 */
          case 0x18: /* FRINTN */
@@ -7626,9 +7983,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
          case 0x1b: /* FCVTMS */
          case 0x1c: /* FCVTAS */
          case 0x1d: /* SCVTF */
-        case 0x2c: /* FCMGT (zero) */
-        case 0x2d: /* FCMEQ (zero) */
-        case 0x2e: /* FCMLT (zero) */
          case 0x38: /* FRINTP */
          case 0x39: /* FRINTZ */
          case 0x3a: /* FCVTPS */
@@ -7642,8 +7996,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
          case 0x5b: /* FCVTMU */
          case 0x5c: /* FCVTAU */
          case 0x5d: /* UCVTF */
-        case 0x6c: /* FCMGE (zero) */
-        case 0x6d: /* FCMLE (zero) */
          case 0x79: /* FRINTI */
          case 0x7a: /* FCVTPU */
          case 0x7b: /* FCVTZU */
@@ -7805,13 +8157,18 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
      }
  }
  
-/* C3.6.18 AdvSIMD vector x indexed element
+/* C3.6.13 AdvSIMD scalar x indexed element
+ *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
+ * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
+ * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
+ * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
+ * C3.6.18 AdvSIMD vector x indexed element
   *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
   * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
   * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
   * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
   */
-static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
+static void disas_simd_indexed(DisasContext *s, uint32_t insn)
  {
      /* This encoding has two kinds of instruction:
       *  normal, where we perform elt x idxelt => elt for each
@@ -7820,6 +8177,7 @@ static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
       *     double the width of the input element
       * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
       */
+    bool is_scalar = extract32(insn, 28, 1);
      bool is_q = extract32(insn, 30, 1);
      bool u = extract32(insn, 29, 1);
      int size = extract32(insn, 22, 2);
@@ -7839,7 +8197,7 @@ static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
      switch (opcode) {
      case 0x0: /* MLA */
      case 0x4: /* MLS */
-        if (!u) {
+        if (!u || is_scalar) {
              unallocated_encoding(s);
              return;
          }
@@ -7847,6 +8205,10 @@ static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
      case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
      case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
      case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
+        if (is_scalar) {
+            unallocated_encoding(s);
+            return;
+        }
          is_long = true;
          break;
      case 0x3: /* SQDMLAL, SQDMLAL2 */
@@ -7856,12 +8218,17 @@ static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
          /* fall through */
      case 0xc: /* SQDMULH */
      case 0xd: /* SQRDMULH */
-    case 0x8: /* MUL */
          if (u) {
              unallocated_encoding(s);
              return;
          }
          break;
+    case 0x8: /* MUL */
+        if (u || is_scalar) {
+            unallocated_encoding(s);
+            return;
+        }
+        break;
      case 0x1: /* FMLA */
      case 0x5: /* FMLS */
          if (u) {
@@ -7909,11 +8276,6 @@ static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
          }
      }
  
-    if (is_long) {
-        unsupported_encoding(s, insn);
-        return;
-    }
-
      if (is_fp) {
          fpst = get_fpstatus_ptr();
      } else {
@@ -7928,7 +8290,7 @@ static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
  
          read_vec_element(s, tcg_idx, rm, index, MO_64);
  
-        for (pass = 0; pass < 2; pass++) {
+        for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
              TCGv_i64 tcg_op = tcg_temp_new_i64();
              TCGv_i64 tcg_res = tcg_temp_new_i64();
  
@@ -7959,15 +8321,28 @@ static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
              tcg_temp_free_i64(tcg_res);
          }
  
+        if (is_scalar) {
+            clear_vec_high(s, rd);
+        }
+
          tcg_temp_free_i64(tcg_idx);
      } else if (!is_long) {
-        /* 32 bit floating point, or 16 or 32 bit integer */
+        /* 32 bit floating point, or 16 or 32 bit integer.
+         * For the 16 bit scalar case we use the usual Neon helpers and
+         * rely on the fact that 0 op 0 == 0 with no side effects.
+         */
          TCGv_i32 tcg_idx = tcg_temp_new_i32();
-        int pass;
+        int pass, maxpasses;
+
+        if (is_scalar) {
+            maxpasses = 1;
+        } else {
+            maxpasses = is_q ? 4 : 2;
+        }
  
          read_vec_element_i32(s, tcg_idx, rm, index, size);
  
-        if (size == 1) {
+        if (size == 1 && !is_scalar) {
              /* The simplest way to handle the 16x16 indexed ops is to duplicate
               * the index into both halves of the 32 bit tcg_idx and then use
               * the usual Neon helpers.
@@ -7975,11 +8350,11 @@ static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
              tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
          }
  
-        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
+        for (pass = 0; pass < maxpasses; pass++) {
              TCGv_i32 tcg_op = tcg_temp_new_i32();
              TCGv_i32 tcg_res = tcg_temp_new_i32();
  
-            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
+            read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
  
              switch (opcode) {
              case 0x0: /* MLA */
@@ -8043,7 +8418,12 @@ static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
                  g_assert_not_reached();
              }
  
-            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+            if (is_scalar) {
+                write_fp_sreg(s, rd, tcg_res);
+            } else {
+                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
+            }
+
              tcg_temp_free_i32(tcg_op);
              tcg_temp_free_i32(tcg_res);
          }
@@ -8055,6 +8435,172 @@ static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
          }
      } else {
          /* long ops: 16x16->32 or 32x32->64 */
+        TCGv_i64 tcg_res[2];
+        int pass;
+        bool satop = extract32(opcode, 0, 1);
+        TCGMemOp memop = MO_32;
+
+        if (satop || !u) {
+            memop |= MO_SIGN;
+        }
+
+        if (size == 2) {
+            TCGv_i64 tcg_idx = tcg_temp_new_i64();
+
+            read_vec_element(s, tcg_idx, rm, index, memop);
+
+            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
+                TCGv_i64 tcg_op = tcg_temp_new_i64();
+                TCGv_i64 tcg_passres;
+                int passelt;
+
+                if (is_scalar) {
+                    passelt = 0;
+                } else {
+                    passelt = pass + (is_q * 2);
+                }
+
+                read_vec_element(s, tcg_op, rn, passelt, memop);
+
+                tcg_res[pass] = tcg_temp_new_i64();
+
+                if (opcode == 0xa || opcode == 0xb) {
+                    /* Non-accumulating ops */
+                    tcg_passres = tcg_res[pass];
+                } else {
+                    tcg_passres = tcg_temp_new_i64();
+                }
+
+                tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
+                tcg_temp_free_i64(tcg_op);
+
+                if (satop) {
+                    /* saturating, doubling */
+                    gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
+                                                      tcg_passres, tcg_passres);
+                }
+
+                if (opcode == 0xa || opcode == 0xb) {
+                    continue;
+                }
+
+                /* Accumulating op: handle accumulate step */
+                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+
+                switch (opcode) {
+                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
+                    tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
+                    break;
+                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
+                    tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
+                    break;
+                case 0x7: /* SQDMLSL, SQDMLSL2 */
+                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
+                    /* fall through */
+                case 0x3: /* SQDMLAL, SQDMLAL2 */
+                    gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
+                                                      tcg_res[pass],
+                                                      tcg_passres);
+                    break;
+                default:
+                    g_assert_not_reached();
+                }
+                tcg_temp_free_i64(tcg_passres);
+            }
+            tcg_temp_free_i64(tcg_idx);
+
+            if (is_scalar) {
+                clear_vec_high(s, rd);
+            }
+        } else {
+            TCGv_i32 tcg_idx = tcg_temp_new_i32();
+
+            assert(size == 1);
+            read_vec_element_i32(s, tcg_idx, rm, index, size);
+
+            if (!is_scalar) {
+                /* The simplest way to handle the 16x16 indexed ops is to
+                 * duplicate the index into both halves of the 32 bit tcg_idx
+                 * and then use the usual Neon helpers.
+                 */
+                tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
+            }
+
+            for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
+                TCGv_i32 tcg_op = tcg_temp_new_i32();
+                TCGv_i64 tcg_passres;
+
+                if (is_scalar) {
+                    read_vec_element_i32(s, tcg_op, rn, pass, size);
+                } else {
+                    read_vec_element_i32(s, tcg_op, rn,
+                                         pass + (is_q * 2), MO_32);
+                }
+
+                tcg_res[pass] = tcg_temp_new_i64();
+
+                if (opcode == 0xa || opcode == 0xb) {
+                    /* Non-accumulating ops */
+                    tcg_passres = tcg_res[pass];
+                } else {
+                    tcg_passres = tcg_temp_new_i64();
+                }
+
+                if (memop & MO_SIGN) {
+                    gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
+                } else {
+                    gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
+                }
+                if (satop) {
+                    gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
+                                                      tcg_passres, tcg_passres);
+                }
+                tcg_temp_free_i32(tcg_op);
+
+                if (opcode == 0xa || opcode == 0xb) {
+                    continue;
+                }
+
+                /* Accumulating op: handle accumulate step */
+                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+
+                switch (opcode) {
+                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
+                    gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
+                                             tcg_passres);
+                    break;
+                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
+                    gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
+                                             tcg_passres);
+                    break;
+                case 0x7: /* SQDMLSL, SQDMLSL2 */
+                    gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
+                    /* fall through */
+                case 0x3: /* SQDMLAL, SQDMLAL2 */
+                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
+                                                      tcg_res[pass],
+                                                      tcg_passres);
+                    break;
+                default:
+                    g_assert_not_reached();
+                }
+                tcg_temp_free_i64(tcg_passres);
+            }
+            tcg_temp_free_i32(tcg_idx);
+
+            if (is_scalar) {
+                tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
+            }
+        }
+
+        if (is_scalar) {
+            tcg_res[1] = tcg_const_i64(0);
+        }
+
+        for (pass = 0; pass < 2; pass++) {
+            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+            tcg_temp_free_i64(tcg_res[pass]);
+        }
      }
  
      if (!TCGV_IS_UNUSED_PTR(fpst)) {
@@ -8107,7 +8653,7 @@ static const AArch64DecodeTable data_proc_simd[] = {
      { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
      { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
      { 0x0e000400, 0x9fe08400, disas_simd_copy },
-    { 0x0f000000, 0x9f000400, disas_simd_indexed_vector },
+    { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
      /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
      { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
      { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
@@ -8119,7 +8665,7 @@ static const AArch64DecodeTable data_proc_simd[] = {
      { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
      { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
      { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
-    { 0x5f000000, 0xdf000400, disas_simd_scalar_indexed },
+    { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
      { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
      { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
      { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },