#include "tcg-op-gvec.h"
#include "qemu/log.h"
#include "qemu/bitops.h"
+#include "qemu/qemu-print.h"
#include "arm_ldst.h"
#include "exec/semihost.h"
tcg_temp_free_i32(tcg_excp);
}
-static void gen_ss_advance(DisasContext *s)
-{
- /* If the singlestep state is Active-not-pending, advance to
- * Active-pending.
- */
- if (s->ss_active) {
- s->pstate_ss = 0;
- gen_helper_clear_pstate_ss(cpu_env);
- }
-}
-
static void gen_step_complete_exception(DisasContext *s)
{
/* We just completed a step of an insn. Move from Active-not-pending
FPROUNDING_NEGINF,
};
-static int disas_vfp_v8_insn(DisasContext *s, uint32_t insn)
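+/*
+ * Miscellaneous VFP: VSEL, VMAXNM/VMINNM, VRINT{A,N,P,M} and the
+ * VCVT{A,N,P,M} directed-rounding conversions; each encoding is
+ * gated on its own ISAR feature test below.
+ */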
+static int disas_vfp_misc_insn(DisasContext *s, uint32_t insn)
{
uint32_t rd, rn, rm, dp = extract32(insn, 8, 1);
- if (!arm_dc_feature(s, ARM_FEATURE_V8)) {
- return 1;
- }
-
if (dp) {
VFP_DREG_D(rd, insn);
VFP_DREG_N(rn, insn);
rm = VFP_SREG_M(insn);
}
- if ((insn & 0x0f800e50) == 0x0e000a00) {
+ if ((insn & 0x0f800e50) == 0x0e000a00 && dc_isar_feature(aa32_vsel, s)) {
return handle_vsel(insn, rd, rn, rm, dp);
- } else if ((insn & 0x0fb00e10) == 0x0e800a00) {
+ } else if ((insn & 0x0fb00e10) == 0x0e800a00 &&
+ dc_isar_feature(aa32_vminmaxnm, s)) {
return handle_vminmaxnm(insn, rd, rn, rm, dp);
- } else if ((insn & 0x0fbc0ed0) == 0x0eb80a40) {
+ } else if ((insn & 0x0fbc0ed0) == 0x0eb80a40 &&
+ dc_isar_feature(aa32_vrint, s)) {
/* VRINTA, VRINTN, VRINTP, VRINTM */
int rounding = fp_decode_rm[extract32(insn, 16, 2)];
return handle_vrint(insn, rd, rm, dp, rounding);
- } else if ((insn & 0x0fbc0e50) == 0x0ebc0a40) {
+ } else if ((insn & 0x0fbc0e50) == 0x0ebc0a40 &&
+ dc_isar_feature(aa32_vcvt_dr, s)) {
/* VCVTA, VCVTN, VCVTP, VCVTM */
int rounding = fp_decode_rm[extract32(insn, 16, 2)];
return handle_vcvt(insn, rd, rm, dp, rounding);
* for attempts to execute invalid vfp/neon encodings with FP disabled.
*/
if (s->fp_excp_el) {
- gen_exception_insn(s, 4, EXCP_UDEF,
- syn_fp_access_trap(1, 0xe, false), s->fp_excp_el);
+ if (arm_dc_feature(s, ARM_FEATURE_M)) {
+ gen_exception_insn(s, 4, EXCP_NOCP, syn_uncategorized(),
+ s->fp_excp_el);
+ } else {
+ gen_exception_insn(s, 4, EXCP_UDEF,
+ syn_fp_access_trap(1, 0xe, false),
+ s->fp_excp_el);
+ }
return 0;
}
}
}
+ if (arm_dc_feature(s, ARM_FEATURE_M)) {
+ /* Handle M-profile lazy FP state mechanics */
+
+ /* Trigger lazy-state preservation if necessary */
+ if (s->v7m_lspact) {
+ /*
+ * Lazy state saving affects external memory and also the NVIC,
+ * so we must mark it as an IO operation for icount.
+ */
+ if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
+ gen_io_start();
+ }
+ gen_helper_v7m_preserve_fp_state(cpu_env);
+ if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
+ gen_io_end();
+ }
+ /*
+ * If the preserve_fp_state helper doesn't throw an exception
+ * then it will clear LSPACT; we don't need to repeat this for
+ * any further FP insns in this TB.
+ */
+ s->v7m_lspact = false;
+ }
+
+ /* Update ownership of FP context: set FPCCR.S to match current state */
+ if (s->v8m_fpccr_s_wrong) {
+ TCGv_i32 tmp;
+
+ tmp = load_cpu_field(v7m.fpccr[M_REG_S]);
+ if (s->v8m_secure) {
+ tcg_gen_ori_i32(tmp, tmp, R_V7M_FPCCR_S_MASK);
+ } else {
+ tcg_gen_andi_i32(tmp, tmp, ~R_V7M_FPCCR_S_MASK);
+ }
+ store_cpu_field(tmp, v7m.fpccr[M_REG_S]);
+ /* Don't need to do this for any further FP insns in this TB */
+ s->v8m_fpccr_s_wrong = false;
+ }
+
+ if (s->v7m_new_fp_ctxt_needed) {
+ /*
+ * Create new FP context by updating CONTROL.FPCA, CONTROL.SFPA
+ * and the FPSCR.
+ */
+ TCGv_i32 control, fpscr;
+ uint32_t bits = R_V7M_CONTROL_FPCA_MASK;
+
+ fpscr = load_cpu_field(v7m.fpdscr[s->v8m_secure]);
+ gen_helper_vfp_set_fpscr(cpu_env, fpscr);
+ tcg_temp_free_i32(fpscr);
+ /*
+ * We don't need to arrange to end the TB, because the only
+ * parts of FPSCR which we cache in the TB flags are the VECLEN
+ * and VECSTRIDE, and those don't exist for M-profile.
+ */
+
+ if (s->v8m_secure) {
+ bits |= R_V7M_CONTROL_SFPA_MASK;
+ }
+ control = load_cpu_field(v7m.control[M_REG_S]);
+ tcg_gen_ori_i32(control, control, bits);
+ store_cpu_field(control, v7m.control[M_REG_S]);
+ /* Don't need to do this for any further FP insns in this TB */
+ s->v7m_new_fp_ctxt_needed = false;
+ }
+ }
+
if (extract32(insn, 28, 4) == 0xf) {
- /* Encodings with T=1 (Thumb) or unconditional (ARM):
- * only used in v8 and above.
+ /*
+ * Encodings with T=1 (Thumb) or unconditional (ARM):
+ * only used for the "miscellaneous VFP features" added in v8A
+ * and v7M (and gated on the MVFR2.FPMisc field).
*/
- return disas_vfp_v8_insn(s, insn);
+ return disas_vfp_misc_insn(s, insn);
}
dp = ((insn & 0xf00) == 0xb00);
}
}
} else { /* !dp */
+ bool is_sysreg;
+
if ((insn & 0x6f) != 0x00)
return 1;
rn = VFP_SREG_N(insn);
+
+ is_sysreg = extract32(insn, 21, 1);
+
+ if (arm_dc_feature(s, ARM_FEATURE_M)) {
+ /*
+ * The only M-profile VFP vmrs/vmsr sysreg is FPSCR.
+ * Writes to R15 are UNPREDICTABLE; we choose to undef.
+ */
+ if (is_sysreg && (rd == 15 || (rn >> 1) != ARM_VFP_FPSCR)) {
+ return 1;
+ }
+ }
+
if (insn & ARM_CP_RW_BIT) {
/* vfp->arm */
- if (insn & (1 << 21)) {
+ if (is_sysreg) {
/* system register */
rn >>= 1;
}
} else {
/* arm->vfp */
- if (insn & (1 << 21)) {
+ if (is_sysreg) {
rn >>= 1;
/* system register */
switch (rn) {
}
} else {
/* data processing */
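+            /*
+             * Most ops take their operand and result precision from
+             * the dp bit; the op == 15 conversions override these
+             * flags (and no_output, for the compares) in the decode
+             * switch below.
+             */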
+ bool rd_is_dp = dp;
+ bool rm_is_dp = dp;
+ bool no_output = false;
+
/* The opcode is in bits 23, 21, 20 and 6. */
op = ((insn >> 20) & 8) | ((insn >> 19) & 6) | ((insn >> 6) & 1);
- if (dp) {
- if (op == 15) {
- /* rn is opcode */
- rn = ((insn >> 15) & 0x1e) | ((insn >> 7) & 1);
- } else {
- /* rn is register number */
- VFP_DREG_N(rn, insn);
- }
+ rn = VFP_SREG_N(insn);
- if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18) ||
- ((rn & 0x1e) == 0x6))) {
- /* Integer or single/half precision destination. */
- rd = VFP_SREG_D(insn);
- } else {
- VFP_DREG_D(rd, insn);
- }
- if (op == 15 &&
- (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14) ||
- ((rn & 0x1e) == 0x4))) {
- /* VCVT from int or half precision is always from S reg
- * regardless of dp bit. VCVT with immediate frac_bits
- * has same format as SREG_M.
+ if (op == 15) {
+ /* rn is opcode, encoded as per VFP_SREG_N. */
+ switch (rn) {
+ case 0x00: /* vmov */
+ case 0x01: /* vabs */
+ case 0x02: /* vneg */
+ case 0x03: /* vsqrt */
+ break;
+
+ case 0x04: /* vcvtb.f64.f16, vcvtb.f32.f16 */
+ case 0x05: /* vcvtt.f64.f16, vcvtt.f32.f16 */
+ /*
+ * VCVTB, VCVTT: only present with the halfprec extension
+ * UNPREDICTABLE if bit 8 is set prior to ARMv8
+ * (we choose to UNDEF)
*/
- rm = VFP_SREG_M(insn);
- } else {
- VFP_DREG_M(rm, insn);
+ if (dp) {
+ if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
+ return 1;
+ }
+ } else {
+ if (!dc_isar_feature(aa32_fp16_spconv, s)) {
+ return 1;
+ }
+ }
+ rm_is_dp = false;
+ break;
+ case 0x06: /* vcvtb.f16.f32, vcvtb.f16.f64 */
+ case 0x07: /* vcvtt.f16.f32, vcvtt.f16.f64 */
+ if (dp) {
+ if (!dc_isar_feature(aa32_fp16_dpconv, s)) {
+ return 1;
+ }
+ } else {
+ if (!dc_isar_feature(aa32_fp16_spconv, s)) {
+ return 1;
+ }
+ }
+ rd_is_dp = false;
+ break;
+
+ case 0x08: case 0x0a: /* vcmp, vcmpz */
+ case 0x09: case 0x0b: /* vcmpe, vcmpez */
+ no_output = true;
+ break;
+
+ case 0x0c: /* vrintr */
+ case 0x0d: /* vrintz */
+ case 0x0e: /* vrintx */
+ break;
+
+ case 0x0f: /* vcvt double<->single */
+ rd_is_dp = !dp;
+ break;
+
+ case 0x10: /* vcvt.fxx.u32 */
+ case 0x11: /* vcvt.fxx.s32 */
+ rm_is_dp = false;
+ break;
+ case 0x18: /* vcvtr.u32.fxx */
+ case 0x19: /* vcvtz.u32.fxx */
+ case 0x1a: /* vcvtr.s32.fxx */
+ case 0x1b: /* vcvtz.s32.fxx */
+ rd_is_dp = false;
+ break;
+
+ case 0x14: /* vcvt fp <-> fixed */
+ case 0x15:
+ case 0x16:
+ case 0x17:
+ case 0x1c:
+ case 0x1d:
+ case 0x1e:
+ case 0x1f:
+ if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
+ return 1;
+ }
+ /* Immediate frac_bits has same format as SREG_M. */
+ rm_is_dp = false;
+ break;
+
+ case 0x13: /* vjcvt */
+ if (!dp || !dc_isar_feature(aa32_jscvt, s)) {
+ return 1;
+ }
+ rd_is_dp = false;
+ break;
+
+ default:
+ return 1;
}
+ } else if (dp) {
+ /* rn is register number */
+ VFP_DREG_N(rn, insn);
+ }
+
+ if (rd_is_dp) {
+ VFP_DREG_D(rd, insn);
+ } else {
+ rd = VFP_SREG_D(insn);
+ }
+ if (rm_is_dp) {
+ VFP_DREG_M(rm, insn);
} else {
- rn = VFP_SREG_N(insn);
- if (op == 15 && rn == 15) {
- /* Double precision destination. */
- VFP_DREG_D(rd, insn);
- } else {
- rd = VFP_SREG_D(insn);
- }
- /* NB that we implicitly rely on the encoding for the frac_bits
- * in VCVT of fixed to float being the same as that of an SREG_M
- */
rm = VFP_SREG_M(insn);
}
veclen = s->vec_len;
- if (op == 15 && rn > 3)
+ if (op == 15 && rn > 3) {
veclen = 0;
+ }
/* Shut up compiler warnings. */
delta_m = 0;
/* Load the initial operands. */
if (op == 15) {
switch (rn) {
- case 16:
- case 17:
- /* Integer source */
- gen_mov_F0_vreg(0, rm);
- break;
- case 8:
- case 9:
- /* Compare */
+ case 0x08: case 0x09: /* Compare */
gen_mov_F0_vreg(dp, rd);
gen_mov_F1_vreg(dp, rm);
break;
- case 10:
- case 11:
- /* Compare with zero */
+ case 0x0a: case 0x0b: /* Compare with zero */
gen_mov_F0_vreg(dp, rd);
gen_vfp_F1_ld0(dp);
break;
- case 20:
- case 21:
- case 22:
- case 23:
- case 28:
- case 29:
- case 30:
- case 31:
+ case 0x14: /* vcvt fp <-> fixed */
+ case 0x15:
+ case 0x16:
+ case 0x17:
+ case 0x1c:
+ case 0x1d:
+ case 0x1e:
+ case 0x1f:
/* Source and destination the same. */
gen_mov_F0_vreg(dp, rd);
break;
- case 4:
- case 5:
- case 6:
- case 7:
- /* VCVTB, VCVTT: only present with the halfprec extension
- * UNPREDICTABLE if bit 8 is set prior to ARMv8
- * (we choose to UNDEF)
- */
- if ((dp && !arm_dc_feature(s, ARM_FEATURE_V8)) ||
- !arm_dc_feature(s, ARM_FEATURE_VFP_FP16)) {
- return 1;
- }
- if (!extract32(rn, 1, 1)) {
- /* Half precision source. */
- gen_mov_F0_vreg(0, rm);
- break;
- }
- /* Otherwise fall through */
default:
/* One source operand. */
- gen_mov_F0_vreg(dp, rm);
+ gen_mov_F0_vreg(rm_is_dp, rm);
break;
}
} else {
break;
}
case 15: /* single<->double conversion */
- if (dp)
+ if (dp) {
gen_helper_vfp_fcvtsd(cpu_F0s, cpu_F0d, cpu_env);
- else
+ } else {
gen_helper_vfp_fcvtds(cpu_F0d, cpu_F0s, cpu_env);
+ }
break;
case 16: /* fuito */
gen_vfp_uito(dp, 0);
break;
case 17: /* fsito */
gen_vfp_sito(dp, 0);
break;
+ case 19: /* vjcvt */
+ gen_helper_vjcvt(cpu_F0s, cpu_F0d, cpu_env);
+ break;
case 20: /* fshto */
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
- return 1;
- }
gen_vfp_shto(dp, 16 - rm, 0);
break;
case 21: /* fslto */
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
- return 1;
- }
gen_vfp_slto(dp, 32 - rm, 0);
break;
case 22: /* fuhto */
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
- return 1;
- }
gen_vfp_uhto(dp, 16 - rm, 0);
break;
case 23: /* fulto */
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
- return 1;
- }
gen_vfp_ulto(dp, 32 - rm, 0);
break;
case 24: /* ftoui */
gen_vfp_toui(dp, 0);
break;
case 28: /* ftosh */
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
- return 1;
- }
gen_vfp_tosh(dp, 16 - rm, 0);
break;
case 29: /* ftosl */
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
- return 1;
- }
gen_vfp_tosl(dp, 32 - rm, 0);
break;
case 30: /* ftouh */
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
- return 1;
- }
gen_vfp_touh(dp, 16 - rm, 0);
break;
case 31: /* ftoul */
- if (!arm_dc_feature(s, ARM_FEATURE_VFP3)) {
- return 1;
- }
gen_vfp_toul(dp, 32 - rm, 0);
break;
default: /* undefined */
- return 1;
+ g_assert_not_reached();
}
break;
default: /* undefined */
return 1;
}
- /* Write back the result. */
- if (op == 15 && (rn >= 8 && rn <= 11)) {
- /* Comparison, do nothing. */
- } else if (op == 15 && dp && ((rn & 0x1c) == 0x18 ||
- (rn & 0x1e) == 0x6)) {
- /* VCVT double to int: always integer result.
- * VCVT double to half precision is always a single
- * precision result.
- */
- gen_mov_vreg_F0(0, rd);
- } else if (op == 15 && rn == 15) {
- /* conversion */
- gen_mov_vreg_F0(!dp, rd);
- } else {
- gen_mov_vreg_F0(dp, rd);
+ /* Write back the result, if any. */
+ if (!no_output) {
+ gen_mov_vreg_F0(rd_is_dp, rd);
}
/* break out of the loop if we have finished */
- if (veclen == 0)
+ if (veclen == 0) {
break;
+ }
if (op == 15 && delta_m == 0) {
/* single source one-many */
.vece = MO_64 },
};
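+/*
+ * Expanders for the saturating Neon ops: compute the wrapping result
+ * alongside the saturated one, compare the two lane-wise, and OR any
+ * mismatch into the sticky QC flag passed in 'sat'. For example, with
+ * MO_8 lanes 0xff + 0x02 wraps to 0x01 but saturates to 0xff, so the
+ * NE compare yields all-ones in that lane and QC becomes set.
+ * The GVecGen4 tables use .write_aofs so that the QC operand (the
+ * 'a' offset) is written back as well as read.
+ */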
+static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
+ TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec x = tcg_temp_new_vec_matching(t);
+ tcg_gen_add_vec(vece, x, a, b);
+ tcg_gen_usadd_vec(vece, t, a, b);
+ tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
+ tcg_gen_or_vec(vece, sat, sat, x);
+ tcg_temp_free_vec(x);
+}
+
+const GVecGen4 uqadd_op[4] = {
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_b,
+ .opc = INDEX_op_usadd_vec,
+ .write_aofs = true,
+ .vece = MO_8 },
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_h,
+ .opc = INDEX_op_usadd_vec,
+ .write_aofs = true,
+ .vece = MO_16 },
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_s,
+ .opc = INDEX_op_usadd_vec,
+ .write_aofs = true,
+ .vece = MO_32 },
+ { .fniv = gen_uqadd_vec,
+ .fno = gen_helper_gvec_uqadd_d,
+ .opc = INDEX_op_usadd_vec,
+ .write_aofs = true,
+ .vece = MO_64 },
+};
+
+static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
+ TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec x = tcg_temp_new_vec_matching(t);
+ tcg_gen_add_vec(vece, x, a, b);
+ tcg_gen_ssadd_vec(vece, t, a, b);
+ tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
+ tcg_gen_or_vec(vece, sat, sat, x);
+ tcg_temp_free_vec(x);
+}
+
+const GVecGen4 sqadd_op[4] = {
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_b,
+ .opc = INDEX_op_ssadd_vec,
+ .write_aofs = true,
+ .vece = MO_8 },
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_h,
+ .opc = INDEX_op_ssadd_vec,
+ .write_aofs = true,
+ .vece = MO_16 },
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_s,
+ .opc = INDEX_op_ssadd_vec,
+ .write_aofs = true,
+ .vece = MO_32 },
+ { .fniv = gen_sqadd_vec,
+ .fno = gen_helper_gvec_sqadd_d,
+ .opc = INDEX_op_ssadd_vec,
+ .write_aofs = true,
+ .vece = MO_64 },
+};
+
+static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
+ TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec x = tcg_temp_new_vec_matching(t);
+ tcg_gen_sub_vec(vece, x, a, b);
+ tcg_gen_ussub_vec(vece, t, a, b);
+ tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
+ tcg_gen_or_vec(vece, sat, sat, x);
+ tcg_temp_free_vec(x);
+}
+
+const GVecGen4 uqsub_op[4] = {
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_b,
+ .opc = INDEX_op_ussub_vec,
+ .write_aofs = true,
+ .vece = MO_8 },
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_h,
+ .opc = INDEX_op_ussub_vec,
+ .write_aofs = true,
+ .vece = MO_16 },
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_s,
+ .opc = INDEX_op_ussub_vec,
+ .write_aofs = true,
+ .vece = MO_32 },
+ { .fniv = gen_uqsub_vec,
+ .fno = gen_helper_gvec_uqsub_d,
+ .opc = INDEX_op_ussub_vec,
+ .write_aofs = true,
+ .vece = MO_64 },
+};
+
+static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
+ TCGv_vec a, TCGv_vec b)
+{
+ TCGv_vec x = tcg_temp_new_vec_matching(t);
+ tcg_gen_sub_vec(vece, x, a, b);
+ tcg_gen_sssub_vec(vece, t, a, b);
+ tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
+ tcg_gen_or_vec(vece, sat, sat, x);
+ tcg_temp_free_vec(x);
+}
+
+const GVecGen4 sqsub_op[4] = {
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_b,
+ .opc = INDEX_op_sssub_vec,
+ .write_aofs = true,
+ .vece = MO_8 },
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_h,
+ .opc = INDEX_op_sssub_vec,
+ .write_aofs = true,
+ .vece = MO_16 },
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_s,
+ .opc = INDEX_op_sssub_vec,
+ .write_aofs = true,
+ .vece = MO_32 },
+ { .fniv = gen_sqsub_vec,
+ .fno = gen_helper_gvec_sqsub_d,
+ .opc = INDEX_op_sssub_vec,
+ .write_aofs = true,
+ .vece = MO_64 },
+};
+
/* Translate a NEON data processing instruction. Return nonzero if the
instruction is invalid.
We process data in a mixture of 32-bit and 64-bit chunks.
}
return 0;
+ case NEON_3R_VQADD:
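+            /*
+             * vfp.qc is passed as the gvec 'a' operand, which the
+             * expanders above both read and accumulate saturation into.
+             */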
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
+ rn_ofs, rm_ofs, vec_size, vec_size,
+ (u ? uqadd_op : sqadd_op) + size);
+ break;
+
+ case NEON_3R_VQSUB:
+ tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
+ rn_ofs, rm_ofs, vec_size, vec_size,
+ (u ? uqsub_op : sqsub_op) + size);
+ break;
+
case NEON_3R_VMUL: /* VMUL */
if (u) {
/* Polynomial case allows only P8 and is handled below. */
neon_load_reg64(cpu_V0, rn + pass);
neon_load_reg64(cpu_V1, rm + pass);
switch (op) {
- case NEON_3R_VQADD:
- if (u) {
- gen_helper_neon_qadd_u64(cpu_V0, cpu_env,
- cpu_V0, cpu_V1);
- } else {
- gen_helper_neon_qadd_s64(cpu_V0, cpu_env,
- cpu_V0, cpu_V1);
- }
- break;
- case NEON_3R_VQSUB:
- if (u) {
- gen_helper_neon_qsub_u64(cpu_V0, cpu_env,
- cpu_V0, cpu_V1);
- } else {
- gen_helper_neon_qsub_s64(cpu_V0, cpu_env,
- cpu_V0, cpu_V1);
- }
- break;
case NEON_3R_VSHL:
if (u) {
gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
case NEON_3R_VHADD:
GEN_NEON_INTEGER_OP(hadd);
break;
- case NEON_3R_VQADD:
- GEN_NEON_INTEGER_OP_ENV(qadd);
- break;
case NEON_3R_VRHADD:
GEN_NEON_INTEGER_OP(rhadd);
break;
case NEON_3R_VHSUB:
GEN_NEON_INTEGER_OP(hsub);
break;
- case NEON_3R_VQSUB:
- GEN_NEON_INTEGER_OP_ENV(qsub);
- break;
case NEON_3R_VSHL:
GEN_NEON_INTEGER_OP(shl);
break;
TCGv_ptr fpst;
TCGv_i32 ahp;
- if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) ||
+ if (!dc_isar_feature(aa32_fp16_spconv, s) ||
q || (rm & 1)) {
return 1;
}
{
TCGv_ptr fpst;
TCGv_i32 ahp;
- if (!arm_dc_feature(s, ARM_FEATURE_VFP_FP16) ||
+ if (!dc_isar_feature(aa32_fp16_spconv, s) ||
q || (rd & 1)) {
return 1;
}
gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL;
int rd, rn, rm, opr_sz;
int data = 0;
- bool q;
-
- q = extract32(insn, 6, 1);
- VFP_DREG_D(rd, insn);
- VFP_DREG_N(rn, insn);
- VFP_DREG_M(rm, insn);
- if ((rd | rn | rm) & q) {
- return 1;
- }
+ int off_rn, off_rm;
+ bool is_long = false, q = extract32(insn, 6, 1);
+ bool ptr_is_env = false;
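+    /*
+     * is_long: widening op whose source vectors are narrower than Vd
+     * (here VFML, fp16 -> fp32).
+     * ptr_is_env: the gvec helper takes cpu_env rather than a
+     * float_status pointer.
+     */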
if ((insn & 0xfe200f10) == 0xfc200800) {
/* VCMLA -- 1111 110R R.1S .... .... 1000 ...0 .... */
return 1;
}
fn_gvec = u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b;
+ } else if ((insn & 0xff300f10) == 0xfc200810) {
+ /* VFM[AS]L -- 1111 1100 S.10 .... .... 1000 .Q.1 .... */
+ int is_s = extract32(insn, 23, 1);
+ if (!dc_isar_feature(aa32_fhm, s)) {
+ return 1;
+ }
+ is_long = true;
+ data = is_s; /* is_2 == 0 */
+ fn_gvec_ptr = gen_helper_gvec_fmlal_a32;
+ ptr_is_env = true;
} else {
return 1;
}
+ VFP_DREG_D(rd, insn);
+ if (rd & q) {
+ return 1;
+ }
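+    /*
+     * The source vectors of the widening (is_long) ops are half the
+     * width of Vd: S regs when Q == 0, D regs when Q == 1.
+     */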
+ if (q || !is_long) {
+ VFP_DREG_N(rn, insn);
+ VFP_DREG_M(rm, insn);
+        if (!is_long && ((rn | rm) & q)) {
+ return 1;
+ }
+ off_rn = vfp_reg_offset(1, rn);
+ off_rm = vfp_reg_offset(1, rm);
+ } else {
+ rn = VFP_SREG_N(insn);
+ rm = VFP_SREG_M(insn);
+ off_rn = vfp_reg_offset(0, rn);
+ off_rm = vfp_reg_offset(0, rm);
+ }
+
if (s->fp_excp_el) {
gen_exception_insn(s, 4, EXCP_UDEF,
syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
opr_sz = (1 + q) * 8;
if (fn_gvec_ptr) {
- TCGv_ptr fpst = get_fpstatus_ptr(1);
- tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
- vfp_reg_offset(1, rn),
- vfp_reg_offset(1, rm), fpst,
+ TCGv_ptr ptr;
+ if (ptr_is_env) {
+ ptr = cpu_env;
+ } else {
+ ptr = get_fpstatus_ptr(1);
+ }
+ tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), off_rn, off_rm, ptr,
opr_sz, opr_sz, data, fn_gvec_ptr);
- tcg_temp_free_ptr(fpst);
+ if (!ptr_is_env) {
+ tcg_temp_free_ptr(ptr);
+ }
} else {
- tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd),
- vfp_reg_offset(1, rn),
- vfp_reg_offset(1, rm),
+ tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd), off_rn, off_rm,
opr_sz, opr_sz, data, fn_gvec);
}
return 0;
gen_helper_gvec_3 *fn_gvec = NULL;
gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL;
int rd, rn, rm, opr_sz, data;
- bool q;
-
- q = extract32(insn, 6, 1);
- VFP_DREG_D(rd, insn);
- VFP_DREG_N(rn, insn);
- if ((rd | rn) & q) {
- return 1;
- }
+ int off_rn, off_rm;
+ bool is_long = false, q = extract32(insn, 6, 1);
+ bool ptr_is_env = false;
if ((insn & 0xff000f10) == 0xfe000800) {
/* VCMLA (indexed) -- 1111 1110 S.RR .... .... 1000 ...0 .... */
} else if ((insn & 0xffb00f00) == 0xfe200d00) {
/* V[US]DOT -- 1111 1110 0.10 .... .... 1101 .Q.U .... */
int u = extract32(insn, 4, 1);
+
if (!dc_isar_feature(aa32_dp, s)) {
return 1;
}
/* rm is just Vm, and index is M. */
data = extract32(insn, 5, 1); /* index */
rm = extract32(insn, 0, 4);
+ } else if ((insn & 0xffa00f10) == 0xfe000810) {
+ /* VFM[AS]L -- 1111 1110 0.0S .... .... 1000 .Q.1 .... */
+ int is_s = extract32(insn, 20, 1);
+ int vm20 = extract32(insn, 0, 3);
+ int vm3 = extract32(insn, 3, 1);
+ int m = extract32(insn, 5, 1);
+ int index;
+
+ if (!dc_isar_feature(aa32_fhm, s)) {
+ return 1;
+ }
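+        /*
+         * The scalar register and fp16 lane index are packed
+         * differently per form: for Q == 1 the scalar is in a D reg
+         * (four lanes, index = M:Vm3); for Q == 0 it is in an S reg
+         * (two lanes, index = Vm3).
+         */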
+ if (q) {
+ rm = vm20;
+ index = m * 2 + vm3;
+ } else {
+ rm = vm20 * 2 + m;
+ index = vm3;
+ }
+ is_long = true;
+ data = (index << 2) | is_s; /* is_2 == 0 */
+ fn_gvec_ptr = gen_helper_gvec_fmlal_idx_a32;
+ ptr_is_env = true;
} else {
return 1;
}
+ VFP_DREG_D(rd, insn);
+ if (rd & q) {
+ return 1;
+ }
+ if (q || !is_long) {
+ VFP_DREG_N(rn, insn);
+        if (!is_long && (rn & q)) {
+ return 1;
+ }
+ off_rn = vfp_reg_offset(1, rn);
+ off_rm = vfp_reg_offset(1, rm);
+ } else {
+ rn = VFP_SREG_N(insn);
+ off_rn = vfp_reg_offset(0, rn);
+ off_rm = vfp_reg_offset(0, rm);
+ }
if (s->fp_excp_el) {
gen_exception_insn(s, 4, EXCP_UDEF,
syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
opr_sz = (1 + q) * 8;
if (fn_gvec_ptr) {
- TCGv_ptr fpst = get_fpstatus_ptr(1);
- tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd),
- vfp_reg_offset(1, rn),
- vfp_reg_offset(1, rm), fpst,
+ TCGv_ptr ptr;
+ if (ptr_is_env) {
+ ptr = cpu_env;
+ } else {
+ ptr = get_fpstatus_ptr(1);
+ }
+ tcg_gen_gvec_3_ptr(vfp_reg_offset(1, rd), off_rn, off_rm, ptr,
opr_sz, opr_sz, data, fn_gvec_ptr);
- tcg_temp_free_ptr(fpst);
+ if (!ptr_is_env) {
+ tcg_temp_free_ptr(ptr);
+ }
} else {
- tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd),
- vfp_reg_offset(1, rn),
- vfp_reg_offset(1, rm),
+ tcg_gen_gvec_3_ool(vfp_reg_offset(1, rd), off_rn, off_rm,
opr_sz, opr_sz, data, fn_gvec);
}
return 0;
*/
gen_goto_tb(s, 0, s->pc & ~1);
return;
+ case 7: /* sb */
+ if ((insn & 0xf) || !dc_isar_feature(aa32_sb, s)) {
+ goto illegal_op;
+ }
+ /*
+ * TODO: There is no speculation barrier opcode
+ * for TCG; MB and end the TB instead.
+ */
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
+ gen_goto_tb(s, 0, s->pc & ~1);
+ return;
default:
goto illegal_op;
}
} else if (i == rn) {
loaded_var = tmp;
loaded_base = 1;
- } else if (rn == 15 && exc_return) {
+ } else if (i == 15 && exc_return) {
store_pc_exc_ret(s, tmp);
} else {
store_reg_from_load(s, i, tmp);
case 6: case 7: case 14: case 15:
/* Coprocessor. */
if (arm_dc_feature(s, ARM_FEATURE_M)) {
- /* We don't currently implement M profile FP support,
- * so this entire space should give a NOCP fault, with
- * the exception of the v8M VLLDM and VLSTM insns, which
- * must be NOPs in Secure state and UNDEF in Nonsecure state.
+ /* 0b111x_11xx_xxxx_xxxx_xxxx_xxxx_xxxx_xxxx */
+ if (extract32(insn, 24, 2) == 3) {
+ goto illegal_op; /* op0 = 0b11 : unallocated */
+ }
+
+ /*
+ * Decode VLLDM and VLSTM first: these are nonstandard because:
+ * * if there is no FPU then these insns must NOP in
+ * Secure state and UNDEF in Nonsecure state
+ * * if there is an FPU then these insns do not have
+ * the usual behaviour that disas_vfp_insn() provides of
+ * being controlled by CPACR/NSACR enable bits or the
+ * lazy-stacking logic.
*/
if (arm_dc_feature(s, ARM_FEATURE_V8) &&
(insn & 0xffa00f00) == 0xec200a00) {
/* Just NOP since FP support is not implemented */
break;
}
+ if (arm_dc_feature(s, ARM_FEATURE_VFP) &&
+ ((insn >> 8) & 0xe) == 10) {
+ /* FP, and the CPU supports it */
+ if (disas_vfp_insn(s, insn)) {
+ goto illegal_op;
+ }
+ break;
+ }
+
/* All other insns: NOCP */
gen_exception_insn(s, 4, EXCP_NOCP, syn_uncategorized(),
default_exception_el(s));
*/
gen_goto_tb(s, 0, s->pc & ~1);
break;
+ case 7: /* sb */
+ if ((insn & 0xf) || !dc_isar_feature(aa32_sb, s)) {
+ goto illegal_op;
+ }
+ /*
+ * TODO: There is no speculation barrier opcode
+ * for TCG; MB and end the TB instead.
+ */
+ tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
+ gen_goto_tb(s, 0, s->pc & ~1);
+ break;
default:
goto illegal_op;
}
dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL);
dc->vfp_enabled = FIELD_EX32(tb_flags, TBFLAG_A32, VFPEN);
dc->vec_len = FIELD_EX32(tb_flags, TBFLAG_A32, VECLEN);
- dc->vec_stride = FIELD_EX32(tb_flags, TBFLAG_A32, VECSTRIDE);
- dc->c15_cpar = FIELD_EX32(tb_flags, TBFLAG_A32, XSCALE_CPAR);
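+    /*
+     * VECSTRIDE and XSCALE_CPAR share TB flag bits; this is safe
+     * because no XScale CPU implements VFP, so at most one of the
+     * two fields is ever in use.
+     */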
+ if (arm_feature(env, ARM_FEATURE_XSCALE)) {
+ dc->c15_cpar = FIELD_EX32(tb_flags, TBFLAG_A32, XSCALE_CPAR);
+ dc->vec_stride = 0;
+ } else {
+ dc->vec_stride = FIELD_EX32(tb_flags, TBFLAG_A32, VECSTRIDE);
+ dc->c15_cpar = 0;
+ }
dc->v7m_handler_mode = FIELD_EX32(tb_flags, TBFLAG_A32, HANDLER);
dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
regime_is_secure(env, dc->mmu_idx);
dc->v8m_stackcheck = FIELD_EX32(tb_flags, TBFLAG_A32, STACKCHECK);
+ dc->v8m_fpccr_s_wrong = FIELD_EX32(tb_flags, TBFLAG_A32, FPCCR_S_WRONG);
+ dc->v7m_new_fp_ctxt_needed =
+ FIELD_EX32(tb_flags, TBFLAG_A32, NEW_FP_CTXT_NEEDED);
+ dc->v7m_lspact = FIELD_EX32(tb_flags, TBFLAG_A32, LSPACT);
dc->cp_regs = cpu->cp_regs;
dc->features = env->features;
};
/* generate intermediate code for basic block 'tb'. */
-void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb)
+void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
{
DisasContext dc;
const TranslatorOps *ops = &arm_translator_ops;
}
#endif
- translator_loop(ops, &dc.base, cpu, tb);
+ translator_loop(ops, &dc.base, cpu, tb, max_insns);
}
-void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
- int flags)
+void arm_cpu_dump_state(CPUState *cs, FILE *f, int flags)
{
ARMCPU *cpu = ARM_CPU(cs);
CPUARMState *env = &cpu->env;
int i;
if (is_a64(env)) {
- aarch64_cpu_dump_state(cs, f, cpu_fprintf, flags);
+ aarch64_cpu_dump_state(cs, f, flags);
return;
}
for(i=0;i<16;i++) {
- cpu_fprintf(f, "R%02d=%08x", i, env->regs[i]);
+ qemu_fprintf(f, "R%02d=%08x", i, env->regs[i]);
if ((i % 4) == 3)
- cpu_fprintf(f, "\n");
+ qemu_fprintf(f, "\n");
else
- cpu_fprintf(f, " ");
+ qemu_fprintf(f, " ");
}
if (arm_feature(env, ARM_FEATURE_M)) {
}
}
- cpu_fprintf(f, "XPSR=%08x %c%c%c%c %c %s%s\n",
- xpsr,
- xpsr & XPSR_N ? 'N' : '-',
- xpsr & XPSR_Z ? 'Z' : '-',
- xpsr & XPSR_C ? 'C' : '-',
- xpsr & XPSR_V ? 'V' : '-',
- xpsr & XPSR_T ? 'T' : 'A',
- ns_status,
- mode);
+ qemu_fprintf(f, "XPSR=%08x %c%c%c%c %c %s%s\n",
+ xpsr,
+ xpsr & XPSR_N ? 'N' : '-',
+ xpsr & XPSR_Z ? 'Z' : '-',
+ xpsr & XPSR_C ? 'C' : '-',
+ xpsr & XPSR_V ? 'V' : '-',
+ xpsr & XPSR_T ? 'T' : 'A',
+ ns_status,
+ mode);
} else {
uint32_t psr = cpsr_read(env);
const char *ns_status = "";
ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
}
- cpu_fprintf(f, "PSR=%08x %c%c%c%c %c %s%s%d\n",
- psr,
- psr & CPSR_N ? 'N' : '-',
- psr & CPSR_Z ? 'Z' : '-',
- psr & CPSR_C ? 'C' : '-',
- psr & CPSR_V ? 'V' : '-',
- psr & CPSR_T ? 'T' : 'A',
- ns_status,
- aarch32_mode_name(psr), (psr & 0x10) ? 32 : 26);
+ qemu_fprintf(f, "PSR=%08x %c%c%c%c %c %s%s%d\n",
+ psr,
+ psr & CPSR_N ? 'N' : '-',
+ psr & CPSR_Z ? 'Z' : '-',
+ psr & CPSR_C ? 'C' : '-',
+ psr & CPSR_V ? 'V' : '-',
+ psr & CPSR_T ? 'T' : 'A',
+ ns_status,
+ aarch32_mode_name(psr), (psr & 0x10) ? 32 : 26);
}
if (flags & CPU_DUMP_FPU) {
}
for (i = 0; i < numvfpregs; i++) {
uint64_t v = *aa32_vfp_dreg(env, i);
- cpu_fprintf(f, "s%02d=%08x s%02d=%08x d%02d=%016" PRIx64 "\n",
- i * 2, (uint32_t)v,
- i * 2 + 1, (uint32_t)(v >> 32),
- i, v);
+ qemu_fprintf(f, "s%02d=%08x s%02d=%08x d%02d=%016" PRIx64 "\n",
+ i * 2, (uint32_t)v,
+ i * 2 + 1, (uint32_t)(v >> 32),
+ i, v);
}
- cpu_fprintf(f, "FPSCR: %08x\n", (int)env->vfp.xregs[ARM_VFP_FPSCR]);
+ qemu_fprintf(f, "FPSCR: %08x\n", vfp_get_fpscr(env));
}
}