target/arm: Add copyright boilerplate

[qemu.git] / target / arm / helper.c
diff --git a/target/arm/helper.c b/target/arm/helper.c

index 55e9b77bb10e46690d87704273e79592c301138a..d3f3cb57d5fe9fe85496075162bc3ffd5136d9d9 100644 (file)
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -1,4 +1,12 @@
+/*
+ * ARM generic helpers.
+ *
+ * This code is licensed under the GNU GPL v2 or later.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
  #include "qemu/osdep.h"
+#include "qemu/units.h"
  #include "target/arm/idau.h"
  #include "trace.h"
  #include "cpu.h"
@@ -10,15 +18,19 @@
  #include "sysemu/sysemu.h"
  #include "qemu/bitops.h"
  #include "qemu/crc32c.h"
+#include "qemu/qemu-print.h"
  #include "exec/exec-all.h"
  #include "exec/cpu_ldst.h"
  #include "arm_ldst.h"
  #include <zlib.h> /* For crc32 */
-#include "exec/semihost.h"
+#include "hw/semihosting/semihost.h"
  #include "sysemu/cpus.h"
  #include "sysemu/kvm.h"
  #include "fpu/softfloat.h"
  #include "qemu/range.h"
+#include "qapi/qapi-commands-target.h"
+#include "qapi/error.h"
+#include "qemu/guest-random.h"
  
  #define ARM_CPU_FREQ 1000000000 /* FIXME: 1 GHz, should be configurable */
  
@@ -222,7 +234,7 @@ static void write_raw_cp_reg(CPUARMState *env, const ARMCPRegInfo *ri,
  
  static int arm_gdb_get_sysreg(CPUARMState *env, uint8_t *buf, int reg)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      const ARMCPRegInfo *ri;
      uint32_t key;
  
@@ -543,7 +555,7 @@ static CPAccessResult access_tpm(CPUARMState *env, const ARMCPRegInfo *ri,
  
  static void dacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
  
      raw_write(env, ri, value);
      tlb_flush(CPU(cpu)); /* Flush TLB as domain not tracked in TLB */
@@ -551,7 +563,7 @@ static void dacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
  
  static void fcse_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
  
      if (raw_read(env, ri) != value) {
          /* Unlike real hardware the qemu TLB uses virtual addresses,
@@ -565,7 +577,7 @@ static void fcse_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
  static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                               uint64_t value)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
  
      if (raw_read(env, ri) != value && !arm_feature(env, ARM_FEATURE_PMSA)
          && !extended_addresses_enabled(env)) {
@@ -582,7 +594,7 @@ static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri,
  static void tlbiall_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
                               uint64_t value)
  {
-    CPUState *cs = ENV_GET_CPU(env);
+    CPUState *cs = env_cpu(env);
  
      tlb_flush_all_cpus_synced(cs);
  }
@@ -590,7 +602,7 @@ static void tlbiall_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
  static void tlbiasid_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
                               uint64_t value)
  {
-    CPUState *cs = ENV_GET_CPU(env);
+    CPUState *cs = env_cpu(env);
  
      tlb_flush_all_cpus_synced(cs);
  }
@@ -598,7 +610,7 @@ static void tlbiasid_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
  static void tlbimva_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
                               uint64_t value)
  {
-    CPUState *cs = ENV_GET_CPU(env);
+    CPUState *cs = env_cpu(env);
  
      tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK);
  }
@@ -606,7 +618,7 @@ static void tlbimva_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
  static void tlbimvaa_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
                               uint64_t value)
  {
-    CPUState *cs = ENV_GET_CPU(env);
+    CPUState *cs = env_cpu(env);
  
      tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK);
  }
@@ -626,7 +638,7 @@ static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri,
                            uint64_t value)
  {
      /* Invalidate all (TLBIALL) */
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
  
      if (tlb_force_broadcast(env)) {
          tlbiall_is_write(env, NULL, value);
@@ -640,7 +652,7 @@ static void tlbimva_write(CPUARMState *env, const ARMCPRegInfo *ri,
                            uint64_t value)
  {
      /* Invalidate single TLB entry by MVA and ASID (TLBIMVA) */
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
  
      if (tlb_force_broadcast(env)) {
          tlbimva_is_write(env, NULL, value);
@@ -654,7 +666,7 @@ static void tlbiasid_write(CPUARMState *env, const ARMCPRegInfo *ri,
                             uint64_t value)
  {
      /* Invalidate by ASID (TLBIASID) */
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
  
      if (tlb_force_broadcast(env)) {
          tlbiasid_is_write(env, NULL, value);
@@ -668,7 +680,7 @@ static void tlbimvaa_write(CPUARMState *env, const ARMCPRegInfo *ri,
                             uint64_t value)
  {
      /* Invalidate single entry by MVA, all ASIDs (TLBIMVAA) */
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
  
      if (tlb_force_broadcast(env)) {
          tlbimvaa_is_write(env, NULL, value);
@@ -681,7 +693,7 @@ static void tlbimvaa_write(CPUARMState *env, const ARMCPRegInfo *ri,
  static void tlbiall_nsnh_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                 uint64_t value)
  {
-    CPUState *cs = ENV_GET_CPU(env);
+    CPUState *cs = env_cpu(env);
  
      tlb_flush_by_mmuidx(cs,
                          ARMMMUIdxBit_S12NSE1 |
@@ -692,7 +704,7 @@ static void tlbiall_nsnh_write(CPUARMState *env, const ARMCPRegInfo *ri,
  static void tlbiall_nsnh_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                    uint64_t value)
  {
-    CPUState *cs = ENV_GET_CPU(env);
+    CPUState *cs = env_cpu(env);
  
      tlb_flush_by_mmuidx_all_cpus_synced(cs,
                                          ARMMMUIdxBit_S12NSE1 |
@@ -709,7 +721,7 @@ static void tlbiipas2_write(CPUARMState *env, const ARMCPRegInfo *ri,
       * translation information.
       * This must NOP if EL2 isn't implemented or SCR_EL3.NS is zero.
       */
-    CPUState *cs = ENV_GET_CPU(env);
+    CPUState *cs = env_cpu(env);
      uint64_t pageaddr;
  
      if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) {
@@ -724,7 +736,7 @@ static void tlbiipas2_write(CPUARMState *env, const ARMCPRegInfo *ri,
  static void tlbiipas2_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                 uint64_t value)
  {
-    CPUState *cs = ENV_GET_CPU(env);
+    CPUState *cs = env_cpu(env);
      uint64_t pageaddr;
  
      if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) {
@@ -740,7 +752,7 @@ static void tlbiipas2_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
  static void tlbiall_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                uint64_t value)
  {
-    CPUState *cs = ENV_GET_CPU(env);
+    CPUState *cs = env_cpu(env);
  
      tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_S1E2);
  }
@@ -748,7 +760,7 @@ static void tlbiall_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri,
  static void tlbiall_hyp_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                   uint64_t value)
  {
-    CPUState *cs = ENV_GET_CPU(env);
+    CPUState *cs = env_cpu(env);
  
      tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_S1E2);
  }
@@ -756,7 +768,7 @@ static void tlbiall_hyp_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
  static void tlbimva_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                uint64_t value)
  {
-    CPUState *cs = ENV_GET_CPU(env);
+    CPUState *cs = env_cpu(env);
      uint64_t pageaddr = value & ~MAKE_64BIT_MASK(0, 12);
  
      tlb_flush_page_by_mmuidx(cs, pageaddr, ARMMMUIdxBit_S1E2);
@@ -765,7 +777,7 @@ static void tlbimva_hyp_write(CPUARMState *env, const ARMCPRegInfo *ri,
  static void tlbimva_hyp_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                   uint64_t value)
  {
-    CPUState *cs = ENV_GET_CPU(env);
+    CPUState *cs = env_cpu(env);
      uint64_t pageaddr = value & ~MAKE_64BIT_MASK(0, 12);
  
      tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
@@ -925,9 +937,36 @@ static void cpacr_write(CPUARMState *env, const ARMCPRegInfo *ri,
          }
          value &= mask;
      }
+
+    /*
+     * For A-profile AArch32 EL3 (but not M-profile secure mode), if NSACR.CP10
+     * is 0 then CPACR.{CP11,CP10} ignore writes and read as 0b00.
+     */
+    if (arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3) &&
+        !arm_is_secure(env) && !extract32(env->cp15.nsacr, 10, 1)) {
+        value &= ~(0xf << 20);
+        value |= env->cp15.cpacr_el1 & (0xf << 20);
+    }
+
      env->cp15.cpacr_el1 = value;
  }
  
+static uint64_t cpacr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    /*
+     * For A-profile AArch32 EL3 (but not M-profile secure mode), if NSACR.CP10
+     * is 0 then CPACR.{CP11,CP10} ignore writes and read as 0b00.
+     */
+    uint64_t value = env->cp15.cpacr_el1;
+
+    if (arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3) &&
+        !arm_is_secure(env) && !extract32(env->cp15.nsacr, 10, 1)) {
+        value &= ~(0xf << 20);
+    }
+    return value;
+}
+
+
  static void cpacr_reset(CPUARMState *env, const ARMCPRegInfo *ri)
  {
      /* Call cpacr_write() so that we reset with the correct RAO bits set
@@ -993,7 +1032,7 @@ static const ARMCPRegInfo v6_cp_reginfo[] = {
      { .name = "CPACR", .state = ARM_CP_STATE_BOTH, .opc0 = 3,
        .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 2, .accessfn = cpacr_access,
        .access = PL1_RW, .fieldoffset = offsetof(CPUARMState, cp15.cpacr_el1),
-      .resetfn = cpacr_reset, .writefn = cpacr_write },
+      .resetfn = cpacr_reset, .writefn = cpacr_write, .readfn = cpacr_read },
      REGINFO_SENTINEL
  };
  
@@ -1166,7 +1205,7 @@ void pmu_init(ARMCPU *cpu)
  
          if (cnt->supported(&cpu->env)) {
              supported_event_map[cnt->number] = i;
-            uint64_t event_mask = 1 << (cnt->number & 0x1f);
+            uint64_t event_mask = 1ULL << (cnt->number & 0x1f);
              if (cnt->number & 0x20) {
                  cpu->pmceid1 |= event_mask;
              } else {
@@ -1281,6 +1320,10 @@ static bool pmu_counter_enabled(CPUARMState *env, uint8_t counter)
      int el = arm_current_el(env);
      uint8_t hpmn = env->cp15.mdcr_el2 & MDCR_HPMN;
  
+    if (!arm_feature(env, ARM_FEATURE_PMU)) {
+        return false;
+    }
+
      if (!arm_feature(env, ARM_FEATURE_EL2) ||
              (counter < hpmn || counter == 31)) {
          e = env->cp15.c9_pmcr & PMCRE;
@@ -1344,7 +1387,7 @@ static bool pmu_counter_enabled(CPUARMState *env, uint8_t counter)
  
  static void pmu_update_irq(CPUARMState *env)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      qemu_set_irq(cpu->pmu_interrupt, (env->cp15.c9_pmcr & PMCRE) &&
              (env->cp15.c9_pminten & env->cp15.c9_pmovsr));
  }
@@ -1355,7 +1398,7 @@ static void pmu_update_irq(CPUARMState *env)
   * etc. can be done logically. This is essentially a no-op if the counter is
   * not enabled at the time of the call.
   */
-void pmccntr_op_start(CPUARMState *env)
+static void pmccntr_op_start(CPUARMState *env)
  {
      uint64_t cycles = cycles_get_count(env);
  
@@ -1385,7 +1428,7 @@ void pmccntr_op_start(CPUARMState *env)
   * guest-visible count. A call to pmccntr_op_finish should follow every call to
   * pmccntr_op_start.
   */
-void pmccntr_op_finish(CPUARMState *env)
+static void pmccntr_op_finish(CPUARMState *env)
  {
      if (pmu_counter_enabled(env, 31)) {
  #ifndef CONFIG_USER_ONLY
@@ -1399,7 +1442,7 @@ void pmccntr_op_finish(CPUARMState *env)
          if (overflow_in > 0) {
              int64_t overflow_at = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                  overflow_in;
-            ARMCPU *cpu = arm_env_get_cpu(env);
+            ARMCPU *cpu = env_archcpu(env);
              timer_mod_anticipate_ns(cpu->pmu_timer, overflow_at);
          }
  #endif
@@ -1448,7 +1491,7 @@ static void pmevcntr_op_finish(CPUARMState *env, uint8_t counter)
          if (overflow_in > 0) {
              int64_t overflow_at = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                  overflow_in;
-            ARMCPU *cpu = arm_env_get_cpu(env);
+            ARMCPU *cpu = env_archcpu(env);
              timer_mod_anticipate_ns(cpu->pmu_timer, overflow_at);
          }
  #endif
@@ -1856,7 +1899,7 @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
  {
      /* Begin with base v8.0 state.  */
      uint32_t valid_mask = 0x3fff;
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
  
      if (arm_el_is_aa64(env, 3)) {
          value |= SCR_FW | SCR_AW;   /* these two bits are RES1.  */
@@ -1893,7 +1936,7 @@ static void scr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
  
  static uint64_t ccsidr_read(CPUARMState *env, const ARMCPRegInfo *ri)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
  
      /* Acquire the CSSELR index from the bank corresponding to the CCSIDR
       * bank
@@ -1912,7 +1955,7 @@ static void csselr_write(CPUARMState *env, const ARMCPRegInfo *ri,
  
  static uint64_t isr_read(CPUARMState *env, const ARMCPRegInfo *ri)
  {
-    CPUState *cs = ENV_GET_CPU(env);
+    CPUState *cs = env_cpu(env);
      uint64_t hcr_el2 = arm_hcr_el2_eff(env);
      uint64_t ret = 0;
  
@@ -2443,7 +2486,7 @@ static void gt_recalc_timer(ARMCPU *cpu, int timeridx)
  static void gt_timer_reset(CPUARMState *env, const ARMCPRegInfo *ri,
                             int timeridx)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
  
      timer_del(cpu->gt_timer[timeridx]);
  }
@@ -2464,7 +2507,7 @@ static void gt_cval_write(CPUARMState *env, const ARMCPRegInfo *ri,
  {
      trace_arm_gt_cval_write(timeridx, value);
      env->cp15.c14_timer[timeridx].cval = value;
-    gt_recalc_timer(arm_env_get_cpu(env), timeridx);
+    gt_recalc_timer(env_archcpu(env), timeridx);
  }
  
  static uint64_t gt_tval_read(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -2485,14 +2528,14 @@ static void gt_tval_write(CPUARMState *env, const ARMCPRegInfo *ri,
      trace_arm_gt_tval_write(timeridx, value);
      env->cp15.c14_timer[timeridx].cval = gt_get_countervalue(env) - offset +
                                           sextract64(value, 0, 32);
-    gt_recalc_timer(arm_env_get_cpu(env), timeridx);
+    gt_recalc_timer(env_archcpu(env), timeridx);
  }
  
  static void gt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
                           int timeridx,
                           uint64_t value)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      uint32_t oldval = env->cp15.c14_timer[timeridx].ctl;
  
      trace_arm_gt_ctl_write(timeridx, value);
@@ -2570,7 +2613,7 @@ static void gt_virt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
  static void gt_cntvoff_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                uint64_t value)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
  
      trace_arm_gt_cntvoff_write(value);
      raw_write(env, ri, value);
@@ -2687,7 +2730,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
      /* per-timer control */
      { .name = "CNTP_CTL", .cp = 15, .crn = 14, .crm = 2, .opc1 = 0, .opc2 = 1,
        .secure = ARM_CP_SECSTATE_NS,
-      .type = ARM_CP_IO | ARM_CP_ALIAS, .access = PL1_RW | PL0_R,
+      .type = ARM_CP_IO | ARM_CP_ALIAS, .access = PL0_RW,
        .accessfn = gt_ptimer_access,
        .fieldoffset = offsetoflow32(CPUARMState,
                                     cp15.c14_timer[GTIMER_PHYS].ctl),
@@ -2696,7 +2739,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
      { .name = "CNTP_CTL_S",
        .cp = 15, .crn = 14, .crm = 2, .opc1 = 0, .opc2 = 1,
        .secure = ARM_CP_SECSTATE_S,
-      .type = ARM_CP_IO | ARM_CP_ALIAS, .access = PL1_RW | PL0_R,
+      .type = ARM_CP_IO | ARM_CP_ALIAS, .access = PL0_RW,
        .accessfn = gt_ptimer_access,
        .fieldoffset = offsetoflow32(CPUARMState,
                                     cp15.c14_timer[GTIMER_SEC].ctl),
@@ -2704,14 +2747,14 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
      },
      { .name = "CNTP_CTL_EL0", .state = ARM_CP_STATE_AA64,
        .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 2, .opc2 = 1,
-      .type = ARM_CP_IO, .access = PL1_RW | PL0_R,
+      .type = ARM_CP_IO, .access = PL0_RW,
        .accessfn = gt_ptimer_access,
        .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].ctl),
        .resetvalue = 0,
        .writefn = gt_phys_ctl_write, .raw_writefn = raw_write,
      },
      { .name = "CNTV_CTL", .cp = 15, .crn = 14, .crm = 3, .opc1 = 0, .opc2 = 1,
-      .type = ARM_CP_IO | ARM_CP_ALIAS, .access = PL1_RW | PL0_R,
+      .type = ARM_CP_IO | ARM_CP_ALIAS, .access = PL0_RW,
        .accessfn = gt_vtimer_access,
        .fieldoffset = offsetoflow32(CPUARMState,
                                     cp15.c14_timer[GTIMER_VIRT].ctl),
@@ -2719,7 +2762,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
      },
      { .name = "CNTV_CTL_EL0", .state = ARM_CP_STATE_AA64,
        .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 3, .opc2 = 1,
-      .type = ARM_CP_IO, .access = PL1_RW | PL0_R,
+      .type = ARM_CP_IO, .access = PL0_RW,
        .accessfn = gt_vtimer_access,
        .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].ctl),
        .resetvalue = 0,
@@ -2728,31 +2771,31 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
      /* TimerValue views: a 32 bit downcounting view of the underlying state */
      { .name = "CNTP_TVAL", .cp = 15, .crn = 14, .crm = 2, .opc1 = 0, .opc2 = 0,
        .secure = ARM_CP_SECSTATE_NS,
-      .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL1_RW | PL0_R,
+      .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL0_RW,
        .accessfn = gt_ptimer_access,
        .readfn = gt_phys_tval_read, .writefn = gt_phys_tval_write,
      },
      { .name = "CNTP_TVAL_S",
        .cp = 15, .crn = 14, .crm = 2, .opc1 = 0, .opc2 = 0,
        .secure = ARM_CP_SECSTATE_S,
-      .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL1_RW | PL0_R,
+      .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL0_RW,
        .accessfn = gt_ptimer_access,
        .readfn = gt_sec_tval_read, .writefn = gt_sec_tval_write,
      },
      { .name = "CNTP_TVAL_EL0", .state = ARM_CP_STATE_AA64,
        .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 2, .opc2 = 0,
-      .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL1_RW | PL0_R,
+      .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL0_RW,
        .accessfn = gt_ptimer_access, .resetfn = gt_phys_timer_reset,
        .readfn = gt_phys_tval_read, .writefn = gt_phys_tval_write,
      },
      { .name = "CNTV_TVAL", .cp = 15, .crn = 14, .crm = 3, .opc1 = 0, .opc2 = 0,
-      .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL1_RW | PL0_R,
+      .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL0_RW,
        .accessfn = gt_vtimer_access,
        .readfn = gt_virt_tval_read, .writefn = gt_virt_tval_write,
      },
      { .name = "CNTV_TVAL_EL0", .state = ARM_CP_STATE_AA64,
        .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 3, .opc2 = 0,
-      .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL1_RW | PL0_R,
+      .type = ARM_CP_NO_RAW | ARM_CP_IO, .access = PL0_RW,
        .accessfn = gt_vtimer_access, .resetfn = gt_virt_timer_reset,
        .readfn = gt_virt_tval_read, .writefn = gt_virt_tval_write,
      },
@@ -2780,7 +2823,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
      /* Comparison value, indicating when the timer goes off */
      { .name = "CNTP_CVAL", .cp = 15, .crm = 14, .opc1 = 2,
        .secure = ARM_CP_SECSTATE_NS,
-      .access = PL1_RW | PL0_R,
+      .access = PL0_RW,
        .type = ARM_CP_64BIT | ARM_CP_IO | ARM_CP_ALIAS,
        .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].cval),
        .accessfn = gt_ptimer_access,
@@ -2788,7 +2831,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
      },
      { .name = "CNTP_CVAL_S", .cp = 15, .crm = 14, .opc1 = 2,
        .secure = ARM_CP_SECSTATE_S,
-      .access = PL1_RW | PL0_R,
+      .access = PL0_RW,
        .type = ARM_CP_64BIT | ARM_CP_IO | ARM_CP_ALIAS,
        .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_SEC].cval),
        .accessfn = gt_ptimer_access,
@@ -2796,14 +2839,14 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
      },
      { .name = "CNTP_CVAL_EL0", .state = ARM_CP_STATE_AA64,
        .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 2, .opc2 = 2,
-      .access = PL1_RW | PL0_R,
+      .access = PL0_RW,
        .type = ARM_CP_IO,
        .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_PHYS].cval),
        .resetvalue = 0, .accessfn = gt_ptimer_access,
        .writefn = gt_phys_cval_write, .raw_writefn = raw_write,
      },
      { .name = "CNTV_CVAL", .cp = 15, .crm = 14, .opc1 = 3,
-      .access = PL1_RW | PL0_R,
+      .access = PL0_RW,
        .type = ARM_CP_64BIT | ARM_CP_IO | ARM_CP_ALIAS,
        .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].cval),
        .accessfn = gt_vtimer_access,
@@ -2811,7 +2854,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
      },
      { .name = "CNTV_CVAL_EL0", .state = ARM_CP_STATE_AA64,
        .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 3, .opc2 = 2,
-      .access = PL1_RW | PL0_R,
+      .access = PL0_RW,
        .type = ARM_CP_IO,
        .fieldoffset = offsetof(CPUARMState, cp15.c14_timer[GTIMER_VIRT].cval),
        .resetvalue = 0, .accessfn = gt_vtimer_access,
@@ -3203,7 +3246,7 @@ static uint64_t pmsav7_read(CPUARMState *env, const ARMCPRegInfo *ri)
  static void pmsav7_write(CPUARMState *env, const ARMCPRegInfo *ri,
                           uint64_t value)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      uint32_t *u32p = *(uint32_t **)raw_ptr(env, ri);
  
      if (!u32p) {
@@ -3218,7 +3261,7 @@ static void pmsav7_write(CPUARMState *env, const ARMCPRegInfo *ri,
  static void pmsav7_rgnr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                uint64_t value)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      uint32_t nrgs = cpu->pmsav7_dregion;
  
      if (value >= nrgs) {
@@ -3346,7 +3389,7 @@ static void vmsa_ttbcr_raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
  static void vmsa_ttbcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                               uint64_t value)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      TCR *tcr = raw_ptr(env, ri);
  
      if (arm_feature(env, ARM_FEATURE_LPAE)) {
@@ -3375,7 +3418,7 @@ static void vmsa_ttbcr_reset(CPUARMState *env, const ARMCPRegInfo *ri)
  static void vmsa_tcr_el1_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                 uint64_t value)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      TCR *tcr = raw_ptr(env, ri);
  
      /* For AArch64 the A1 bit could result in a change of ASID, so TLB flush. */
@@ -3389,7 +3432,7 @@ static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri,
      /* If the ASID changes (with a 64-bit write), we must flush the TLB.  */
      if (cpreg_field_is_64bit(ri) &&
          extract64(raw_read(env, ri) ^ value, 48, 16) != 0) {
-        ARMCPU *cpu = arm_env_get_cpu(env);
+        ARMCPU *cpu = env_archcpu(env);
          tlb_flush(CPU(cpu));
      }
      raw_write(env, ri, value);
@@ -3398,7 +3441,7 @@ static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri,
  static void vttbr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                          uint64_t value)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      CPUState *cs = CPU(cpu);
  
      /* Accesses to VTTBR may change the VMID so we must flush the TLB.  */
@@ -3488,7 +3531,7 @@ static void omap_wfi_write(CPUARMState *env, const ARMCPRegInfo *ri,
                             uint64_t value)
  {
      /* Wait-for-interrupt (deprecated) */
-    cpu_interrupt(CPU(arm_env_get_cpu(env)), CPU_INTERRUPT_HALT);
+    cpu_interrupt(env_cpu(env), CPU_INTERRUPT_HALT);
  }
  
  static void omap_cachemaint_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -3641,7 +3684,7 @@ static const ARMCPRegInfo strongarm_cp_reginfo[] = {
  
  static uint64_t midr_read(CPUARMState *env, const ARMCPRegInfo *ri)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      unsigned int cur_el = arm_current_el(env);
      bool secure = arm_is_secure(env);
  
@@ -3653,7 +3696,7 @@ static uint64_t midr_read(CPUARMState *env, const ARMCPRegInfo *ri)
  
  static uint64_t mpidr_read_val(CPUARMState *env)
  {
-    ARMCPU *cpu = ARM_CPU(arm_env_get_cpu(env));
+    ARMCPU *cpu = env_archcpu(env);
      uint64_t mpidr = cpu->mp_affinity;
  
      if (arm_feature(env, ARM_FEATURE_V7MP)) {
@@ -3764,7 +3807,7 @@ static CPAccessResult aa64_cacheop_access(CPUARMState *env,
  static void tlbi_aa64_vmalle1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                        uint64_t value)
  {
-    CPUState *cs = ENV_GET_CPU(env);
+    CPUState *cs = env_cpu(env);
      bool sec = arm_is_secure_below_el3(env);
  
      if (sec) {
@@ -3781,7 +3824,7 @@ static void tlbi_aa64_vmalle1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
  static void tlbi_aa64_vmalle1_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                      uint64_t value)
  {
-    CPUState *cs = ENV_GET_CPU(env);
+    CPUState *cs = env_cpu(env);
  
      if (tlb_force_broadcast(env)) {
          tlbi_aa64_vmalle1is_write(env, NULL, value);
@@ -3806,7 +3849,7 @@ static void tlbi_aa64_alle1_write(CPUARMState *env, const ARMCPRegInfo *ri,
       * stage 2 translations, whereas most other scopes only invalidate
       * stage 1 translations.
       */
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      CPUState *cs = CPU(cpu);
  
      if (arm_is_secure_below_el3(env)) {
@@ -3830,7 +3873,7 @@ static void tlbi_aa64_alle1_write(CPUARMState *env, const ARMCPRegInfo *ri,
  static void tlbi_aa64_alle2_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                    uint64_t value)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      CPUState *cs = CPU(cpu);
  
      tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_S1E2);
@@ -3839,7 +3882,7 @@ static void tlbi_aa64_alle2_write(CPUARMState *env, const ARMCPRegInfo *ri,
  static void tlbi_aa64_alle3_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                    uint64_t value)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      CPUState *cs = CPU(cpu);
  
      tlb_flush_by_mmuidx(cs, ARMMMUIdxBit_S1E3);
@@ -3852,7 +3895,7 @@ static void tlbi_aa64_alle1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
       * stage 2 translations, whereas most other scopes only invalidate
       * stage 1 translations.
       */
-    CPUState *cs = ENV_GET_CPU(env);
+    CPUState *cs = env_cpu(env);
      bool sec = arm_is_secure_below_el3(env);
      bool has_el2 = arm_feature(env, ARM_FEATURE_EL2);
  
@@ -3875,7 +3918,7 @@ static void tlbi_aa64_alle1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
  static void tlbi_aa64_alle2is_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                      uint64_t value)
  {
-    CPUState *cs = ENV_GET_CPU(env);
+    CPUState *cs = env_cpu(env);
  
      tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_S1E2);
  }
@@ -3883,7 +3926,7 @@ static void tlbi_aa64_alle2is_write(CPUARMState *env, const ARMCPRegInfo *ri,
  static void tlbi_aa64_alle3is_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                      uint64_t value)
  {
-    CPUState *cs = ENV_GET_CPU(env);
+    CPUState *cs = env_cpu(env);
  
      tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_S1E3);
  }
@@ -3895,7 +3938,7 @@ static void tlbi_aa64_vae2_write(CPUARMState *env, const ARMCPRegInfo *ri,
       * Currently handles both VAE2 and VALE2, since we don't support
       * flush-last-level-only.
       */
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      CPUState *cs = CPU(cpu);
      uint64_t pageaddr = sextract64(value << 12, 0, 56);
  
@@ -3909,7 +3952,7 @@ static void tlbi_aa64_vae3_write(CPUARMState *env, const ARMCPRegInfo *ri,
       * Currently handles both VAE3 and VALE3, since we don't support
       * flush-last-level-only.
       */
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      CPUState *cs = CPU(cpu);
      uint64_t pageaddr = sextract64(value << 12, 0, 56);
  
@@ -3919,7 +3962,7 @@ static void tlbi_aa64_vae3_write(CPUARMState *env, const ARMCPRegInfo *ri,
  static void tlbi_aa64_vae1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                     uint64_t value)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      CPUState *cs = CPU(cpu);
      bool sec = arm_is_secure_below_el3(env);
      uint64_t pageaddr = sextract64(value << 12, 0, 56);
@@ -3943,7 +3986,7 @@ static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri,
       * since we don't support flush-for-specific-ASID-only or
       * flush-last-level-only.
       */
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      CPUState *cs = CPU(cpu);
      uint64_t pageaddr = sextract64(value << 12, 0, 56);
  
@@ -3966,7 +4009,7 @@ static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri,
  static void tlbi_aa64_vae2is_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                     uint64_t value)
  {
-    CPUState *cs = ENV_GET_CPU(env);
+    CPUState *cs = env_cpu(env);
      uint64_t pageaddr = sextract64(value << 12, 0, 56);
  
      tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
@@ -3976,7 +4019,7 @@ static void tlbi_aa64_vae2is_write(CPUARMState *env, const ARMCPRegInfo *ri,
  static void tlbi_aa64_vae3is_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                     uint64_t value)
  {
-    CPUState *cs = ENV_GET_CPU(env);
+    CPUState *cs = env_cpu(env);
      uint64_t pageaddr = sextract64(value << 12, 0, 56);
  
      tlb_flush_page_by_mmuidx_all_cpus_synced(cs, pageaddr,
@@ -3992,7 +4035,7 @@ static void tlbi_aa64_ipas2e1_write(CPUARMState *env, const ARMCPRegInfo *ri,
       * translation information.
       * This must NOP if EL2 isn't implemented or SCR_EL3.NS is zero.
       */
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      CPUState *cs = CPU(cpu);
      uint64_t pageaddr;
  
@@ -4008,7 +4051,7 @@ static void tlbi_aa64_ipas2e1_write(CPUARMState *env, const ARMCPRegInfo *ri,
  static void tlbi_aa64_ipas2e1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                        uint64_t value)
  {
-    CPUState *cs = ENV_GET_CPU(env);
+    CPUState *cs = env_cpu(env);
      uint64_t pageaddr;
  
      if (!arm_feature(env, ARM_FEATURE_EL2) || !(env->cp15.scr_el3 & SCR_NS)) {
@@ -4035,7 +4078,7 @@ static CPAccessResult aa64_zva_access(CPUARMState *env, const ARMCPRegInfo *ri,
  
  static uint64_t aa64_dczid_read(CPUARMState *env, const ARMCPRegInfo *ri)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      int dzp_bit = 1 << 4;
  
      /* DZP indicates whether DC ZVA access is allowed */
@@ -4070,7 +4113,7 @@ static void spsel_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t val)
  static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                          uint64_t value)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
  
      if (raw_read(env, ri) == value) {
          /* Skip the TLB flush if nothing actually changed; Linux likes
@@ -4562,7 +4605,7 @@ static const ARMCPRegInfo el3_no_el2_v8_cp_reginfo[] = {
  
  static void hcr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      uint64_t valid_mask = HCR_MASK;
  
      if (arm_feature(env, ARM_FEATURE_EL3)) {
@@ -4674,6 +4717,36 @@ uint64_t arm_hcr_el2_eff(CPUARMState *env)
      return ret;
  }
  
+static void cptr_el2_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                           uint64_t value)
+{
+    /*
+     * For A-profile AArch32 EL3, if NSACR.CP10
+     * is 0 then HCPTR.{TCP11,TCP10} ignore writes and read as 1.
+     */
+    if (arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3) &&
+        !arm_is_secure(env) && !extract32(env->cp15.nsacr, 10, 1)) {
+        value &= ~(0x3 << 10);
+        value |= env->cp15.cptr_el[2] & (0x3 << 10);
+    }
+    env->cp15.cptr_el[2] = value;
+}
+
+static uint64_t cptr_el2_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    /*
+     * For A-profile AArch32 EL3, if NSACR.CP10
+     * is 0 then HCPTR.{TCP11,TCP10} ignore writes and read as 1.
+     */
+    uint64_t value = env->cp15.cptr_el[2];
+
+    if (arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3) &&
+        !arm_is_secure(env) && !extract32(env->cp15.nsacr, 10, 1)) {
+        value |= 0x3 << 10;
+    }
+    return value;
+}
+
  static const ARMCPRegInfo el2_cp_reginfo[] = {
      { .name = "HCR_EL2", .state = ARM_CP_STATE_AA64,
        .type = ARM_CP_IO,
@@ -4721,7 +4794,8 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
      { .name = "CPTR_EL2", .state = ARM_CP_STATE_BOTH,
        .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 2,
        .access = PL2_RW, .accessfn = cptr_access, .resetvalue = 0,
-      .fieldoffset = offsetof(CPUARMState, cp15.cptr_el[2]) },
+      .fieldoffset = offsetof(CPUARMState, cp15.cptr_el[2]),
+      .readfn = cptr_el2_read, .writefn = cptr_el2_write },
      { .name = "MAIR_EL2", .state = ARM_CP_STATE_BOTH,
        .opc0 = 3, .opc1 = 4, .crn = 10, .crm = 2, .opc2 = 0,
        .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.mair_el[2]),
@@ -5229,7 +5303,7 @@ int sve_exception_el(CPUARMState *env, int el)
   */
  uint32_t sve_zcr_len_for_el(CPUARMState *env, int el)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      uint32_t zcr_len = cpu->sve_max_vq - 1;
  
      if (el <= 1) {
@@ -5397,7 +5471,7 @@ void hw_watchpoint_update_all(ARMCPU *cpu)
  static void dbgwvr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                           uint64_t value)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      int i = ri->crm;
  
      /* Bits [63:49] are hardwired to the value of bit [48]; that is, the
@@ -5413,7 +5487,7 @@ static void dbgwvr_write(CPUARMState *env, const ARMCPRegInfo *ri,
  static void dbgwcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                           uint64_t value)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      int i = ri->crm;
  
      raw_write(env, ri, value);
@@ -5515,7 +5589,7 @@ void hw_breakpoint_update_all(ARMCPU *cpu)
  static void dbgbvr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                           uint64_t value)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      int i = ri->crm;
  
      raw_write(env, ri, value);
@@ -5525,7 +5599,7 @@ static void dbgbvr_write(CPUARMState *env, const ARMCPRegInfo *ri,
  static void dbgbcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                           uint64_t value)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      int i = ri->crm;
  
      /* BAS[3] is a read-only copy of BAS[2], and BAS[1] a read-only
@@ -5621,7 +5695,7 @@ static void define_debug_regs(ARMCPU *cpu)
   */
  static uint64_t id_pfr1_read(CPUARMState *env, const ARMCPRegInfo *ri)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      uint64_t pfr1 = cpu->id_pfr1;
  
      if (env->gicv3state) {
@@ -5632,7 +5706,7 @@ static uint64_t id_pfr1_read(CPUARMState *env, const ARMCPRegInfo *ri)
  
  static uint64_t id_aa64pfr0_read(CPUARMState *env, const ARMCPRegInfo *ri)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      uint64_t pfr0 = cpu->isar.id_aa64pfr0;
  
      if (env->gicv3state) {
@@ -5700,47 +5774,130 @@ static const ARMCPRegInfo pauth_reginfo[] = {
      { .name = "APDAKEYLO_EL1", .state = ARM_CP_STATE_AA64,
        .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 2, .opc2 = 0,
        .access = PL1_RW, .accessfn = access_pauth,
-      .fieldoffset = offsetof(CPUARMState, apda_key.lo) },
+      .fieldoffset = offsetof(CPUARMState, keys.apda.lo) },
      { .name = "APDAKEYHI_EL1", .state = ARM_CP_STATE_AA64,
        .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 2, .opc2 = 1,
        .access = PL1_RW, .accessfn = access_pauth,
-      .fieldoffset = offsetof(CPUARMState, apda_key.hi) },
+      .fieldoffset = offsetof(CPUARMState, keys.apda.hi) },
      { .name = "APDBKEYLO_EL1", .state = ARM_CP_STATE_AA64,
        .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 2, .opc2 = 2,
        .access = PL1_RW, .accessfn = access_pauth,
-      .fieldoffset = offsetof(CPUARMState, apdb_key.lo) },
+      .fieldoffset = offsetof(CPUARMState, keys.apdb.lo) },
      { .name = "APDBKEYHI_EL1", .state = ARM_CP_STATE_AA64,
        .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 2, .opc2 = 3,
        .access = PL1_RW, .accessfn = access_pauth,
-      .fieldoffset = offsetof(CPUARMState, apdb_key.hi) },
+      .fieldoffset = offsetof(CPUARMState, keys.apdb.hi) },
      { .name = "APGAKEYLO_EL1", .state = ARM_CP_STATE_AA64,
        .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 3, .opc2 = 0,
        .access = PL1_RW, .accessfn = access_pauth,
-      .fieldoffset = offsetof(CPUARMState, apga_key.lo) },
+      .fieldoffset = offsetof(CPUARMState, keys.apga.lo) },
      { .name = "APGAKEYHI_EL1", .state = ARM_CP_STATE_AA64,
        .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 3, .opc2 = 1,
        .access = PL1_RW, .accessfn = access_pauth,
-      .fieldoffset = offsetof(CPUARMState, apga_key.hi) },
+      .fieldoffset = offsetof(CPUARMState, keys.apga.hi) },
      { .name = "APIAKEYLO_EL1", .state = ARM_CP_STATE_AA64,
        .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 1, .opc2 = 0,
        .access = PL1_RW, .accessfn = access_pauth,
-      .fieldoffset = offsetof(CPUARMState, apia_key.lo) },
+      .fieldoffset = offsetof(CPUARMState, keys.apia.lo) },
      { .name = "APIAKEYHI_EL1", .state = ARM_CP_STATE_AA64,
        .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 1, .opc2 = 1,
        .access = PL1_RW, .accessfn = access_pauth,
-      .fieldoffset = offsetof(CPUARMState, apia_key.hi) },
+      .fieldoffset = offsetof(CPUARMState, keys.apia.hi) },
      { .name = "APIBKEYLO_EL1", .state = ARM_CP_STATE_AA64,
        .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 1, .opc2 = 2,
        .access = PL1_RW, .accessfn = access_pauth,
-      .fieldoffset = offsetof(CPUARMState, apib_key.lo) },
+      .fieldoffset = offsetof(CPUARMState, keys.apib.lo) },
      { .name = "APIBKEYHI_EL1", .state = ARM_CP_STATE_AA64,
        .opc0 = 3, .opc1 = 0, .crn = 2, .crm = 1, .opc2 = 3,
        .access = PL1_RW, .accessfn = access_pauth,
-      .fieldoffset = offsetof(CPUARMState, apib_key.hi) },
+      .fieldoffset = offsetof(CPUARMState, keys.apib.hi) },
+    REGINFO_SENTINEL
+};
+
+static uint64_t rndr_readfn(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    Error *err = NULL;
+    uint64_t ret;
+
+    /* Success sets NZCV = 0000.  */
+    env->NF = env->CF = env->VF = 0, env->ZF = 1;
+
+    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
+        /*
+         * ??? Failed, for unknown reasons in the crypto subsystem.
+         * The best we can do is log the reason and return the
+         * timed-out indication to the guest.  There is no reason
+         * we know to expect this failure to be transitory, so the
+         * guest may well hang retrying the operation.
+         */
+        qemu_log_mask(LOG_UNIMP, "%s: Crypto failure: %s",
+                      ri->name, error_get_pretty(err));
+        error_free(err);
+
+        env->ZF = 0; /* NZCF = 0100 */
+        return 0;
+    }
+    return ret;
+}
+
+/* We do not support re-seeding, so the two registers operate the same.  */
+static const ARMCPRegInfo rndr_reginfo[] = {
+    { .name = "RNDR", .state = ARM_CP_STATE_AA64,
+      .type = ARM_CP_NO_RAW | ARM_CP_SUPPRESS_TB_END | ARM_CP_IO,
+      .opc0 = 3, .opc1 = 3, .crn = 2, .crm = 4, .opc2 = 0,
+      .access = PL0_R, .readfn = rndr_readfn },
+    { .name = "RNDRRS", .state = ARM_CP_STATE_AA64,
+      .type = ARM_CP_NO_RAW | ARM_CP_SUPPRESS_TB_END | ARM_CP_IO,
+      .opc0 = 3, .opc1 = 3, .crn = 2, .crm = 4, .opc2 = 1,
+      .access = PL0_R, .readfn = rndr_readfn },
      REGINFO_SENTINEL
  };
  #endif
  
+static CPAccessResult access_predinv(CPUARMState *env, const ARMCPRegInfo *ri,
+                                     bool isread)
+{
+    int el = arm_current_el(env);
+
+    if (el == 0) {
+        uint64_t sctlr = arm_sctlr(env, el);
+        if (!(sctlr & SCTLR_EnRCTX)) {
+            return CP_ACCESS_TRAP;
+        }
+    } else if (el == 1) {
+        uint64_t hcr = arm_hcr_el2_eff(env);
+        if (hcr & HCR_NV) {
+            return CP_ACCESS_TRAP_EL2;
+        }
+    }
+    return CP_ACCESS_OK;
+}
+
+static const ARMCPRegInfo predinv_reginfo[] = {
+    { .name = "CFP_RCTX", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 3, .opc2 = 4,
+      .type = ARM_CP_NOP, .access = PL0_W, .accessfn = access_predinv },
+    { .name = "DVP_RCTX", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 3, .opc2 = 5,
+      .type = ARM_CP_NOP, .access = PL0_W, .accessfn = access_predinv },
+    { .name = "CPP_RCTX", .state = ARM_CP_STATE_AA64,
+      .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 3, .opc2 = 7,
+      .type = ARM_CP_NOP, .access = PL0_W, .accessfn = access_predinv },
+    /*
+     * Note the AArch32 opcodes have a different OPC1.
+     */
+    { .name = "CFPRCTX", .state = ARM_CP_STATE_AA32,
+      .cp = 15, .opc1 = 0, .crn = 7, .crm = 3, .opc2 = 4,
+      .type = ARM_CP_NOP, .access = PL0_W, .accessfn = access_predinv },
+    { .name = "DVPRCTX", .state = ARM_CP_STATE_AA32,
+      .cp = 15, .opc1 = 0, .crn = 7, .crm = 3, .opc2 = 5,
+      .type = ARM_CP_NOP, .access = PL0_W, .accessfn = access_predinv },
+    { .name = "CPPRCTX", .state = ARM_CP_STATE_AA32,
+      .cp = 15, .opc1 = 0, .crn = 7, .crm = 3, .opc2 = 7,
+      .type = ARM_CP_NOP, .access = PL0_W, .accessfn = access_predinv },
+    REGINFO_SENTINEL
+};
+
  void register_cp_regs_for_features(ARMCPU *cpu)
  {
      /* Register all the coprocessor registers based on feature bits */
@@ -6639,7 +6796,21 @@ void register_cp_regs_for_features(ARMCPU *cpu)
      if (cpu_isar_feature(aa64_pauth, cpu)) {
          define_arm_cp_regs(cpu, pauth_reginfo);
      }
+    if (cpu_isar_feature(aa64_rndr, cpu)) {
+        define_arm_cp_regs(cpu, rndr_reginfo);
+    }
  #endif
+
+    /*
+     * While all v8.0 cpus support aarch64, QEMU does have configurations
+     * that do not set ID_AA64ISAR1, e.g. user-only qemu-arm -cpu max,
+     * which will set ID_ISAR6.
+     */
+    if (arm_feature(&cpu->env, ARM_FEATURE_AARCH64)
+        ? cpu_isar_feature(aa64_predinv, cpu)
+        : cpu_isar_feature(aa32_predinv, cpu)) {
+        define_arm_cp_regs(cpu, predinv_reginfo);
+    }
  }
  
  void arm_cpu_register_gdb_regs_for_features(ARMCPU *cpu)
@@ -6687,29 +6858,23 @@ static gint arm_cpu_list_compare(gconstpointer a, gconstpointer b)
  static void arm_cpu_list_entry(gpointer data, gpointer user_data)
  {
      ObjectClass *oc = data;
-    CPUListState *s = user_data;
      const char *typename;
      char *name;
  
      typename = object_class_get_name(oc);
      name = g_strndup(typename, strlen(typename) - strlen("-" TYPE_ARM_CPU));
-    (*s->cpu_fprintf)(s->file, "  %s\n",
-                      name);
+    qemu_printf("  %s\n", name);
      g_free(name);
  }
  
-void arm_cpu_list(FILE *f, fprintf_function cpu_fprintf)
+void arm_cpu_list(void)
  {
-    CPUListState s = {
-        .file = f,
-        .cpu_fprintf = cpu_fprintf,
-    };
      GSList *list;
  
      list = object_class_get_list(TYPE_ARM_CPU, false);
      list = g_slist_sort(list, arm_cpu_list_compare);
-    (*cpu_fprintf)(f, "Available CPUs:\n");
-    g_slist_foreach(list, arm_cpu_list_entry, &s);
+    qemu_printf("Available CPUs:\n");
+    g_slist_foreach(list, arm_cpu_list_entry, NULL);
      g_slist_free(list);
  }
  
@@ -6733,7 +6898,7 @@ static void arm_cpu_add_definition(gpointer data, gpointer user_data)
      *cpu_list = entry;
  }
  
-CpuDefinitionInfoList *arch_query_cpu_definitions(Error **errp)
+CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp)
  {
      CpuDefinitionInfoList *cpu_list = NULL;
      GSList *list;
@@ -7321,14 +7486,14 @@ uint32_t HELPER(rbit)(uint32_t x)
  /* These should probably raise undefined insn exceptions.  */
  void HELPER(v7m_msr)(CPUARMState *env, uint32_t reg, uint32_t val)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
  
      cpu_abort(CPU(cpu), "v7m_msr %d\n", reg);
  }
  
  uint32_t HELPER(v7m_mrs)(CPUARMState *env, uint32_t reg)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
  
      cpu_abort(CPU(cpu), "v7m_mrs %d\n", reg);
      return 0;
@@ -7346,6 +7511,24 @@ void HELPER(v7m_blxns)(CPUARMState *env, uint32_t dest)
      g_assert_not_reached();
  }
  
+void HELPER(v7m_preserve_fp_state)(CPUARMState *env)
+{
+    /* translate.c should never generate calls here in user-only mode */
+    g_assert_not_reached();
+}
+
+void HELPER(v7m_vlstm)(CPUARMState *env, uint32_t fptr)
+{
+    /* translate.c should never generate calls here in user-only mode */
+    g_assert_not_reached();
+}
+
+void HELPER(v7m_vlldm)(CPUARMState *env, uint32_t fptr)
+{
+    /* translate.c should never generate calls here in user-only mode */
+    g_assert_not_reached();
+}
+
  uint32_t HELPER(v7m_tt)(CPUARMState *env, uint32_t addr, uint32_t op)
  {
      /* The TT instructions can be used by unprivileged code, but in
@@ -7370,7 +7553,7 @@ uint32_t HELPER(v7m_tt)(CPUARMState *env, uint32_t addr, uint32_t op)
  
  static void switch_mode(CPUARMState *env, int mode)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
  
      if (mode != ARM_CPU_MODE_USR) {
          cpu_abort(CPU(cpu), "Tried to switch out of user mode\n");
@@ -7524,8 +7707,37 @@ uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx,
      return target_el;
  }
  
+/*
+ * Return true if the v7M CPACR permits access to the FPU for the specified
+ * security state and privilege level.
+ */
+static bool v7m_cpacr_pass(CPUARMState *env, bool is_secure, bool is_priv)
+{
+    switch (extract32(env->v7m.cpacr[is_secure], 20, 2)) {
+    case 0:
+    case 2: /* UNPREDICTABLE: we treat like 0 */
+        return false;
+    case 1:
+        return is_priv;
+    case 3:
+        return true;
+    default:
+        g_assert_not_reached();
+    }
+}
+
+/*
+ * What kind of stack write are we doing? This affects how exceptions
+ * generated during the stacking are treated.
+ */
+typedef enum StackingMode {
+    STACK_NORMAL,
+    STACK_IGNFAULTS,
+    STACK_LAZYFP,
+} StackingMode;
+
  static bool v7m_stack_write(ARMCPU *cpu, uint32_t addr, uint32_t value,
-                            ARMMMUIdx mmu_idx, bool ignfault)
+                            ARMMMUIdx mmu_idx, StackingMode mode)
  {
      CPUState *cs = CPU(cpu);
      CPUARMState *env = &cpu->env;
@@ -7543,15 +7755,31 @@ static bool v7m_stack_write(ARMCPU *cpu, uint32_t addr, uint32_t value,
                        &attrs, &prot, &page_size, &fi, NULL)) {
          /* MPU/SAU lookup failed */
          if (fi.type == ARMFault_QEMU_SFault) {
-            qemu_log_mask(CPU_LOG_INT,
-                          "...SecureFault with SFSR.AUVIOL during stacking\n");
-            env->v7m.sfsr |= R_V7M_SFSR_AUVIOL_MASK | R_V7M_SFSR_SFARVALID_MASK;
+            if (mode == STACK_LAZYFP) {
+                qemu_log_mask(CPU_LOG_INT,
+                              "...SecureFault with SFSR.LSPERR "
+                              "during lazy stacking\n");
+                env->v7m.sfsr |= R_V7M_SFSR_LSPERR_MASK;
+            } else {
+                qemu_log_mask(CPU_LOG_INT,
+                              "...SecureFault with SFSR.AUVIOL "
+                              "during stacking\n");
+                env->v7m.sfsr |= R_V7M_SFSR_AUVIOL_MASK;
+            }
+            env->v7m.sfsr |= R_V7M_SFSR_SFARVALID_MASK;
              env->v7m.sfar = addr;
              exc = ARMV7M_EXCP_SECURE;
              exc_secure = false;
          } else {
-            qemu_log_mask(CPU_LOG_INT, "...MemManageFault with CFSR.MSTKERR\n");
-            env->v7m.cfsr[secure] |= R_V7M_CFSR_MSTKERR_MASK;
+            if (mode == STACK_LAZYFP) {
+                qemu_log_mask(CPU_LOG_INT,
+                              "...MemManageFault with CFSR.MLSPERR\n");
+                env->v7m.cfsr[secure] |= R_V7M_CFSR_MLSPERR_MASK;
+            } else {
+                qemu_log_mask(CPU_LOG_INT,
+                              "...MemManageFault with CFSR.MSTKERR\n");
+                env->v7m.cfsr[secure] |= R_V7M_CFSR_MSTKERR_MASK;
+            }
              exc = ARMV7M_EXCP_MEM;
              exc_secure = secure;
          }
@@ -7561,8 +7789,13 @@ static bool v7m_stack_write(ARMCPU *cpu, uint32_t addr, uint32_t value,
                           attrs, &txres);
      if (txres != MEMTX_OK) {
          /* BusFault trying to write the data */
-        qemu_log_mask(CPU_LOG_INT, "...BusFault with BFSR.STKERR\n");
-        env->v7m.cfsr[M_REG_NS] |= R_V7M_CFSR_STKERR_MASK;
+        if (mode == STACK_LAZYFP) {
+            qemu_log_mask(CPU_LOG_INT, "...BusFault with BFSR.LSPERR\n");
+            env->v7m.cfsr[M_REG_NS] |= R_V7M_CFSR_LSPERR_MASK;
+        } else {
+            qemu_log_mask(CPU_LOG_INT, "...BusFault with BFSR.STKERR\n");
+            env->v7m.cfsr[M_REG_NS] |= R_V7M_CFSR_STKERR_MASK;
+        }
          exc = ARMV7M_EXCP_BUS;
          exc_secure = false;
          goto pend_fault;
@@ -7577,11 +7810,19 @@ pend_fault:
       * later if we have two derived exceptions.
       * The only case when we must not pend the exception but instead
       * throw it away is if we are doing the push of the callee registers
-     * and we've already generated a derived exception. Even in this
-     * case we will still update the fault status registers.
+     * and we've already generated a derived exception (this is indicated
+     * by the caller passing STACK_IGNFAULTS). Even in this case we will
+     * still update the fault status registers.
       */
-    if (!ignfault) {
+    switch (mode) {
+    case STACK_NORMAL:
          armv7m_nvic_set_pending_derived(env->nvic, exc, exc_secure);
+        break;
+    case STACK_LAZYFP:
+        armv7m_nvic_set_pending_lazyfp(env->nvic, exc, exc_secure);
+        break;
+    case STACK_IGNFAULTS:
+        break;
      }
      return false;
  }
@@ -7647,6 +7888,97 @@ pend_fault:
      return false;
  }
  
+void HELPER(v7m_preserve_fp_state)(CPUARMState *env)
+{
+    /*
+     * Preserve FP state (because LSPACT was set and we are about
+     * to execute an FP instruction). This corresponds to the
+     * PreserveFPState() pseudocode.
+     * We may throw an exception if the stacking fails.
+     */
+    ARMCPU *cpu = env_archcpu(env);
+    bool is_secure = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_S_MASK;
+    bool negpri = !(env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_HFRDY_MASK);
+    bool is_priv = !(env->v7m.fpccr[is_secure] & R_V7M_FPCCR_USER_MASK);
+    bool splimviol = env->v7m.fpccr[is_secure] & R_V7M_FPCCR_SPLIMVIOL_MASK;
+    uint32_t fpcar = env->v7m.fpcar[is_secure];
+    bool stacked_ok = true;
+    bool ts = is_secure && (env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_TS_MASK);
+    bool take_exception;
+
+    /* Take the iothread lock as we are going to touch the NVIC */
+    qemu_mutex_lock_iothread();
+
+    /* Check the background context had access to the FPU */
+    if (!v7m_cpacr_pass(env, is_secure, is_priv)) {
+        armv7m_nvic_set_pending_lazyfp(env->nvic, ARMV7M_EXCP_USAGE, is_secure);
+        env->v7m.cfsr[is_secure] |= R_V7M_CFSR_NOCP_MASK;
+        stacked_ok = false;
+    } else if (!is_secure && !extract32(env->v7m.nsacr, 10, 1)) {
+        armv7m_nvic_set_pending_lazyfp(env->nvic, ARMV7M_EXCP_USAGE, M_REG_S);
+        env->v7m.cfsr[M_REG_S] |= R_V7M_CFSR_NOCP_MASK;
+        stacked_ok = false;
+    }
+
+    if (!splimviol && stacked_ok) {
+        /* We only stack if the stack limit wasn't violated */
+        int i;
+        ARMMMUIdx mmu_idx;
+
+        mmu_idx = arm_v7m_mmu_idx_all(env, is_secure, is_priv, negpri);
+        for (i = 0; i < (ts ? 32 : 16); i += 2) {
+            uint64_t dn = *aa32_vfp_dreg(env, i / 2);
+            uint32_t faddr = fpcar + 4 * i;
+            uint32_t slo = extract64(dn, 0, 32);
+            uint32_t shi = extract64(dn, 32, 32);
+
+            if (i >= 16) {
+                faddr += 8; /* skip the slot for the FPSCR */
+            }
+            stacked_ok = stacked_ok &&
+                v7m_stack_write(cpu, faddr, slo, mmu_idx, STACK_LAZYFP) &&
+                v7m_stack_write(cpu, faddr + 4, shi, mmu_idx, STACK_LAZYFP);
+        }
+
+        stacked_ok = stacked_ok &&
+            v7m_stack_write(cpu, fpcar + 0x40,
+                            vfp_get_fpscr(env), mmu_idx, STACK_LAZYFP);
+    }
+
+    /*
+     * We definitely pended an exception, but it's possible that it
+     * might not be able to be taken now. If its priority permits us
+     * to take it now, then we must not update the LSPACT or FP regs,
+     * but instead jump out to take the exception immediately.
+     * If it's just pending and won't be taken until the current
+     * handler exits, then we do update LSPACT and the FP regs.
+     */
+    take_exception = !stacked_ok &&
+        armv7m_nvic_can_take_pending_exception(env->nvic);
+
+    qemu_mutex_unlock_iothread();
+
+    if (take_exception) {
+        raise_exception_ra(env, EXCP_LAZYFP, 0, 1, GETPC());
+    }
+
+    env->v7m.fpccr[is_secure] &= ~R_V7M_FPCCR_LSPACT_MASK;
+
+    if (ts) {
+        /* Clear s0 to s31 and the FPSCR */
+        int i;
+
+        for (i = 0; i < 32; i += 2) {
+            *aa32_vfp_dreg(env, i / 2) = 0;
+        }
+        vfp_set_fpscr(env, 0);
+    }
+    /*
+     * Otherwise s0 to s15 and FPSCR are UNKNOWN; we choose to leave them
+     * unchanged.
+     */
+}
+
  /* Write to v7M CONTROL.SPSEL bit for the specified security bank.
   * This may change the current stack pointer between Main and Process
   * stack pointers if it is done for the CONTROL register for the current
@@ -7768,6 +8100,9 @@ void HELPER(v7m_bxns)(CPUARMState *env, uint32_t dest)
      /* translate.c should have made BXNS UNDEF unless we're secure */
      assert(env->v7m.secure);
  
+    if (!(dest & 1)) {
+        env->v7m.control[M_REG_S] &= ~R_V7M_CONTROL_SFPA_MASK;
+    }
      switch_v7m_security_state(env, dest & 1);
      env->thumb = 1;
      env->regs[15] = dest & ~1;
@@ -7825,6 +8160,7 @@ void HELPER(v7m_blxns)(CPUARMState *env, uint32_t dest)
           */
          write_v7m_exception(env, 1);
      }
+    env->v7m.control[M_REG_S] &= ~R_V7M_CONTROL_SFPA_MASK;
      switch_v7m_security_state(env, 0);
      env->thumb = 1;
      env->regs[15] = dest;
@@ -7924,6 +8260,21 @@ load_fail:
      return false;
  }
  
+static uint32_t v7m_integrity_sig(CPUARMState *env, uint32_t lr)
+{
+    /*
+     * Return the integrity signature value for the callee-saves
+     * stack frame section. @lr is the exception return payload/LR value
+     * whose FType bit forms bit 0 of the signature if FP is present.
+     */
+    uint32_t sig = 0xfefa125a;
+
+    if (!arm_feature(env, ARM_FEATURE_VFP) || (lr & R_V7M_EXCRET_FTYPE_MASK)) {
+        sig |= 1;
+    }
+    return sig;
+}
+
  static bool v7m_push_callee_stack(ARMCPU *cpu, uint32_t lr, bool dotailchain,
                                    bool ignore_faults)
  {
@@ -7938,6 +8289,8 @@ static bool v7m_push_callee_stack(ARMCPU *cpu, uint32_t lr, bool dotailchain,
      bool stacked_ok;
      uint32_t limit;
      bool want_psp;
+    uint32_t sig;
+    StackingMode smode = ignore_faults ? STACK_IGNFAULTS : STACK_NORMAL;
  
      if (dotailchain) {
          bool mode = lr & R_V7M_EXCRET_MODE_MASK;
@@ -7979,24 +8332,17 @@ static bool v7m_push_callee_stack(ARMCPU *cpu, uint32_t lr, bool dotailchain,
      /* Write as much of the stack frame as we can. A write failure may
       * cause us to pend a derived exception.
       */
+    sig = v7m_integrity_sig(env, lr);
      stacked_ok =
-        v7m_stack_write(cpu, frameptr, 0xfefa125b, mmu_idx, ignore_faults) &&
-        v7m_stack_write(cpu, frameptr + 0x8, env->regs[4], mmu_idx,
-                        ignore_faults) &&
-        v7m_stack_write(cpu, frameptr + 0xc, env->regs[5], mmu_idx,
-                        ignore_faults) &&
-        v7m_stack_write(cpu, frameptr + 0x10, env->regs[6], mmu_idx,
-                        ignore_faults) &&
-        v7m_stack_write(cpu, frameptr + 0x14, env->regs[7], mmu_idx,
-                        ignore_faults) &&
-        v7m_stack_write(cpu, frameptr + 0x18, env->regs[8], mmu_idx,
-                        ignore_faults) &&
-        v7m_stack_write(cpu, frameptr + 0x1c, env->regs[9], mmu_idx,
-                        ignore_faults) &&
-        v7m_stack_write(cpu, frameptr + 0x20, env->regs[10], mmu_idx,
-                        ignore_faults) &&
-        v7m_stack_write(cpu, frameptr + 0x24, env->regs[11], mmu_idx,
-                        ignore_faults);
+        v7m_stack_write(cpu, frameptr, sig, mmu_idx, smode) &&
+        v7m_stack_write(cpu, frameptr + 0x8, env->regs[4], mmu_idx, smode) &&
+        v7m_stack_write(cpu, frameptr + 0xc, env->regs[5], mmu_idx, smode) &&
+        v7m_stack_write(cpu, frameptr + 0x10, env->regs[6], mmu_idx, smode) &&
+        v7m_stack_write(cpu, frameptr + 0x14, env->regs[7], mmu_idx, smode) &&
+        v7m_stack_write(cpu, frameptr + 0x18, env->regs[8], mmu_idx, smode) &&
+        v7m_stack_write(cpu, frameptr + 0x1c, env->regs[9], mmu_idx, smode) &&
+        v7m_stack_write(cpu, frameptr + 0x20, env->regs[10], mmu_idx, smode) &&
+        v7m_stack_write(cpu, frameptr + 0x24, env->regs[11], mmu_idx, smode);
  
      /* Update SP regardless of whether any of the stack accesses failed. */
      *frame_sp_p = frameptr;
@@ -8021,6 +8367,14 @@ static void v7m_exception_taken(ARMCPU *cpu, uint32_t lr, bool dotailchain,
      qemu_log_mask(CPU_LOG_INT, "...taking pending %s exception %d\n",
                    targets_secure ? "secure" : "nonsecure", exc);
  
+    if (dotailchain) {
+        /* Sanitize LR FType and PREFIX bits */
+        if (!arm_feature(env, ARM_FEATURE_VFP)) {
+            lr |= R_V7M_EXCRET_FTYPE_MASK;
+        }
+        lr = deposit32(lr, 24, 8, 0xff);
+    }
+
      if (arm_feature(env, ARM_FEATURE_V8)) {
          if (arm_feature(env, ARM_FEATURE_M_SECURITY) &&
              (lr & R_V7M_EXCRET_S_MASK)) {
@@ -8116,6 +8470,9 @@ static void v7m_exception_taken(ARMCPU *cpu, uint32_t lr, bool dotailchain,
      switch_v7m_security_state(env, targets_secure);
      write_v7m_control_spsel(env, 0);
      arm_clear_exclusive(env);
+    /* Clear SFPA and FPCA (has no effect if no FPU) */
+    env->v7m.control[M_REG_S] &=
+        ~(R_V7M_CONTROL_FPCA_MASK | R_V7M_CONTROL_SFPA_MASK);
      /* Clear IT bits */
      env->condexec_bits = 0;
      env->regs[14] = lr;
@@ -8123,6 +8480,187 @@ static void v7m_exception_taken(ARMCPU *cpu, uint32_t lr, bool dotailchain,
      env->thumb = addr & 1;
  }
  
+static void v7m_update_fpccr(CPUARMState *env, uint32_t frameptr,
+                             bool apply_splim)
+{
+    /*
+     * Like the pseudocode UpdateFPCCR: save state in FPCAR and FPCCR
+     * that we will need later in order to do lazy FP reg stacking.
+     */
+    bool is_secure = env->v7m.secure;
+    void *nvic = env->nvic;
+    /*
+     * Some bits are unbanked and live always in fpccr[M_REG_S]; some bits
+     * are banked and we want to update the bit in the bank for the
+     * current security state; and in one case we want to specifically
+     * update the NS banked version of a bit even if we are secure.
+     */
+    uint32_t *fpccr_s = &env->v7m.fpccr[M_REG_S];
+    uint32_t *fpccr_ns = &env->v7m.fpccr[M_REG_NS];
+    uint32_t *fpccr = &env->v7m.fpccr[is_secure];
+    bool hfrdy, bfrdy, mmrdy, ns_ufrdy, s_ufrdy, sfrdy, monrdy;
+
+    env->v7m.fpcar[is_secure] = frameptr & ~0x7;
+
+    if (apply_splim && arm_feature(env, ARM_FEATURE_V8)) {
+        bool splimviol;
+        uint32_t splim = v7m_sp_limit(env);
+        bool ign = armv7m_nvic_neg_prio_requested(nvic, is_secure) &&
+            (env->v7m.ccr[is_secure] & R_V7M_CCR_STKOFHFNMIGN_MASK);
+
+        splimviol = !ign && frameptr < splim;
+        *fpccr = FIELD_DP32(*fpccr, V7M_FPCCR, SPLIMVIOL, splimviol);
+    }
+
+    *fpccr = FIELD_DP32(*fpccr, V7M_FPCCR, LSPACT, 1);
+
+    *fpccr_s = FIELD_DP32(*fpccr_s, V7M_FPCCR, S, is_secure);
+
+    *fpccr = FIELD_DP32(*fpccr, V7M_FPCCR, USER, arm_current_el(env) == 0);
+
+    *fpccr = FIELD_DP32(*fpccr, V7M_FPCCR, THREAD,
+                        !arm_v7m_is_handler_mode(env));
+
+    hfrdy = armv7m_nvic_get_ready_status(nvic, ARMV7M_EXCP_HARD, false);
+    *fpccr_s = FIELD_DP32(*fpccr_s, V7M_FPCCR, HFRDY, hfrdy);
+
+    bfrdy = armv7m_nvic_get_ready_status(nvic, ARMV7M_EXCP_BUS, false);
+    *fpccr_s = FIELD_DP32(*fpccr_s, V7M_FPCCR, BFRDY, bfrdy);
+
+    mmrdy = armv7m_nvic_get_ready_status(nvic, ARMV7M_EXCP_MEM, is_secure);
+    *fpccr = FIELD_DP32(*fpccr, V7M_FPCCR, MMRDY, mmrdy);
+
+    ns_ufrdy = armv7m_nvic_get_ready_status(nvic, ARMV7M_EXCP_USAGE, false);
+    *fpccr_ns = FIELD_DP32(*fpccr_ns, V7M_FPCCR, UFRDY, ns_ufrdy);
+
+    monrdy = armv7m_nvic_get_ready_status(nvic, ARMV7M_EXCP_DEBUG, false);
+    *fpccr_s = FIELD_DP32(*fpccr_s, V7M_FPCCR, MONRDY, monrdy);
+
+    if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
+        s_ufrdy = armv7m_nvic_get_ready_status(nvic, ARMV7M_EXCP_USAGE, true);
+        *fpccr_s = FIELD_DP32(*fpccr_s, V7M_FPCCR, UFRDY, s_ufrdy);
+
+        sfrdy = armv7m_nvic_get_ready_status(nvic, ARMV7M_EXCP_SECURE, false);
+        *fpccr_s = FIELD_DP32(*fpccr_s, V7M_FPCCR, SFRDY, sfrdy);
+    }
+}
+
+void HELPER(v7m_vlstm)(CPUARMState *env, uint32_t fptr)
+{
+    /* fptr is the value of Rn, the frame pointer we store the FP regs to */
+    bool s = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_S_MASK;
+    bool lspact = env->v7m.fpccr[s] & R_V7M_FPCCR_LSPACT_MASK;
+
+    assert(env->v7m.secure);
+
+    if (!(env->v7m.control[M_REG_S] & R_V7M_CONTROL_SFPA_MASK)) {
+        return;
+    }
+
+    /* Check access to the coprocessor is permitted */
+    if (!v7m_cpacr_pass(env, true, arm_current_el(env) != 0)) {
+        raise_exception_ra(env, EXCP_NOCP, 0, 1, GETPC());
+    }
+
+    if (lspact) {
+        /* LSPACT should not be active when there is active FP state */
+        raise_exception_ra(env, EXCP_LSERR, 0, 1, GETPC());
+    }
+
+    if (fptr & 7) {
+        raise_exception_ra(env, EXCP_UNALIGNED, 0, 1, GETPC());
+    }
+
+    /*
+     * Note that we do not use v7m_stack_write() here, because the
+     * accesses should not set the FSR bits for stacking errors if they
+     * fail. (In pseudocode terms, they are AccType_NORMAL, not AccType_STACK
+     * or AccType_LAZYFP). Faults in cpu_stl_data() will throw exceptions
+     * and longjmp out.
+     */
+    if (!(env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_LSPEN_MASK)) {
+        bool ts = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_TS_MASK;
+        int i;
+
+        for (i = 0; i < (ts ? 32 : 16); i += 2) {
+            uint64_t dn = *aa32_vfp_dreg(env, i / 2);
+            uint32_t faddr = fptr + 4 * i;
+            uint32_t slo = extract64(dn, 0, 32);
+            uint32_t shi = extract64(dn, 32, 32);
+
+            if (i >= 16) {
+                faddr += 8; /* skip the slot for the FPSCR */
+            }
+            cpu_stl_data(env, faddr, slo);
+            cpu_stl_data(env, faddr + 4, shi);
+        }
+        cpu_stl_data(env, fptr + 0x40, vfp_get_fpscr(env));
+
+        /*
+         * If TS is 0 then s0 to s15 and FPSCR are UNKNOWN; we choose to
+         * leave them unchanged, matching our choice in v7m_preserve_fp_state.
+         */
+        if (ts) {
+            for (i = 0; i < 32; i += 2) {
+                *aa32_vfp_dreg(env, i / 2) = 0;
+            }
+            vfp_set_fpscr(env, 0);
+        }
+    } else {
+        v7m_update_fpccr(env, fptr, false);
+    }
+
+    env->v7m.control[M_REG_S] &= ~R_V7M_CONTROL_FPCA_MASK;
+}
+
+void HELPER(v7m_vlldm)(CPUARMState *env, uint32_t fptr)
+{
+    /* fptr is the value of Rn, the frame pointer we load the FP regs from */
+    assert(env->v7m.secure);
+
+    if (!(env->v7m.control[M_REG_S] & R_V7M_CONTROL_SFPA_MASK)) {
+        return;
+    }
+
+    /* Check access to the coprocessor is permitted */
+    if (!v7m_cpacr_pass(env, true, arm_current_el(env) != 0)) {
+        raise_exception_ra(env, EXCP_NOCP, 0, 1, GETPC());
+    }
+
+    if (env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_LSPACT_MASK) {
+        /* State in FP is still valid */
+        env->v7m.fpccr[M_REG_S] &= ~R_V7M_FPCCR_LSPACT_MASK;
+    } else {
+        bool ts = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_TS_MASK;
+        int i;
+        uint32_t fpscr;
+
+        if (fptr & 7) {
+            raise_exception_ra(env, EXCP_UNALIGNED, 0, 1, GETPC());
+        }
+
+        for (i = 0; i < (ts ? 32 : 16); i += 2) {
+            uint32_t slo, shi;
+            uint64_t dn;
+            uint32_t faddr = fptr + 4 * i;
+
+            if (i >= 16) {
+                faddr += 8; /* skip the slot for the FPSCR */
+            }
+
+            slo = cpu_ldl_data(env, faddr);
+            shi = cpu_ldl_data(env, faddr + 4);
+
+            dn = (uint64_t) shi << 32 | slo;
+            *aa32_vfp_dreg(env, i / 2) = dn;
+        }
+        fpscr = cpu_ldl_data(env, fptr + 0x40);
+        vfp_set_fpscr(env, fpscr);
+    }
+
+    env->v7m.control[M_REG_S] |= R_V7M_CONTROL_FPCA_MASK;
+}
+
  static bool v7m_push_stack(ARMCPU *cpu)
  {
      /* Do the "set up stack frame" part of exception entry,
@@ -8131,11 +8669,25 @@ static bool v7m_push_stack(ARMCPU *cpu)
       * should ignore further stack faults trying to process
       * that derived exception.)
       */
-    bool stacked_ok;
+    bool stacked_ok = true, limitviol = false;
      CPUARMState *env = &cpu->env;
      uint32_t xpsr = xpsr_read(env);
      uint32_t frameptr = env->regs[13];
      ARMMMUIdx mmu_idx = arm_mmu_idx(env);
+    uint32_t framesize;
+    bool nsacr_cp10 = extract32(env->v7m.nsacr, 10, 1);
+
+    if ((env->v7m.control[M_REG_S] & R_V7M_CONTROL_FPCA_MASK) &&
+        (env->v7m.secure || nsacr_cp10)) {
+        if (env->v7m.secure &&
+            env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_TS_MASK) {
+            framesize = 0xa8;
+        } else {
+            framesize = 0x68;
+        }
+    } else {
+        framesize = 0x20;
+    }
  
      /* Align stack pointer if the guest wants that */
      if ((frameptr & 4) &&
@@ -8144,7 +8696,13 @@ static bool v7m_push_stack(ARMCPU *cpu)
          xpsr |= XPSR_SPREALIGN;
      }
  
-    frameptr -= 0x20;
+    xpsr &= ~XPSR_SFPA;
+    if (env->v7m.secure &&
+        (env->v7m.control[M_REG_S] & R_V7M_CONTROL_SFPA_MASK)) {
+        xpsr |= XPSR_SFPA;
+    }
+
+    frameptr -= framesize;
  
      if (arm_feature(env, ARM_FEATURE_V8)) {
          uint32_t limit = v7m_sp_limit(env);
@@ -8162,7 +8720,14 @@ static bool v7m_push_stack(ARMCPU *cpu)
              armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE,
                                      env->v7m.secure);
              env->regs[13] = limit;
-            return true;
+            /*
+             * We won't try to perform any further memory accesses but
+             * we must continue through the following code to check for
+             * permission faults during FPU state preservation, and we
+             * must update FPCCR if lazy stacking is enabled.
+             */
+            limitviol = true;
+            stacked_ok = false;
          }
      }
  
@@ -8171,27 +8736,108 @@ static bool v7m_push_stack(ARMCPU *cpu)
       * (which may be taken in preference to the one we started with
       * if it has higher priority).
       */
-    stacked_ok =
-        v7m_stack_write(cpu, frameptr, env->regs[0], mmu_idx, false) &&
-        v7m_stack_write(cpu, frameptr + 4, env->regs[1], mmu_idx, false) &&
-        v7m_stack_write(cpu, frameptr + 8, env->regs[2], mmu_idx, false) &&
-        v7m_stack_write(cpu, frameptr + 12, env->regs[3], mmu_idx, false) &&
-        v7m_stack_write(cpu, frameptr + 16, env->regs[12], mmu_idx, false) &&
-        v7m_stack_write(cpu, frameptr + 20, env->regs[14], mmu_idx, false) &&
-        v7m_stack_write(cpu, frameptr + 24, env->regs[15], mmu_idx, false) &&
-        v7m_stack_write(cpu, frameptr + 28, xpsr, mmu_idx, false);
+    stacked_ok = stacked_ok &&
+        v7m_stack_write(cpu, frameptr, env->regs[0], mmu_idx, STACK_NORMAL) &&
+        v7m_stack_write(cpu, frameptr + 4, env->regs[1],
+                        mmu_idx, STACK_NORMAL) &&
+        v7m_stack_write(cpu, frameptr + 8, env->regs[2],
+                        mmu_idx, STACK_NORMAL) &&
+        v7m_stack_write(cpu, frameptr + 12, env->regs[3],
+                        mmu_idx, STACK_NORMAL) &&
+        v7m_stack_write(cpu, frameptr + 16, env->regs[12],
+                        mmu_idx, STACK_NORMAL) &&
+        v7m_stack_write(cpu, frameptr + 20, env->regs[14],
+                        mmu_idx, STACK_NORMAL) &&
+        v7m_stack_write(cpu, frameptr + 24, env->regs[15],
+                        mmu_idx, STACK_NORMAL) &&
+        v7m_stack_write(cpu, frameptr + 28, xpsr, mmu_idx, STACK_NORMAL);
+
+    if (env->v7m.control[M_REG_S] & R_V7M_CONTROL_FPCA_MASK) {
+        /* FPU is active, try to save its registers */
+        bool fpccr_s = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_S_MASK;
+        bool lspact = env->v7m.fpccr[fpccr_s] & R_V7M_FPCCR_LSPACT_MASK;
+
+        if (lspact && arm_feature(env, ARM_FEATURE_M_SECURITY)) {
+            qemu_log_mask(CPU_LOG_INT,
+                          "...SecureFault because LSPACT and FPCA both set\n");
+            env->v7m.sfsr |= R_V7M_SFSR_LSERR_MASK;
+            armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SECURE, false);
+        } else if (!env->v7m.secure && !nsacr_cp10) {
+            qemu_log_mask(CPU_LOG_INT,
+                          "...Secure UsageFault with CFSR.NOCP because "
+                          "NSACR.CP10 prevents stacking FP regs\n");
+            armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, M_REG_S);
+            env->v7m.cfsr[M_REG_S] |= R_V7M_CFSR_NOCP_MASK;
+        } else {
+            if (!(env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_LSPEN_MASK)) {
+                /* Lazy stacking disabled, save registers now */
+                int i;
+                bool cpacr_pass = v7m_cpacr_pass(env, env->v7m.secure,
+                                                 arm_current_el(env) != 0);
  
-    /* Update SP regardless of whether any of the stack accesses failed. */
-    env->regs[13] = frameptr;
+                if (stacked_ok && !cpacr_pass) {
+                    /*
+                     * Take UsageFault if CPACR forbids access. The pseudocode
+                     * here does a full CheckCPEnabled() but we know the NSACR
+                     * check can never fail as we have already handled that.
+                     */
+                    qemu_log_mask(CPU_LOG_INT,
+                                  "...UsageFault with CFSR.NOCP because "
+                                  "CPACR.CP10 prevents stacking FP regs\n");
+                    armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE,
+                                            env->v7m.secure);
+                    env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_NOCP_MASK;
+                    stacked_ok = false;
+                }
  
-    return !stacked_ok;
+                for (i = 0; i < ((framesize == 0xa8) ? 32 : 16); i += 2) {
+                    uint64_t dn = *aa32_vfp_dreg(env, i / 2);
+                    uint32_t faddr = frameptr + 0x20 + 4 * i;
+                    uint32_t slo = extract64(dn, 0, 32);
+                    uint32_t shi = extract64(dn, 32, 32);
+
+                    if (i >= 16) {
+                        faddr += 8; /* skip the slot for the FPSCR */
+                    }
+                    stacked_ok = stacked_ok &&
+                        v7m_stack_write(cpu, faddr, slo,
+                                        mmu_idx, STACK_NORMAL) &&
+                        v7m_stack_write(cpu, faddr + 4, shi,
+                                        mmu_idx, STACK_NORMAL);
+                }
+                stacked_ok = stacked_ok &&
+                    v7m_stack_write(cpu, frameptr + 0x60,
+                                    vfp_get_fpscr(env), mmu_idx, STACK_NORMAL);
+                if (cpacr_pass) {
+                    for (i = 0; i < ((framesize == 0xa8) ? 32 : 16); i += 2) {
+                        *aa32_vfp_dreg(env, i / 2) = 0;
+                    }
+                    vfp_set_fpscr(env, 0);
+                }
+            } else {
+                /* Lazy stacking enabled, save necessary info to stack later */
+                v7m_update_fpccr(env, frameptr + 0x20, true);
+            }
+        }
+    }
+
+    /*
+     * If we broke a stack limit then SP was already updated earlier;
+     * otherwise we update SP regardless of whether any of the stack
+     * accesses failed or we took some other kind of fault.
+     */
+    if (!limitviol) {
+        env->regs[13] = frameptr;
+    }
+
+    return !stacked_ok;
  }
  
  static void do_v7m_exception_exit(ARMCPU *cpu)
  {
      CPUARMState *env = &cpu->env;
      uint32_t excret;
-    uint32_t xpsr;
+    uint32_t xpsr, xpsr_mask;
      bool ufault = false;
      bool sfault = false;
      bool return_to_sp_process;
@@ -8199,6 +8845,8 @@ static void do_v7m_exception_exit(ARMCPU *cpu)
      bool rettobase = false;
      bool exc_secure = false;
      bool return_to_secure;
+    bool ftype;
+    bool restore_s16_s31;
  
      /* If we're not in Handler mode then jumps to magic exception-exit
       * addresses don't have magic behaviour. However for the v8M
@@ -8236,6 +8884,16 @@ static void do_v7m_exception_exit(ARMCPU *cpu)
                        excret);
      }
  
+    ftype = excret & R_V7M_EXCRET_FTYPE_MASK;
+
+    if (!arm_feature(env, ARM_FEATURE_VFP) && !ftype) {
+        qemu_log_mask(LOG_GUEST_ERROR, "M profile: zero FTYPE in exception "
+                      "exit PC value 0x%" PRIx32 " is UNPREDICTABLE "
+                      "if FPU not present\n",
+                      excret);
+        ftype = true;
+    }
+
      if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
          /* EXC_RETURN.ES validation check (R_SMFL). We must do this before
           * we pick which FAULTMASK to clear.
@@ -8336,6 +8994,30 @@ static void do_v7m_exception_exit(ARMCPU *cpu)
       */
      write_v7m_control_spsel_for_secstate(env, return_to_sp_process, exc_secure);
  
+    /*
+     * Clear scratch FP values left in caller saved registers; this
+     * must happen before any kind of tail chaining.
+     */
+    if ((env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_CLRONRET_MASK) &&
+        (env->v7m.control[M_REG_S] & R_V7M_CONTROL_FPCA_MASK)) {
+        if (env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_LSPACT_MASK) {
+            env->v7m.sfsr |= R_V7M_SFSR_LSERR_MASK;
+            armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SECURE, false);
+            qemu_log_mask(CPU_LOG_INT, "...taking SecureFault on existing "
+                          "stackframe: error during lazy state deactivation\n");
+            v7m_exception_taken(cpu, excret, true, false);
+            return;
+        } else {
+            /* Clear s0..s15 and FPSCR */
+            int i;
+
+            for (i = 0; i < 16; i += 2) {
+                *aa32_vfp_dreg(env, i / 2) = 0;
+            }
+            vfp_set_fpscr(env, 0);
+        }
+    }
+
      if (sfault) {
          env->v7m.sfsr |= R_V7M_SFSR_INVER_MASK;
          armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SECURE, false);
@@ -8409,12 +9091,11 @@ static void do_v7m_exception_exit(ARMCPU *cpu)
          if (return_to_secure &&
              ((excret & R_V7M_EXCRET_ES_MASK) == 0 ||
               (excret & R_V7M_EXCRET_DCRS_MASK) == 0)) {
-            uint32_t expected_sig = 0xfefa125b;
              uint32_t actual_sig;
  
              pop_ok = v7m_stack_read(cpu, &actual_sig, frameptr, mmu_idx);
  
-            if (pop_ok && expected_sig != actual_sig) {
+            if (pop_ok && v7m_integrity_sig(env, excret) != actual_sig) {
                  /* Take a SecureFault on the current stack */
                  env->v7m.sfsr |= R_V7M_SFSR_INVIS_MASK;
                  armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SECURE, false);
@@ -8498,8 +9179,105 @@ static void do_v7m_exception_exit(ARMCPU *cpu)
              }
          }
  
+        if (!ftype) {
+            /* FP present and we need to handle it */
+            if (!return_to_secure &&
+                (env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_LSPACT_MASK)) {
+                armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SECURE, false);
+                env->v7m.sfsr |= R_V7M_SFSR_LSERR_MASK;
+                qemu_log_mask(CPU_LOG_INT,
+                              "...taking SecureFault on existing stackframe: "
+                              "Secure LSPACT set but exception return is "
+                              "not to secure state\n");
+                v7m_exception_taken(cpu, excret, true, false);
+                return;
+            }
+
+            restore_s16_s31 = return_to_secure &&
+                (env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_TS_MASK);
+
+            if (env->v7m.fpccr[return_to_secure] & R_V7M_FPCCR_LSPACT_MASK) {
+                /* State in FPU is still valid, just clear LSPACT */
+                env->v7m.fpccr[return_to_secure] &= ~R_V7M_FPCCR_LSPACT_MASK;
+            } else {
+                int i;
+                uint32_t fpscr;
+                bool cpacr_pass, nsacr_pass;
+
+                cpacr_pass = v7m_cpacr_pass(env, return_to_secure,
+                                            return_to_priv);
+                nsacr_pass = return_to_secure ||
+                    extract32(env->v7m.nsacr, 10, 1);
+
+                if (!cpacr_pass) {
+                    armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE,
+                                            return_to_secure);
+                    env->v7m.cfsr[return_to_secure] |= R_V7M_CFSR_NOCP_MASK;
+                    qemu_log_mask(CPU_LOG_INT,
+                                  "...taking UsageFault on existing "
+                                  "stackframe: CPACR.CP10 prevents unstacking "
+                                  "FP regs\n");
+                    v7m_exception_taken(cpu, excret, true, false);
+                    return;
+                } else if (!nsacr_pass) {
+                    armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, true);
+                    env->v7m.cfsr[M_REG_S] |= R_V7M_CFSR_INVPC_MASK;
+                    qemu_log_mask(CPU_LOG_INT,
+                                  "...taking Secure UsageFault on existing "
+                                  "stackframe: NSACR.CP10 prevents unstacking "
+                                  "FP regs\n");
+                    v7m_exception_taken(cpu, excret, true, false);
+                    return;
+                }
+
+                for (i = 0; i < (restore_s16_s31 ? 32 : 16); i += 2) {
+                    uint32_t slo, shi;
+                    uint64_t dn;
+                    uint32_t faddr = frameptr + 0x20 + 4 * i;
+
+                    if (i >= 16) {
+                        faddr += 8; /* Skip the slot for the FPSCR */
+                    }
+
+                    pop_ok = pop_ok &&
+                        v7m_stack_read(cpu, &slo, faddr, mmu_idx) &&
+                        v7m_stack_read(cpu, &shi, faddr + 4, mmu_idx);
+
+                    if (!pop_ok) {
+                        break;
+                    }
+
+                    dn = (uint64_t)shi << 32 | slo;
+                    *aa32_vfp_dreg(env, i / 2) = dn;
+                }
+                pop_ok = pop_ok &&
+                    v7m_stack_read(cpu, &fpscr, frameptr + 0x60, mmu_idx);
+                if (pop_ok) {
+                    vfp_set_fpscr(env, fpscr);
+                }
+                if (!pop_ok) {
+                    /*
+                     * These regs are 0 if security extension present;
+                     * otherwise merely UNKNOWN. We zero always.
+                     */
+                    for (i = 0; i < (restore_s16_s31 ? 32 : 16); i += 2) {
+                        *aa32_vfp_dreg(env, i / 2) = 0;
+                    }
+                    vfp_set_fpscr(env, 0);
+                }
+            }
+        }
+        env->v7m.control[M_REG_S] = FIELD_DP32(env->v7m.control[M_REG_S],
+                                               V7M_CONTROL, FPCA, !ftype);
+
          /* Commit to consuming the stack frame */
          frameptr += 0x20;
+        if (!ftype) {
+            frameptr += 0x48;
+            if (restore_s16_s31) {
+                frameptr += 0x40;
+            }
+        }
          /* Undo stack alignment (the SPREALIGN bit indicates that the original
           * pre-exception SP was not 8-aligned and we added a padding word to
           * align it, so we undo this by ORing in the bit that increases it
@@ -8511,8 +9289,20 @@ static void do_v7m_exception_exit(ARMCPU *cpu)
          }
          *frame_sp_p = frameptr;
      }
+
+    xpsr_mask = ~(XPSR_SPREALIGN | XPSR_SFPA);
+    if (!arm_feature(env, ARM_FEATURE_THUMB_DSP)) {
+        xpsr_mask &= ~XPSR_GE;
+    }
      /* This xpsr_write() will invalidate frame_sp_p as it may switch stack */
-    xpsr_write(env, xpsr, ~XPSR_SPREALIGN);
+    xpsr_write(env, xpsr, xpsr_mask);
+
+    if (env->v7m.secure) {
+        bool sfpa = xpsr & XPSR_SFPA;
+
+        env->v7m.control[M_REG_S] = FIELD_DP32(env->v7m.control[M_REG_S],
+                                               V7M_CONTROL, SFPA, sfpa);
+    }
  
      /* The restored xPSR exception field will be zero if we're
       * resuming in Thread mode. If that doesn't match what the
@@ -8635,6 +9425,9 @@ static void arm_log_exception(int idx)
              [EXCP_NOCP] = "v7M NOCP UsageFault",
              [EXCP_INVSTATE] = "v7M INVSTATE UsageFault",
              [EXCP_STKOF] = "v8M STKOF UsageFault",
+            [EXCP_LAZYFP] = "v7M exception during lazy FP stacking",
+            [EXCP_LSERR] = "v8M LSERR UsageFault",
+            [EXCP_UNALIGNED] = "v7M UNALIGNED UsageFault",
          };
  
          if (idx >= 0 && idx < ARRAY_SIZE(excnames)) {
@@ -8753,6 +9546,7 @@ static bool v7m_handle_execute_nsc(ARMCPU *cpu)
      qemu_log_mask(CPU_LOG_INT, "...really an SG instruction at 0x%08" PRIx32
                    ", executing it\n", env->regs[15]);
      env->regs[14] &= ~1;
+    env->v7m.control[M_REG_S] &= ~R_V7M_CONTROL_SFPA_MASK;
      switch_v7m_security_state(env, true);
      xpsr_write(env, 0, XPSR_IT);
      env->regs[15] += 4;
@@ -8783,9 +9577,23 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs)
          env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_UNDEFINSTR_MASK;
          break;
      case EXCP_NOCP:
-        armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, env->v7m.secure);
-        env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_NOCP_MASK;
+    {
+        /*
+         * NOCP might be directed to something other than the current
+         * security state if this fault is because of NSACR; we indicate
+         * the target security state using exception.target_el.
+         */
+        int target_secstate;
+
+        if (env->exception.target_el == 3) {
+            target_secstate = M_REG_S;
+        } else {
+            target_secstate = env->v7m.secure;
+        }
+        armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, target_secstate);
+        env->v7m.cfsr[target_secstate] |= R_V7M_CFSR_NOCP_MASK;
          break;
+    }
      case EXCP_INVSTATE:
          armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, env->v7m.secure);
          env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_INVSTATE_MASK;
@@ -8794,6 +9602,14 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs)
          armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, env->v7m.secure);
          env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_STKOF_MASK;
          break;
+    case EXCP_LSERR:
+        armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SECURE, false);
+        env->v7m.sfsr |= R_V7M_SFSR_LSERR_MASK;
+        break;
+    case EXCP_UNALIGNED:
+        armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, env->v7m.secure);
+        env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_UNALIGNED_MASK;
+        break;
      case EXCP_SWI:
          /* The PC already points to the next instruction.  */
          armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SVC, env->v7m.secure);
@@ -8913,6 +9729,12 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs)
              return;
          }
          break;
+    case EXCP_LAZYFP:
+        /*
+         * We already pended the specific exception in the NVIC in the
+         * v7m_preserve_fp_state() helper function.
+         */
+        break;
      default:
          cpu_abort(cs, "Unhandled exception 0x%x\n", cs->exception_index);
          return; /* Never happens.  Keep compiler happy.  */
@@ -8920,8 +9742,7 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs)
  
      if (arm_feature(env, ARM_FEATURE_V8)) {
          lr = R_V7M_EXCRET_RES1_MASK |
-            R_V7M_EXCRET_DCRS_MASK |
-            R_V7M_EXCRET_FTYPE_MASK;
+            R_V7M_EXCRET_DCRS_MASK;
          /* The S bit indicates whether we should return to Secure
           * or NonSecure (ie our current state).
           * The ES bit indicates whether we're taking this exception
@@ -8936,6 +9757,9 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs)
          if (env->v7m.secure) {
              lr |= R_V7M_EXCRET_S_MASK;
          }
+        if (!(env->v7m.control[M_REG_S] & R_V7M_CONTROL_FPCA_MASK)) {
+            lr |= R_V7M_EXCRET_FTYPE_MASK;
+        }
      } else {
          lr = R_V7M_EXCRET_RES1_MASK |
              R_V7M_EXCRET_S_MASK |
@@ -10179,7 +11003,7 @@ static bool get_phys_addr_v5(CPUARMState *env, uint32_t address,
                               target_ulong *page_size,
                               ARMMMUFaultInfo *fi)
  {
-    CPUState *cs = CPU(arm_env_get_cpu(env));
+    CPUState *cs = env_cpu(env);
      int level = 1;
      uint32_t table;
      uint32_t desc;
@@ -10300,7 +11124,7 @@ static bool get_phys_addr_v6(CPUARMState *env, uint32_t address,
                               hwaddr *phys_ptr, MemTxAttrs *attrs, int *prot,
                               target_ulong *page_size, ARMMMUFaultInfo *fi)
  {
-    CPUState *cs = CPU(arm_env_get_cpu(env));
+    CPUState *cs = env_cpu(env);
      int level = 1;
      uint32_t table;
      uint32_t desc;
@@ -10685,7 +11509,7 @@ static bool get_phys_addr_lpae(CPUARMState *env, target_ulong address,
                                 target_ulong *page_size_ptr,
                                 ARMMMUFaultInfo *fi, ARMCacheAttrs *cacheattrs)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      CPUState *cs = CPU(cpu);
      /* Read an LPAE long-descriptor translation table. */
      ARMFaultType fault_type = ARMFault_Translation;
@@ -11043,7 +11867,7 @@ static bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address,
                                   target_ulong *page_size,
                                   ARMMMUFaultInfo *fi)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      int n;
      bool is_user = regime_is_user(env, mmu_idx);
  
@@ -11247,7 +12071,7 @@ static void v8m_security_lookup(CPUARMState *env, uint32_t address,
       * pseudocode SecurityCheck() function.
       * We assume the caller has zero-initialized *sattrs.
       */
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      int r;
      bool idau_exempt = false, idau_ns = true, idau_nsc = true;
      int idau_region = IREGION_NOTVALID;
@@ -11360,7 +12184,7 @@ static bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address,
       * We set is_subpage to true if the region hit doesn't cover the
       * entire TARGET_PAGE the address is within.
       */
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      bool is_user = regime_is_user(env, mmu_idx);
      uint32_t secure = regime_is_secure(env, mmu_idx);
      int n;
@@ -11386,9 +12210,11 @@ static bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address,
          hit = true;
      } else if (m_is_ppb_region(env, address)) {
          hit = true;
-    } else if (pmsav7_use_background_region(cpu, mmu_idx, is_user)) {
-        hit = true;
      } else {
+        if (pmsav7_use_background_region(cpu, mmu_idx, is_user)) {
+            hit = true;
+        }
+
          for (n = (int)cpu->pmsav7_dregion - 1; n >= 0; n--) {
              /* region search */
              /* Note that the base address is bits [31:5] from the register
@@ -11426,7 +12252,7 @@ static bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address,
                  *is_subpage = true;
              }
  
-            if (hit) {
+            if (matchregion != -1) {
                  /* Multiple regions match -- always a failure (unlike
                   * PMSAv7 where highest-numbered-region wins)
                   */
@@ -11879,43 +12705,6 @@ static bool get_phys_addr(CPUARMState *env, target_ulong address,
      }
  }
  
-/* Walk the page table and (if the mapping exists) add the page
- * to the TLB. Return false on success, or true on failure. Populate
- * fsr with ARM DFSR/IFSR fault register format value on failure.
- */
-bool arm_tlb_fill(CPUState *cs, vaddr address,
-                  MMUAccessType access_type, int mmu_idx,
-                  ARMMMUFaultInfo *fi)
-{
-    ARMCPU *cpu = ARM_CPU(cs);
-    CPUARMState *env = &cpu->env;
-    hwaddr phys_addr;
-    target_ulong page_size;
-    int prot;
-    int ret;
-    MemTxAttrs attrs = {};
-
-    ret = get_phys_addr(env, address, access_type,
-                        core_to_arm_mmu_idx(env, mmu_idx), &phys_addr,
-                        &attrs, &prot, &page_size, fi, NULL);
-    if (!ret) {
-        /*
-         * Map a single [sub]page. Regions smaller than our declared
-         * target page size are handled specially, so for those we
-         * pass in the exact addresses.
-         */
-        if (page_size >= TARGET_PAGE_SIZE) {
-            phys_addr &= TARGET_PAGE_MASK;
-            address &= TARGET_PAGE_MASK;
-        }
-        tlb_set_page_with_attrs(cs, address, phys_addr, attrs,
-                                prot, mmu_idx, page_size);
-        return 0;
-    }
-
-    return ret;
-}
-
  hwaddr arm_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr,
                                           MemTxAttrs *attrs)
  {
@@ -11954,12 +12743,22 @@ uint32_t HELPER(v7m_mrs)(CPUARMState *env, uint32_t reg)
          }
          if (!(reg & 4)) {
              mask |= XPSR_NZCV | XPSR_Q; /* APSR */
+            if (arm_feature(env, ARM_FEATURE_THUMB_DSP)) {
+                mask |= XPSR_GE;
+            }
          }
          /* EPSR reads as zero */
          return xpsr_read(env) & mask;
          break;
      case 20: /* CONTROL */
-        return env->v7m.control[env->v7m.secure];
+    {
+        uint32_t value = env->v7m.control[env->v7m.secure];
+        if (!env->v7m.secure) {
+            /* SFPA is RAZ/WI from NS; FPCA is stored in the M_REG_S bank */
+            value |= env->v7m.control[M_REG_S] & R_V7M_CONTROL_FPCA_MASK;
+        }
+        return value;
+    }
      case 0x94: /* CONTROL_NS */
          /* We have to handle this here because unprivileged Secure code
           * can read the NS CONTROL register.
@@ -11967,7 +12766,8 @@ uint32_t HELPER(v7m_mrs)(CPUARMState *env, uint32_t reg)
          if (!env->v7m.secure) {
              return 0;
          }
-        return env->v7m.control[M_REG_NS];
+        return env->v7m.control[M_REG_NS] |
+            (env->v7m.control[M_REG_S] & R_V7M_CONTROL_FPCA_MASK);
      }
  
      if (el == 0) {
@@ -12073,9 +12873,13 @@ void HELPER(v7m_msr)(CPUARMState *env, uint32_t maskreg, uint32_t val)
       */
      uint32_t mask = extract32(maskreg, 8, 4);
      uint32_t reg = extract32(maskreg, 0, 8);
+    int cur_el = arm_current_el(env);
  
-    if (arm_current_el(env) == 0 && reg > 7) {
-        /* only xPSR sub-fields may be written by unprivileged */
+    if (cur_el == 0 && reg > 7 && reg != 20) {
+        /*
+         * only xPSR sub-fields and CONTROL.SFPA may be written by
+         * unprivileged code
+         */
          return;
      }
  
@@ -12134,6 +12938,15 @@ void HELPER(v7m_msr)(CPUARMState *env, uint32_t maskreg, uint32_t val)
                  env->v7m.control[M_REG_NS] &= ~R_V7M_CONTROL_NPRIV_MASK;
                  env->v7m.control[M_REG_NS] |= val & R_V7M_CONTROL_NPRIV_MASK;
              }
+            /*
+             * SFPA is RAZ/WI from NS. FPCA is RO if NSACR.CP10 == 0,
+             * RES0 if the FPU is not present, and is stored in the S bank
+             */
+            if (arm_feature(env, ARM_FEATURE_VFP) &&
+                extract32(env->v7m.nsacr, 10, 1)) {
+                env->v7m.control[M_REG_S] &= ~R_V7M_CONTROL_FPCA_MASK;
+                env->v7m.control[M_REG_S] |= val & R_V7M_CONTROL_FPCA_MASK;
+            }
              return;
          case 0x98: /* SP_NS */
          {
@@ -12151,7 +12964,7 @@ void HELPER(v7m_msr)(CPUARMState *env, uint32_t maskreg, uint32_t val)
              limit = is_psp ? env->v7m.psplim[false] : env->v7m.msplim[false];
  
              if (val < limit) {
-                CPUState *cs = CPU(arm_env_get_cpu(env));
+                CPUState *cs = env_cpu(env);
  
                  cpu_restore_state(cs, GETPC(), true);
                  raise_exception(env, EXCP_STKOF, 0, 1);
@@ -12236,21 +13049,41 @@ void HELPER(v7m_msr)(CPUARMState *env, uint32_t maskreg, uint32_t val)
          env->v7m.faultmask[env->v7m.secure] = val & 1;
          break;
      case 20: /* CONTROL */
-        /* Writing to the SPSEL bit only has an effect if we are in
+        /*
+         * Writing to the SPSEL bit only has an effect if we are in
           * thread mode; other bits can be updated by any privileged code.
           * write_v7m_control_spsel() deals with updating the SPSEL bit in
           * env->v7m.control, so we only need update the others.
           * For v7M, we must just ignore explicit writes to SPSEL in handler
           * mode; for v8M the write is permitted but will have no effect.
+         * All these bits are writes-ignored from non-privileged code,
+         * except for SFPA.
           */
-        if (arm_feature(env, ARM_FEATURE_V8) ||
-            !arm_v7m_is_handler_mode(env)) {
+        if (cur_el > 0 && (arm_feature(env, ARM_FEATURE_V8) ||
+                           !arm_v7m_is_handler_mode(env))) {
              write_v7m_control_spsel(env, (val & R_V7M_CONTROL_SPSEL_MASK) != 0);
          }
-        if (arm_feature(env, ARM_FEATURE_M_MAIN)) {
+        if (cur_el > 0 && arm_feature(env, ARM_FEATURE_M_MAIN)) {
              env->v7m.control[env->v7m.secure] &= ~R_V7M_CONTROL_NPRIV_MASK;
              env->v7m.control[env->v7m.secure] |= val & R_V7M_CONTROL_NPRIV_MASK;
          }
+        if (arm_feature(env, ARM_FEATURE_VFP)) {
+            /*
+             * SFPA is RAZ/WI from NS or if no FPU.
+             * FPCA is RO if NSACR.CP10 == 0, RES0 if the FPU is not present.
+             * Both are stored in the S bank.
+             */
+            if (env->v7m.secure) {
+                env->v7m.control[M_REG_S] &= ~R_V7M_CONTROL_SFPA_MASK;
+                env->v7m.control[M_REG_S] |= val & R_V7M_CONTROL_SFPA_MASK;
+            }
+            if (cur_el > 0 &&
+                (env->v7m.secure || !arm_feature(env, ARM_FEATURE_M_SECURITY) ||
+                 extract32(env->v7m.nsacr, 10, 1))) {
+                env->v7m.control[M_REG_S] &= ~R_V7M_CONTROL_FPCA_MASK;
+                env->v7m.control[M_REG_S] |= val & R_V7M_CONTROL_FPCA_MASK;
+            }
+        }
          break;
      default:
      bad_reg:
@@ -12350,6 +13183,59 @@ uint32_t HELPER(v7m_tt)(CPUARMState *env, uint32_t addr, uint32_t op)
  
  #endif
  
+bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
+                      MMUAccessType access_type, int mmu_idx,
+                      bool probe, uintptr_t retaddr)
+{
+    ARMCPU *cpu = ARM_CPU(cs);
+
+#ifdef CONFIG_USER_ONLY
+    cpu->env.exception.vaddress = address;
+    if (access_type == MMU_INST_FETCH) {
+        cs->exception_index = EXCP_PREFETCH_ABORT;
+    } else {
+        cs->exception_index = EXCP_DATA_ABORT;
+    }
+    cpu_loop_exit_restore(cs, retaddr);
+#else
+    hwaddr phys_addr;
+    target_ulong page_size;
+    int prot, ret;
+    MemTxAttrs attrs = {};
+    ARMMMUFaultInfo fi = {};
+
+    /*
+     * Walk the page table and (if the mapping exists) add the page
+     * to the TLB.  On success, return true.  Otherwise, if probing,
+     * return false.  Otherwise populate fsr with ARM DFSR/IFSR fault
+     * register format, and signal the fault.
+     */
+    ret = get_phys_addr(&cpu->env, address, access_type,
+                        core_to_arm_mmu_idx(&cpu->env, mmu_idx),
+                        &phys_addr, &attrs, &prot, &page_size, &fi, NULL);
+    if (likely(!ret)) {
+        /*
+         * Map a single [sub]page. Regions smaller than our declared
+         * target page size are handled specially, so for those we
+         * pass in the exact addresses.
+         */
+        if (page_size >= TARGET_PAGE_SIZE) {
+            phys_addr &= TARGET_PAGE_MASK;
+            address &= TARGET_PAGE_MASK;
+        }
+        tlb_set_page_with_attrs(cs, address, phys_addr, attrs,
+                                prot, mmu_idx, page_size);
+        return true;
+    } else if (probe) {
+        return false;
+    } else {
+        /* now we have a real cpu fault */
+        cpu_restore_state(cs, retaddr, true);
+        arm_deliver_fault(cpu, address, access_type, mmu_idx, &fi);
+    }
+#endif
+}
+
  void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
  {
      /* Implement DC ZVA, which zeroes a fixed-length block of memory.
@@ -12359,7 +13245,7 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
       * alignment faults or any memory attribute handling).
       */
  
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      uint64_t blocklen = 4 << cpu->dcz_blocksize;
      uint64_t vaddr = vaddr_in & ~(blocklen - 1);
  
@@ -12370,14 +13256,17 @@ void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
           * We know that in fact for any v8 CPU the page size is at least 4K
           * and the block size must be 2K or less, but TARGET_PAGE_SIZE is only
           * 1K as an artefact of legacy v5 subpage support being present in the
-         * same QEMU executable.
+         * same QEMU executable. So in practice the hostaddr[] array has
+         * two entries, given the current setting of TARGET_PAGE_BITS_MIN.
           */
          int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE);
-        void *hostaddr[maxidx];
+        void *hostaddr[DIV_ROUND_UP(2 * KiB, 1 << TARGET_PAGE_BITS_MIN)];
          int try, i;
          unsigned mmu_idx = cpu_mmu_index(env, false);
          TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
  
+        assert(maxidx <= ARRAY_SIZE(hostaddr));
+
          for (try = 0; try < 2; try++) {
  
              for (i = 0; i < maxidx; i++) {
@@ -12678,1139 +13567,106 @@ uint32_t HELPER(sel_flags)(uint32_t flags, uint32_t a, uint32_t b)
      return (a & mask) | (b & ~mask);
  }
  
-/* VFP support.  We follow the convention used for VFP instructions:
-   Single precision routines have a "s" suffix, double precision a
-   "d" suffix.  */
-
-/* Convert host exception flags to vfp form.  */
-static inline int vfp_exceptbits_from_host(int host_bits)
+/* CRC helpers.
+ * The upper bytes of val (above the number specified by 'bytes') must have
+ * been zeroed out by the caller.
+ */
+uint32_t HELPER(crc32)(uint32_t acc, uint32_t val, uint32_t bytes)
  {
-    int target_bits = 0;
+    uint8_t buf[4];
+
+    stl_le_p(buf, val);
  
-    if (host_bits & float_flag_invalid)
-        target_bits |= 1;
-    if (host_bits & float_flag_divbyzero)
-        target_bits |= 2;
-    if (host_bits & float_flag_overflow)
-        target_bits |= 4;
-    if (host_bits & (float_flag_underflow | float_flag_output_denormal))
-        target_bits |= 8;
-    if (host_bits & float_flag_inexact)
-        target_bits |= 0x10;
-    if (host_bits & float_flag_input_denormal)
-        target_bits |= 0x80;
-    return target_bits;
+    /* zlib crc32 converts the accumulator and output to one's complement.  */
+    return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
  }
  
-uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
+uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes)
  {
-    uint32_t i, fpscr;
-
-    fpscr = env->vfp.xregs[ARM_VFP_FPSCR]
-            | (env->vfp.vec_len << 16)
-            | (env->vfp.vec_stride << 20);
-
-    i = get_float_exception_flags(&env->vfp.fp_status);
-    i |= get_float_exception_flags(&env->vfp.standard_fp_status);
-    /* FZ16 does not generate an input denormal exception.  */
-    i |= (get_float_exception_flags(&env->vfp.fp_status_f16)
-          & ~float_flag_input_denormal);
-    fpscr |= vfp_exceptbits_from_host(i);
+    uint8_t buf[4];
  
-    i = env->vfp.qc[0] | env->vfp.qc[1] | env->vfp.qc[2] | env->vfp.qc[3];
-    fpscr |= i ? FPCR_QC : 0;
+    stl_le_p(buf, val);
  
-    return fpscr;
+    /* Linux crc32c converts the output to one's complement.  */
+    return crc32c(acc, buf, bytes) ^ 0xffffffff;
  }
  
-uint32_t vfp_get_fpscr(CPUARMState *env)
+/* Return the exception level to which FP-disabled exceptions should
+ * be taken, or 0 if FP is enabled.
+ */
+int fp_exception_el(CPUARMState *env, int cur_el)
  {
-    return HELPER(vfp_get_fpscr)(env);
-}
+#ifndef CONFIG_USER_ONLY
+    int fpen;
  
-/* Convert vfp exception flags to target form.  */
-static inline int vfp_exceptbits_to_host(int target_bits)
-{
-    int host_bits = 0;
+    /* CPACR and the CPTR registers don't exist before v6, so FP is
+     * always accessible
+     */
+    if (!arm_feature(env, ARM_FEATURE_V6)) {
+        return 0;
+    }
  
-    if (target_bits & 1)
-        host_bits |= float_flag_invalid;
-    if (target_bits & 2)
-        host_bits |= float_flag_divbyzero;
-    if (target_bits & 4)
-        host_bits |= float_flag_overflow;
-    if (target_bits & 8)
-        host_bits |= float_flag_underflow;
-    if (target_bits & 0x10)
-        host_bits |= float_flag_inexact;
-    if (target_bits & 0x80)
-        host_bits |= float_flag_input_denormal;
-    return host_bits;
-}
+    if (arm_feature(env, ARM_FEATURE_M)) {
+        /* CPACR can cause a NOCP UsageFault taken to current security state */
+        if (!v7m_cpacr_pass(env, env->v7m.secure, cur_el != 0)) {
+            return 1;
+        }
  
-void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
-{
-    int i;
-    uint32_t changed = env->vfp.xregs[ARM_VFP_FPSCR];
+        if (arm_feature(env, ARM_FEATURE_M_SECURITY) && !env->v7m.secure) {
+            if (!extract32(env->v7m.nsacr, 10, 1)) {
+                /* FP insns cause a NOCP UsageFault taken to Secure */
+                return 3;
+            }
+        }
  
-    /* When ARMv8.2-FP16 is not supported, FZ16 is RES0.  */
-    if (!cpu_isar_feature(aa64_fp16, arm_env_get_cpu(env))) {
-        val &= ~FPCR_FZ16;
+        return 0;
      }
  
-    /*
-     * We don't implement trapped exception handling, so the
-     * trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!)
-     *
-     * If we exclude the exception flags, IOC|DZC|OFC|UFC|IXC|IDC
-     * (which are stored in fp_status), and the other RES0 bits
-     * in between, then we clear all of the low 16 bits.
+    /* The CPACR controls traps to EL1, or PL1 if we're 32 bit:
+     * 0, 2 : trap EL0 and EL1/PL1 accesses
+     * 1    : trap only EL0 accesses
+     * 3    : trap no accesses
       */
-    env->vfp.xregs[ARM_VFP_FPSCR] = val & 0xf7c80000;
-    env->vfp.vec_len = (val >> 16) & 7;
-    env->vfp.vec_stride = (val >> 20) & 3;
+    fpen = extract32(env->cp15.cpacr_el1, 20, 2);
+    switch (fpen) {
+    case 0:
+    case 2:
+        if (cur_el == 0 || cur_el == 1) {
+            /* Trap to PL1, which might be EL1 or EL3 */
+            if (arm_is_secure(env) && !arm_el_is_aa64(env, 3)) {
+                return 3;
+            }
+            return 1;
+        }
+        if (cur_el == 3 && !is_a64(env)) {
+            /* Secure PL1 running at EL3 */
+            return 3;
+        }
+        break;
+    case 1:
+        if (cur_el == 0) {
+            return 1;
+        }
+        break;
+    case 3:
+        break;
+    }
  
      /*
-     * The bit we set within fpscr_q is arbitrary; the register as a
-     * whole being zero/non-zero is what counts.
-     */
-    env->vfp.qc[0] = val & FPCR_QC;
-    env->vfp.qc[1] = 0;
-    env->vfp.qc[2] = 0;
-    env->vfp.qc[3] = 0;
-
-    changed ^= val;
-    if (changed & (3 << 22)) {
-        i = (val >> 22) & 3;
-        switch (i) {
-        case FPROUNDING_TIEEVEN:
-            i = float_round_nearest_even;
-            break;
-        case FPROUNDING_POSINF:
-            i = float_round_up;
-            break;
-        case FPROUNDING_NEGINF:
-            i = float_round_down;
-            break;
-        case FPROUNDING_ZERO:
-            i = float_round_to_zero;
-            break;
+     * The NSACR allows A-profile AArch32 EL3 and M-profile secure mode
+     * to control non-secure access to the FPU. It doesn't have any
+     * effect if EL3 is AArch64 or if EL3 doesn't exist at all.
+     */
+    if ((arm_feature(env, ARM_FEATURE_EL3) && !arm_el_is_aa64(env, 3) &&
+         cur_el <= 2 && !arm_is_secure_below_el3(env))) {
+        if (!extract32(env->cp15.nsacr, 10, 1)) {
+            /* FP insns act as UNDEF */
+            return cur_el == 2 ? 2 : 1;
          }
-        set_float_rounding_mode(i, &env->vfp.fp_status);
-        set_float_rounding_mode(i, &env->vfp.fp_status_f16);
-    }
-    if (changed & FPCR_FZ16) {
-        bool ftz_enabled = val & FPCR_FZ16;
-        set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16);
-        set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16);
-    }
-    if (changed & FPCR_FZ) {
-        bool ftz_enabled = val & FPCR_FZ;
-        set_flush_to_zero(ftz_enabled, &env->vfp.fp_status);
-        set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status);
-    }
-    if (changed & FPCR_DN) {
-        bool dnan_enabled = val & FPCR_DN;
-        set_default_nan_mode(dnan_enabled, &env->vfp.fp_status);
-        set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16);
      }
  
-    /* The exception flags are ORed together when we read fpscr so we
-     * only need to preserve the current state in one of our
-     * float_status values.
-     */
-    i = vfp_exceptbits_to_host(val);
-    set_float_exception_flags(i, &env->vfp.fp_status);
-    set_float_exception_flags(0, &env->vfp.fp_status_f16);
-    set_float_exception_flags(0, &env->vfp.standard_fp_status);
-}
-
-void vfp_set_fpscr(CPUARMState *env, uint32_t val)
-{
-    HELPER(vfp_set_fpscr)(env, val);
-}
-
-#define VFP_HELPER(name, p) HELPER(glue(glue(vfp_,name),p))
-
-#define VFP_BINOP(name) \
-float32 VFP_HELPER(name, s)(float32 a, float32 b, void *fpstp) \
-{ \
-    float_status *fpst = fpstp; \
-    return float32_ ## name(a, b, fpst); \
-} \
-float64 VFP_HELPER(name, d)(float64 a, float64 b, void *fpstp) \
-{ \
-    float_status *fpst = fpstp; \
-    return float64_ ## name(a, b, fpst); \
-}
-VFP_BINOP(add)
-VFP_BINOP(sub)
-VFP_BINOP(mul)
-VFP_BINOP(div)
-VFP_BINOP(min)
-VFP_BINOP(max)
-VFP_BINOP(minnum)
-VFP_BINOP(maxnum)
-#undef VFP_BINOP
-
-float32 VFP_HELPER(neg, s)(float32 a)
-{
-    return float32_chs(a);
-}
-
-float64 VFP_HELPER(neg, d)(float64 a)
-{
-    return float64_chs(a);
-}
-
-float32 VFP_HELPER(abs, s)(float32 a)
-{
-    return float32_abs(a);
-}
-
-float64 VFP_HELPER(abs, d)(float64 a)
-{
-    return float64_abs(a);
-}
-
-float32 VFP_HELPER(sqrt, s)(float32 a, CPUARMState *env)
-{
-    return float32_sqrt(a, &env->vfp.fp_status);
-}
-
-float64 VFP_HELPER(sqrt, d)(float64 a, CPUARMState *env)
-{
-    return float64_sqrt(a, &env->vfp.fp_status);
-}
-
-static void softfloat_to_vfp_compare(CPUARMState *env, int cmp)
-{
-    uint32_t flags;
-    switch (cmp) {
-    case float_relation_equal:
-        flags = 0x6;
-        break;
-    case float_relation_less:
-        flags = 0x8;
-        break;
-    case float_relation_greater:
-        flags = 0x2;
-        break;
-    case float_relation_unordered:
-        flags = 0x3;
-        break;
-    default:
-        g_assert_not_reached();
-    }
-    env->vfp.xregs[ARM_VFP_FPSCR] =
-        deposit32(env->vfp.xregs[ARM_VFP_FPSCR], 28, 4, flags);
-}
-
-/* XXX: check quiet/signaling case */
-#define DO_VFP_cmp(p, type) \
-void VFP_HELPER(cmp, p)(type a, type b, CPUARMState *env)  \
-{ \
-    softfloat_to_vfp_compare(env, \
-        type ## _compare_quiet(a, b, &env->vfp.fp_status)); \
-} \
-void VFP_HELPER(cmpe, p)(type a, type b, CPUARMState *env) \
-{ \
-    softfloat_to_vfp_compare(env, \
-        type ## _compare(a, b, &env->vfp.fp_status)); \
-}
-DO_VFP_cmp(s, float32)
-DO_VFP_cmp(d, float64)
-#undef DO_VFP_cmp
-
-/* Integer to float and float to integer conversions */
-
-#define CONV_ITOF(name, ftype, fsz, sign)                           \
-ftype HELPER(name)(uint32_t x, void *fpstp)                         \
-{                                                                   \
-    float_status *fpst = fpstp;                                     \
-    return sign##int32_to_##float##fsz((sign##int32_t)x, fpst);     \
-}
-
-#define CONV_FTOI(name, ftype, fsz, sign, round)                \
-sign##int32_t HELPER(name)(ftype x, void *fpstp)                \
-{                                                               \
-    float_status *fpst = fpstp;                                 \
-    if (float##fsz##_is_any_nan(x)) {                           \
-        float_raise(float_flag_invalid, fpst);                  \
-        return 0;                                               \
-    }                                                           \
-    return float##fsz##_to_##sign##int32##round(x, fpst);       \
-}
-
-#define FLOAT_CONVS(name, p, ftype, fsz, sign)            \
-    CONV_ITOF(vfp_##name##to##p, ftype, fsz, sign)        \
-    CONV_FTOI(vfp_to##name##p, ftype, fsz, sign, )        \
-    CONV_FTOI(vfp_to##name##z##p, ftype, fsz, sign, _round_to_zero)
-
-FLOAT_CONVS(si, h, uint32_t, 16, )
-FLOAT_CONVS(si, s, float32, 32, )
-FLOAT_CONVS(si, d, float64, 64, )
-FLOAT_CONVS(ui, h, uint32_t, 16, u)
-FLOAT_CONVS(ui, s, float32, 32, u)
-FLOAT_CONVS(ui, d, float64, 64, u)
-
-#undef CONV_ITOF
-#undef CONV_FTOI
-#undef FLOAT_CONVS
-
-/* floating point conversion */
-float64 VFP_HELPER(fcvtd, s)(float32 x, CPUARMState *env)
-{
-    return float32_to_float64(x, &env->vfp.fp_status);
-}
-
-float32 VFP_HELPER(fcvts, d)(float64 x, CPUARMState *env)
-{
-    return float64_to_float32(x, &env->vfp.fp_status);
-}
-
-/* VFP3 fixed point conversion.  */
-#define VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype) \
-float##fsz HELPER(vfp_##name##to##p)(uint##isz##_t  x, uint32_t shift, \
-                                     void *fpstp) \
-{ return itype##_to_##float##fsz##_scalbn(x, -shift, fpstp); }
-
-#define VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, ROUND, suff)   \
-uint##isz##_t HELPER(vfp_to##name##p##suff)(float##fsz x, uint32_t shift, \
-                                            void *fpst)                   \
-{                                                                         \
-    if (unlikely(float##fsz##_is_any_nan(x))) {                           \
-        float_raise(float_flag_invalid, fpst);                            \
-        return 0;                                                         \
-    }                                                                     \
-    return float##fsz##_to_##itype##_scalbn(x, ROUND, shift, fpst);       \
-}
-
-#define VFP_CONV_FIX(name, p, fsz, isz, itype)                   \
-VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype)                     \
-VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype,               \
-                         float_round_to_zero, _round_to_zero)    \
-VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype,               \
-                         get_float_rounding_mode(fpst), )
-
-#define VFP_CONV_FIX_A64(name, p, fsz, isz, itype)               \
-VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype)                     \
-VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype,               \
-                         get_float_rounding_mode(fpst), )
-
-VFP_CONV_FIX(sh, d, 64, 64, int16)
-VFP_CONV_FIX(sl, d, 64, 64, int32)
-VFP_CONV_FIX_A64(sq, d, 64, 64, int64)
-VFP_CONV_FIX(uh, d, 64, 64, uint16)
-VFP_CONV_FIX(ul, d, 64, 64, uint32)
-VFP_CONV_FIX_A64(uq, d, 64, 64, uint64)
-VFP_CONV_FIX(sh, s, 32, 32, int16)
-VFP_CONV_FIX(sl, s, 32, 32, int32)
-VFP_CONV_FIX_A64(sq, s, 32, 64, int64)
-VFP_CONV_FIX(uh, s, 32, 32, uint16)
-VFP_CONV_FIX(ul, s, 32, 32, uint32)
-VFP_CONV_FIX_A64(uq, s, 32, 64, uint64)
-
-#undef VFP_CONV_FIX
-#undef VFP_CONV_FIX_FLOAT
-#undef VFP_CONV_FLOAT_FIX_ROUND
-#undef VFP_CONV_FIX_A64
-
-uint32_t HELPER(vfp_sltoh)(uint32_t x, uint32_t shift, void *fpst)
-{
-    return int32_to_float16_scalbn(x, -shift, fpst);
-}
-
-uint32_t HELPER(vfp_ultoh)(uint32_t x, uint32_t shift, void *fpst)
-{
-    return uint32_to_float16_scalbn(x, -shift, fpst);
-}
-
-uint32_t HELPER(vfp_sqtoh)(uint64_t x, uint32_t shift, void *fpst)
-{
-    return int64_to_float16_scalbn(x, -shift, fpst);
-}
-
-uint32_t HELPER(vfp_uqtoh)(uint64_t x, uint32_t shift, void *fpst)
-{
-    return uint64_to_float16_scalbn(x, -shift, fpst);
-}
-
-uint32_t HELPER(vfp_toshh)(uint32_t x, uint32_t shift, void *fpst)
-{
-    if (unlikely(float16_is_any_nan(x))) {
-        float_raise(float_flag_invalid, fpst);
-        return 0;
-    }
-    return float16_to_int16_scalbn(x, get_float_rounding_mode(fpst),
-                                   shift, fpst);
-}
-
-uint32_t HELPER(vfp_touhh)(uint32_t x, uint32_t shift, void *fpst)
-{
-    if (unlikely(float16_is_any_nan(x))) {
-        float_raise(float_flag_invalid, fpst);
-        return 0;
-    }
-    return float16_to_uint16_scalbn(x, get_float_rounding_mode(fpst),
-                                    shift, fpst);
-}
-
-uint32_t HELPER(vfp_toslh)(uint32_t x, uint32_t shift, void *fpst)
-{
-    if (unlikely(float16_is_any_nan(x))) {
-        float_raise(float_flag_invalid, fpst);
-        return 0;
-    }
-    return float16_to_int32_scalbn(x, get_float_rounding_mode(fpst),
-                                   shift, fpst);
-}
-
-uint32_t HELPER(vfp_toulh)(uint32_t x, uint32_t shift, void *fpst)
-{
-    if (unlikely(float16_is_any_nan(x))) {
-        float_raise(float_flag_invalid, fpst);
-        return 0;
-    }
-    return float16_to_uint32_scalbn(x, get_float_rounding_mode(fpst),
-                                    shift, fpst);
-}
-
-uint64_t HELPER(vfp_tosqh)(uint32_t x, uint32_t shift, void *fpst)
-{
-    if (unlikely(float16_is_any_nan(x))) {
-        float_raise(float_flag_invalid, fpst);
-        return 0;
-    }
-    return float16_to_int64_scalbn(x, get_float_rounding_mode(fpst),
-                                   shift, fpst);
-}
-
-uint64_t HELPER(vfp_touqh)(uint32_t x, uint32_t shift, void *fpst)
-{
-    if (unlikely(float16_is_any_nan(x))) {
-        float_raise(float_flag_invalid, fpst);
-        return 0;
-    }
-    return float16_to_uint64_scalbn(x, get_float_rounding_mode(fpst),
-                                    shift, fpst);
-}
-
-/* Set the current fp rounding mode and return the old one.
- * The argument is a softfloat float_round_ value.
- */
-uint32_t HELPER(set_rmode)(uint32_t rmode, void *fpstp)
-{
-    float_status *fp_status = fpstp;
-
-    uint32_t prev_rmode = get_float_rounding_mode(fp_status);
-    set_float_rounding_mode(rmode, fp_status);
-
-    return prev_rmode;
-}
-
-/* Set the current fp rounding mode in the standard fp status and return
- * the old one. This is for NEON instructions that need to change the
- * rounding mode but wish to use the standard FPSCR values for everything
- * else. Always set the rounding mode back to the correct value after
- * modifying it.
- * The argument is a softfloat float_round_ value.
- */
-uint32_t HELPER(set_neon_rmode)(uint32_t rmode, CPUARMState *env)
-{
-    float_status *fp_status = &env->vfp.standard_fp_status;
-
-    uint32_t prev_rmode = get_float_rounding_mode(fp_status);
-    set_float_rounding_mode(rmode, fp_status);
-
-    return prev_rmode;
-}
-
-/* Half precision conversions.  */
-float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, void *fpstp, uint32_t ahp_mode)
-{
-    /* Squash FZ16 to 0 for the duration of conversion.  In this case,
-     * it would affect flushing input denormals.
-     */
-    float_status *fpst = fpstp;
-    flag save = get_flush_inputs_to_zero(fpst);
-    set_flush_inputs_to_zero(false, fpst);
-    float32 r = float16_to_float32(a, !ahp_mode, fpst);
-    set_flush_inputs_to_zero(save, fpst);
-    return r;
-}
-
-uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, void *fpstp, uint32_t ahp_mode)
-{
-    /* Squash FZ16 to 0 for the duration of conversion.  In this case,
-     * it would affect flushing output denormals.
-     */
-    float_status *fpst = fpstp;
-    flag save = get_flush_to_zero(fpst);
-    set_flush_to_zero(false, fpst);
-    float16 r = float32_to_float16(a, !ahp_mode, fpst);
-    set_flush_to_zero(save, fpst);
-    return r;
-}
-
-float64 HELPER(vfp_fcvt_f16_to_f64)(uint32_t a, void *fpstp, uint32_t ahp_mode)
-{
-    /* Squash FZ16 to 0 for the duration of conversion.  In this case,
-     * it would affect flushing input denormals.
-     */
-    float_status *fpst = fpstp;
-    flag save = get_flush_inputs_to_zero(fpst);
-    set_flush_inputs_to_zero(false, fpst);
-    float64 r = float16_to_float64(a, !ahp_mode, fpst);
-    set_flush_inputs_to_zero(save, fpst);
-    return r;
-}
-
-uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t ahp_mode)
-{
-    /* Squash FZ16 to 0 for the duration of conversion.  In this case,
-     * it would affect flushing output denormals.
-     */
-    float_status *fpst = fpstp;
-    flag save = get_flush_to_zero(fpst);
-    set_flush_to_zero(false, fpst);
-    float16 r = float64_to_float16(a, !ahp_mode, fpst);
-    set_flush_to_zero(save, fpst);
-    return r;
-}
-
-#define float32_two make_float32(0x40000000)
-#define float32_three make_float32(0x40400000)
-#define float32_one_point_five make_float32(0x3fc00000)
-
-float32 HELPER(recps_f32)(float32 a, float32 b, CPUARMState *env)
-{
-    float_status *s = &env->vfp.standard_fp_status;
-    if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) ||
-        (float32_is_infinity(b) && float32_is_zero_or_denormal(a))) {
-        if (!(float32_is_zero(a) || float32_is_zero(b))) {
-            float_raise(float_flag_input_denormal, s);
-        }
-        return float32_two;
-    }
-    return float32_sub(float32_two, float32_mul(a, b, s), s);
-}
-
-float32 HELPER(rsqrts_f32)(float32 a, float32 b, CPUARMState *env)
-{
-    float_status *s = &env->vfp.standard_fp_status;
-    float32 product;
-    if ((float32_is_infinity(a) && float32_is_zero_or_denormal(b)) ||
-        (float32_is_infinity(b) && float32_is_zero_or_denormal(a))) {
-        if (!(float32_is_zero(a) || float32_is_zero(b))) {
-            float_raise(float_flag_input_denormal, s);
-        }
-        return float32_one_point_five;
-    }
-    product = float32_mul(a, b, s);
-    return float32_div(float32_sub(float32_three, product, s), float32_two, s);
-}
-
-/* NEON helpers.  */
-
-/* Constants 256 and 512 are used in some helpers; we avoid relying on
- * int->float conversions at run-time.  */
-#define float64_256 make_float64(0x4070000000000000LL)
-#define float64_512 make_float64(0x4080000000000000LL)
-#define float16_maxnorm make_float16(0x7bff)
-#define float32_maxnorm make_float32(0x7f7fffff)
-#define float64_maxnorm make_float64(0x7fefffffffffffffLL)
-
-/* Reciprocal functions
- *
- * The algorithm that must be used to calculate the estimate
- * is specified by the ARM ARM, see FPRecipEstimate()/RecipEstimate
- */
-
-/* See RecipEstimate()
- *
- * input is a 9 bit fixed point number
- * input range 256 .. 511 for a number from 0.5 <= x < 1.0.
- * result range 256 .. 511 for a number from 1.0 to 511/256.
- */
-
-static int recip_estimate(int input)
-{
-    int a, b, r;
-    assert(256 <= input && input < 512);
-    a = (input * 2) + 1;
-    b = (1 << 19) / a;
-    r = (b + 1) >> 1;
-    assert(256 <= r && r < 512);
-    return r;
-}
-
-/*
- * Common wrapper to call recip_estimate
- *
- * The parameters are exponent and 64 bit fraction (without implicit
- * bit) where the binary point is nominally at bit 52. Returns a
- * float64 which can then be rounded to the appropriate size by the
- * callee.
- */
-
-static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac)
-{
-    uint32_t scaled, estimate;
-    uint64_t result_frac;
-    int result_exp;
-
-    /* Handle sub-normals */
-    if (*exp == 0) {
-        if (extract64(frac, 51, 1) == 0) {
-            *exp = -1;
-            frac <<= 2;
-        } else {
-            frac <<= 1;
-        }
-    }
-
-    /* scaled = UInt('1':fraction<51:44>) */
-    scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
-    estimate = recip_estimate(scaled);
-
-    result_exp = exp_off - *exp;
-    result_frac = deposit64(0, 44, 8, estimate);
-    if (result_exp == 0) {
-        result_frac = deposit64(result_frac >> 1, 51, 1, 1);
-    } else if (result_exp == -1) {
-        result_frac = deposit64(result_frac >> 2, 50, 2, 1);
-        result_exp = 0;
-    }
-
-    *exp = result_exp;
-
-    return result_frac;
-}
-
-static bool round_to_inf(float_status *fpst, bool sign_bit)
-{
-    switch (fpst->float_rounding_mode) {
-    case float_round_nearest_even: /* Round to Nearest */
-        return true;
-    case float_round_up: /* Round to +Inf */
-        return !sign_bit;
-    case float_round_down: /* Round to -Inf */
-        return sign_bit;
-    case float_round_to_zero: /* Round to Zero */
-        return false;
-    }
-
-    g_assert_not_reached();
-}
-
-uint32_t HELPER(recpe_f16)(uint32_t input, void *fpstp)
-{
-    float_status *fpst = fpstp;
-    float16 f16 = float16_squash_input_denormal(input, fpst);
-    uint32_t f16_val = float16_val(f16);
-    uint32_t f16_sign = float16_is_neg(f16);
-    int f16_exp = extract32(f16_val, 10, 5);
-    uint32_t f16_frac = extract32(f16_val, 0, 10);
-    uint64_t f64_frac;
-
-    if (float16_is_any_nan(f16)) {
-        float16 nan = f16;
-        if (float16_is_signaling_nan(f16, fpst)) {
-            float_raise(float_flag_invalid, fpst);
-            nan = float16_silence_nan(f16, fpst);
-        }
-        if (fpst->default_nan_mode) {
-            nan =  float16_default_nan(fpst);
-        }
-        return nan;
-    } else if (float16_is_infinity(f16)) {
-        return float16_set_sign(float16_zero, float16_is_neg(f16));
-    } else if (float16_is_zero(f16)) {
-        float_raise(float_flag_divbyzero, fpst);
-        return float16_set_sign(float16_infinity, float16_is_neg(f16));
-    } else if (float16_abs(f16) < (1 << 8)) {
-        /* Abs(value) < 2.0^-16 */
-        float_raise(float_flag_overflow | float_flag_inexact, fpst);
-        if (round_to_inf(fpst, f16_sign)) {
-            return float16_set_sign(float16_infinity, f16_sign);
-        } else {
-            return float16_set_sign(float16_maxnorm, f16_sign);
-        }
-    } else if (f16_exp >= 29 && fpst->flush_to_zero) {
-        float_raise(float_flag_underflow, fpst);
-        return float16_set_sign(float16_zero, float16_is_neg(f16));
-    }
-
-    f64_frac = call_recip_estimate(&f16_exp, 29,
-                                   ((uint64_t) f16_frac) << (52 - 10));
-
-    /* result = sign : result_exp<4:0> : fraction<51:42> */
-    f16_val = deposit32(0, 15, 1, f16_sign);
-    f16_val = deposit32(f16_val, 10, 5, f16_exp);
-    f16_val = deposit32(f16_val, 0, 10, extract64(f64_frac, 52 - 10, 10));
-    return make_float16(f16_val);
-}
-
-float32 HELPER(recpe_f32)(float32 input, void *fpstp)
-{
-    float_status *fpst = fpstp;
-    float32 f32 = float32_squash_input_denormal(input, fpst);
-    uint32_t f32_val = float32_val(f32);
-    bool f32_sign = float32_is_neg(f32);
-    int f32_exp = extract32(f32_val, 23, 8);
-    uint32_t f32_frac = extract32(f32_val, 0, 23);
-    uint64_t f64_frac;
-
-    if (float32_is_any_nan(f32)) {
-        float32 nan = f32;
-        if (float32_is_signaling_nan(f32, fpst)) {
-            float_raise(float_flag_invalid, fpst);
-            nan = float32_silence_nan(f32, fpst);
-        }
-        if (fpst->default_nan_mode) {
-            nan =  float32_default_nan(fpst);
-        }
-        return nan;
-    } else if (float32_is_infinity(f32)) {
-        return float32_set_sign(float32_zero, float32_is_neg(f32));
-    } else if (float32_is_zero(f32)) {
-        float_raise(float_flag_divbyzero, fpst);
-        return float32_set_sign(float32_infinity, float32_is_neg(f32));
-    } else if (float32_abs(f32) < (1ULL << 21)) {
-        /* Abs(value) < 2.0^-128 */
-        float_raise(float_flag_overflow | float_flag_inexact, fpst);
-        if (round_to_inf(fpst, f32_sign)) {
-            return float32_set_sign(float32_infinity, f32_sign);
-        } else {
-            return float32_set_sign(float32_maxnorm, f32_sign);
-        }
-    } else if (f32_exp >= 253 && fpst->flush_to_zero) {
-        float_raise(float_flag_underflow, fpst);
-        return float32_set_sign(float32_zero, float32_is_neg(f32));
-    }
-
-    f64_frac = call_recip_estimate(&f32_exp, 253,
-                                   ((uint64_t) f32_frac) << (52 - 23));
-
-    /* result = sign : result_exp<7:0> : fraction<51:29> */
-    f32_val = deposit32(0, 31, 1, f32_sign);
-    f32_val = deposit32(f32_val, 23, 8, f32_exp);
-    f32_val = deposit32(f32_val, 0, 23, extract64(f64_frac, 52 - 23, 23));
-    return make_float32(f32_val);
-}
-
-float64 HELPER(recpe_f64)(float64 input, void *fpstp)
-{
-    float_status *fpst = fpstp;
-    float64 f64 = float64_squash_input_denormal(input, fpst);
-    uint64_t f64_val = float64_val(f64);
-    bool f64_sign = float64_is_neg(f64);
-    int f64_exp = extract64(f64_val, 52, 11);
-    uint64_t f64_frac = extract64(f64_val, 0, 52);
-
-    /* Deal with any special cases */
-    if (float64_is_any_nan(f64)) {
-        float64 nan = f64;
-        if (float64_is_signaling_nan(f64, fpst)) {
-            float_raise(float_flag_invalid, fpst);
-            nan = float64_silence_nan(f64, fpst);
-        }
-        if (fpst->default_nan_mode) {
-            nan =  float64_default_nan(fpst);
-        }
-        return nan;
-    } else if (float64_is_infinity(f64)) {
-        return float64_set_sign(float64_zero, float64_is_neg(f64));
-    } else if (float64_is_zero(f64)) {
-        float_raise(float_flag_divbyzero, fpst);
-        return float64_set_sign(float64_infinity, float64_is_neg(f64));
-    } else if ((f64_val & ~(1ULL << 63)) < (1ULL << 50)) {
-        /* Abs(value) < 2.0^-1024 */
-        float_raise(float_flag_overflow | float_flag_inexact, fpst);
-        if (round_to_inf(fpst, f64_sign)) {
-            return float64_set_sign(float64_infinity, f64_sign);
-        } else {
-            return float64_set_sign(float64_maxnorm, f64_sign);
-        }
-    } else if (f64_exp >= 2045 && fpst->flush_to_zero) {
-        float_raise(float_flag_underflow, fpst);
-        return float64_set_sign(float64_zero, float64_is_neg(f64));
-    }
-
-    f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac);
-
-    /* result = sign : result_exp<10:0> : fraction<51:0>; */
-    f64_val = deposit64(0, 63, 1, f64_sign);
-    f64_val = deposit64(f64_val, 52, 11, f64_exp);
-    f64_val = deposit64(f64_val, 0, 52, f64_frac);
-    return make_float64(f64_val);
-}
-
-/* The algorithm that must be used to calculate the estimate
- * is specified by the ARM ARM.
- */
-
-static int do_recip_sqrt_estimate(int a)
-{
-    int b, estimate;
-
-    assert(128 <= a && a < 512);
-    if (a < 256) {
-        a = a * 2 + 1;
-    } else {
-        a = (a >> 1) << 1;
-        a = (a + 1) * 2;
-    }
-    b = 512;
-    while (a * (b + 1) * (b + 1) < (1 << 28)) {
-        b += 1;
-    }
-    estimate = (b + 1) / 2;
-    assert(256 <= estimate && estimate < 512);
-
-    return estimate;
-}
-
-
-static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac)
-{
-    int estimate;
-    uint32_t scaled;
-
-    if (*exp == 0) {
-        while (extract64(frac, 51, 1) == 0) {
-            frac = frac << 1;
-            *exp -= 1;
-        }
-        frac = extract64(frac, 0, 51) << 1;
-    }
-
-    if (*exp & 1) {
-        /* scaled = UInt('01':fraction<51:45>) */
-        scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7));
-    } else {
-        /* scaled = UInt('1':fraction<51:44>) */
-        scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
-    }
-    estimate = do_recip_sqrt_estimate(scaled);
-
-    *exp = (exp_off - *exp) / 2;
-    return extract64(estimate, 0, 8) << 44;
-}
-
-uint32_t HELPER(rsqrte_f16)(uint32_t input, void *fpstp)
-{
-    float_status *s = fpstp;
-    float16 f16 = float16_squash_input_denormal(input, s);
-    uint16_t val = float16_val(f16);
-    bool f16_sign = float16_is_neg(f16);
-    int f16_exp = extract32(val, 10, 5);
-    uint16_t f16_frac = extract32(val, 0, 10);
-    uint64_t f64_frac;
-
-    if (float16_is_any_nan(f16)) {
-        float16 nan = f16;
-        if (float16_is_signaling_nan(f16, s)) {
-            float_raise(float_flag_invalid, s);
-            nan = float16_silence_nan(f16, s);
-        }
-        if (s->default_nan_mode) {
-            nan =  float16_default_nan(s);
-        }
-        return nan;
-    } else if (float16_is_zero(f16)) {
-        float_raise(float_flag_divbyzero, s);
-        return float16_set_sign(float16_infinity, f16_sign);
-    } else if (f16_sign) {
-        float_raise(float_flag_invalid, s);
-        return float16_default_nan(s);
-    } else if (float16_is_infinity(f16)) {
-        return float16_zero;
-    }
-
-    /* Scale and normalize to a double-precision value between 0.25 and 1.0,
-     * preserving the parity of the exponent.  */
-
-    f64_frac = ((uint64_t) f16_frac) << (52 - 10);
-
-    f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac);
-
-    /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(2) */
-    val = deposit32(0, 15, 1, f16_sign);
-    val = deposit32(val, 10, 5, f16_exp);
-    val = deposit32(val, 2, 8, extract64(f64_frac, 52 - 8, 8));
-    return make_float16(val);
-}
-
-float32 HELPER(rsqrte_f32)(float32 input, void *fpstp)
-{
-    float_status *s = fpstp;
-    float32 f32 = float32_squash_input_denormal(input, s);
-    uint32_t val = float32_val(f32);
-    uint32_t f32_sign = float32_is_neg(f32);
-    int f32_exp = extract32(val, 23, 8);
-    uint32_t f32_frac = extract32(val, 0, 23);
-    uint64_t f64_frac;
-
-    if (float32_is_any_nan(f32)) {
-        float32 nan = f32;
-        if (float32_is_signaling_nan(f32, s)) {
-            float_raise(float_flag_invalid, s);
-            nan = float32_silence_nan(f32, s);
-        }
-        if (s->default_nan_mode) {
-            nan =  float32_default_nan(s);
-        }
-        return nan;
-    } else if (float32_is_zero(f32)) {
-        float_raise(float_flag_divbyzero, s);
-        return float32_set_sign(float32_infinity, float32_is_neg(f32));
-    } else if (float32_is_neg(f32)) {
-        float_raise(float_flag_invalid, s);
-        return float32_default_nan(s);
-    } else if (float32_is_infinity(f32)) {
-        return float32_zero;
-    }
-
-    /* Scale and normalize to a double-precision value between 0.25 and 1.0,
-     * preserving the parity of the exponent.  */
-
-    f64_frac = ((uint64_t) f32_frac) << 29;
-
-    f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac);
-
-    /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(15) */
-    val = deposit32(0, 31, 1, f32_sign);
-    val = deposit32(val, 23, 8, f32_exp);
-    val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8));
-    return make_float32(val);
-}
-
-float64 HELPER(rsqrte_f64)(float64 input, void *fpstp)
-{
-    float_status *s = fpstp;
-    float64 f64 = float64_squash_input_denormal(input, s);
-    uint64_t val = float64_val(f64);
-    bool f64_sign = float64_is_neg(f64);
-    int f64_exp = extract64(val, 52, 11);
-    uint64_t f64_frac = extract64(val, 0, 52);
-
-    if (float64_is_any_nan(f64)) {
-        float64 nan = f64;
-        if (float64_is_signaling_nan(f64, s)) {
-            float_raise(float_flag_invalid, s);
-            nan = float64_silence_nan(f64, s);
-        }
-        if (s->default_nan_mode) {
-            nan =  float64_default_nan(s);
-        }
-        return nan;
-    } else if (float64_is_zero(f64)) {
-        float_raise(float_flag_divbyzero, s);
-        return float64_set_sign(float64_infinity, float64_is_neg(f64));
-    } else if (float64_is_neg(f64)) {
-        float_raise(float_flag_invalid, s);
-        return float64_default_nan(s);
-    } else if (float64_is_infinity(f64)) {
-        return float64_zero;
-    }
-
-    f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac);
-
-    /* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(44) */
-    val = deposit64(0, 61, 1, f64_sign);
-    val = deposit64(val, 52, 11, f64_exp);
-    val = deposit64(val, 44, 8, extract64(f64_frac, 52 - 8, 8));
-    return make_float64(val);
-}
-
-uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp)
-{
-    /* float_status *s = fpstp; */
-    int input, estimate;
-
-    if ((a & 0x80000000) == 0) {
-        return 0xffffffff;
-    }
-
-    input = extract32(a, 23, 9);
-    estimate = recip_estimate(input);
-
-    return deposit32(0, (32 - 9), 9, estimate);
-}
-
-uint32_t HELPER(rsqrte_u32)(uint32_t a, void *fpstp)
-{
-    int estimate;
-
-    if ((a & 0xc0000000) == 0) {
-        return 0xffffffff;
-    }
-
-    estimate = do_recip_sqrt_estimate(extract32(a, 23, 9));
-
-    return deposit32(0, 23, 9, estimate);
-}
-
-/* VFPv4 fused multiply-accumulate */
-float32 VFP_HELPER(muladd, s)(float32 a, float32 b, float32 c, void *fpstp)
-{
-    float_status *fpst = fpstp;
-    return float32_muladd(a, b, c, 0, fpst);
-}
-
-float64 VFP_HELPER(muladd, d)(float64 a, float64 b, float64 c, void *fpstp)
-{
-    float_status *fpst = fpstp;
-    return float64_muladd(a, b, c, 0, fpst);
-}
-
-/* ARMv8 round to integral */
-float32 HELPER(rints_exact)(float32 x, void *fp_status)
-{
-    return float32_round_to_int(x, fp_status);
-}
-
-float64 HELPER(rintd_exact)(float64 x, void *fp_status)
-{
-    return float64_round_to_int(x, fp_status);
-}
-
-float32 HELPER(rints)(float32 x, void *fp_status)
-{
-    int old_flags = get_float_exception_flags(fp_status), new_flags;
-    float32 ret;
-
-    ret = float32_round_to_int(x, fp_status);
-
-    /* Suppress any inexact exceptions the conversion produced */
-    if (!(old_flags & float_flag_inexact)) {
-        new_flags = get_float_exception_flags(fp_status);
-        set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
-    }
-
-    return ret;
-}
-
-float64 HELPER(rintd)(float64 x, void *fp_status)
-{
-    int old_flags = get_float_exception_flags(fp_status), new_flags;
-    float64 ret;
-
-    ret = float64_round_to_int(x, fp_status);
-
-    new_flags = get_float_exception_flags(fp_status);
-
-    /* Suppress any inexact exceptions the conversion produced */
-    if (!(old_flags & float_flag_inexact)) {
-        new_flags = get_float_exception_flags(fp_status);
-        set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
-    }
-
-    return ret;
-}
-
-/* Convert ARM rounding mode to softfloat */
-int arm_rmode_to_sf(int rmode)
-{
-    switch (rmode) {
-    case FPROUNDING_TIEAWAY:
-        rmode = float_round_ties_away;
-        break;
-    case FPROUNDING_ODD:
-        /* FIXME: add support for TIEAWAY and ODD */
-        qemu_log_mask(LOG_UNIMP, "arm: unimplemented rounding mode: %d\n",
-                      rmode);
-        /* fall through for now */
-    case FPROUNDING_TIEEVEN:
-    default:
-        rmode = float_round_nearest_even;
-        break;
-    case FPROUNDING_POSINF:
-        rmode = float_round_up;
-        break;
-    case FPROUNDING_NEGINF:
-        rmode = float_round_down;
-        break;
-    case FPROUNDING_ZERO:
-        rmode = float_round_to_zero;
-        break;
-    }
-    return rmode;
-}
-
-/* CRC helpers.
- * The upper bytes of val (above the number specified by 'bytes') must have
- * been zeroed out by the caller.
- */
-uint32_t HELPER(crc32)(uint32_t acc, uint32_t val, uint32_t bytes)
-{
-    uint8_t buf[4];
-
-    stl_le_p(buf, val);
-
-    /* zlib crc32 converts the accumulator and output to one's complement.  */
-    return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
-}
-
-uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes)
-{
-    uint8_t buf[4];
-
-    stl_le_p(buf, val);
-
-    /* Linux crc32c converts the output to one's complement.  */
-    return crc32c(acc, buf, bytes) ^ 0xffffffff;
-}
-
-/* Return the exception level to which FP-disabled exceptions should
- * be taken, or 0 if FP is enabled.
- */
-int fp_exception_el(CPUARMState *env, int cur_el)
-{
-#ifndef CONFIG_USER_ONLY
-    int fpen;
-
-    /* CPACR and the CPTR registers don't exist before v6, so FP is
-     * always accessible
-     */
-    if (!arm_feature(env, ARM_FEATURE_V6)) {
-        return 0;
-    }
-
-    /* The CPACR controls traps to EL1, or PL1 if we're 32 bit:
-     * 0, 2 : trap EL0 and EL1/PL1 accesses
-     * 1    : trap only EL0 accesses
-     * 3    : trap no accesses
-     */
-    fpen = extract32(env->cp15.cpacr_el1, 20, 2);
-    switch (fpen) {
-    case 0:
-    case 2:
-        if (cur_el == 0 || cur_el == 1) {
-            /* Trap to PL1, which might be EL1 or EL3 */
-            if (arm_is_secure(env) && !arm_el_is_aa64(env, 3)) {
-                return 3;
-            }
-            return 1;
-        }
-        if (cur_el == 3 && !is_a64(env)) {
-            /* Secure PL1 running at EL3 */
-            return 3;
-        }
-        break;
-    case 1:
-        if (cur_el == 0) {
-            return 1;
-        }
-        break;
-    case 3:
-        break;
-    }
-
-    /* For the CPTR registers we don't need to guard with an ARM_FEATURE
-     * check because zero bits in the registers mean "don't trap".
+    /* For the CPTR registers we don't need to guard with an ARM_FEATURE
+     * check because zero bits in the registers mean "don't trap".
       */
  
      /* CPTR_EL2 : present in v7VE or v8 */
@@ -13829,8 +13685,8 @@ int fp_exception_el(CPUARMState *env, int cur_el)
      return 0;
  }
  
-ARMMMUIdx arm_v7m_mmu_idx_for_secstate_and_priv(CPUARMState *env,
-                                                bool secstate, bool priv)
+ARMMMUIdx arm_v7m_mmu_idx_all(CPUARMState *env,
+                              bool secstate, bool priv, bool negpri)
  {
      ARMMMUIdx mmu_idx = ARM_MMU_IDX_M;
  
@@ -13838,7 +13694,7 @@ ARMMMUIdx arm_v7m_mmu_idx_for_secstate_and_priv(CPUARMState *env,
          mmu_idx |= ARM_MMU_IDX_M_PRIV;
      }
  
-    if (armv7m_nvic_neg_prio_requested(env->nvic, secstate)) {
+    if (negpri) {
          mmu_idx |= ARM_MMU_IDX_M_NEGPRI;
      }
  
@@ -13849,6 +13705,14 @@ ARMMMUIdx arm_v7m_mmu_idx_for_secstate_and_priv(CPUARMState *env,
      return mmu_idx;
  }
  
+ARMMMUIdx arm_v7m_mmu_idx_for_secstate_and_priv(CPUARMState *env,
+                                                bool secstate, bool priv)
+{
+    bool negpri = armv7m_nvic_neg_prio_requested(env->nvic, secstate);
+
+    return arm_v7m_mmu_idx_all(env, secstate, priv, negpri);
+}
+
  /* Return the MMU index for a v7M CPU in the specified security state */
  ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate)
  {
@@ -13894,7 +13758,7 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
      uint32_t flags = 0;
  
      if (is_a64(env)) {
-        ARMCPU *cpu = arm_env_get_cpu(env);
+        ARMCPU *cpu = env_archcpu(env);
          uint64_t sctlr;
  
          *pc = env->pc;
@@ -13936,12 +13800,8 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
              flags = FIELD_DP32(flags, TBFLAG_A64, ZCR_LEN, zcr_len);
          }
  
-        if (current_el == 0) {
-            /* FIXME: ARMv8.1-VHE S2 translation regime.  */
-            sctlr = env->cp15.sctlr_el[1];
-        } else {
-            sctlr = env->cp15.sctlr_el[current_el];
-        }
+        sctlr = arm_sctlr(env, current_el);
+
          if (cpu_isar_feature(aa64_pauth, cpu)) {
              /*
               * In order to save space in flags, we record only whether
@@ -13970,10 +13830,14 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
          flags = FIELD_DP32(flags, TBFLAG_A32, SCTLR_B, arm_sctlr_b(env));
          flags = FIELD_DP32(flags, TBFLAG_A32, NS, !access_secure_reg(env));
          if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)
-            || arm_el_is_aa64(env, 1)) {
+            || arm_el_is_aa64(env, 1) || arm_feature(env, ARM_FEATURE_M)) {
              flags = FIELD_DP32(flags, TBFLAG_A32, VFPEN, 1);
          }
-        flags = FIELD_DP32(flags, TBFLAG_A32, XSCALE_CPAR, env->cp15.c15_cpar);
+        /* Note that XSCALE_CPAR shares bits with VECSTRIDE */
+        if (arm_feature(env, ARM_FEATURE_XSCALE)) {
+            flags = FIELD_DP32(flags, TBFLAG_A32,
+                               XSCALE_CPAR, env->cp15.c15_cpar);
+        }
      }
  
      flags = FIELD_DP32(flags, TBFLAG_ANY, MMUIDX, arm_to_core_mmu_idx(mmu_idx));
@@ -14016,6 +13880,32 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
          flags = FIELD_DP32(flags, TBFLAG_A32, STACKCHECK, 1);
      }
  
+    if (arm_feature(env, ARM_FEATURE_M_SECURITY) &&
+        FIELD_EX32(env->v7m.fpccr[M_REG_S], V7M_FPCCR, S) != env->v7m.secure) {
+        flags = FIELD_DP32(flags, TBFLAG_A32, FPCCR_S_WRONG, 1);
+    }
+
+    if (arm_feature(env, ARM_FEATURE_M) &&
+        (env->v7m.fpccr[env->v7m.secure] & R_V7M_FPCCR_ASPEN_MASK) &&
+        (!(env->v7m.control[M_REG_S] & R_V7M_CONTROL_FPCA_MASK) ||
+         (env->v7m.secure &&
+          !(env->v7m.control[M_REG_S] & R_V7M_CONTROL_SFPA_MASK)))) {
+        /*
+         * ASPEN is set, but FPCA/SFPA indicate that there is no active
+         * FP context; we must create a new FP context before executing
+         * any FP insn.
+         */
+        flags = FIELD_DP32(flags, TBFLAG_A32, NEW_FP_CTXT_NEEDED, 1);
+    }
+
+    if (arm_feature(env, ARM_FEATURE_M)) {
+        bool is_secure = env->v7m.fpccr[M_REG_S] & R_V7M_FPCCR_S_MASK;
+
+        if (env->v7m.fpccr[is_secure] & R_V7M_FPCCR_LSPACT_MASK) {
+            flags = FIELD_DP32(flags, TBFLAG_A32, LSPACT, 1);
+        }
+    }
+
      *pflags = flags;
      *cs_base = 0;
  }
@@ -14041,7 +13931,7 @@ void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq)
      uint64_t pmask;
  
      assert(vq >= 1 && vq <= ARM_MAX_VQ);
-    assert(vq <= arm_env_get_cpu(env)->sve_max_vq);
+    assert(vq <= env_archcpu(env)->sve_max_vq);
  
      /* Zap the high bits of the zregs.  */
      for (i = 0; i < 32; i++) {
@@ -14067,7 +13957,7 @@ void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq)
  void aarch64_sve_change_el(CPUARMState *env, int old_el,
                             int new_el, bool el0_a64)
  {
-    ARMCPU *cpu = arm_env_get_cpu(env);
+    ARMCPU *cpu = env_archcpu(env);
      int old_len, new_len;
      bool old_a64, new_a64;