target/arm: Implement HCR.VI and VF

[qemu.git] / target / arm / helper.c
diff --git a/target/arm/helper.c b/target/arm/helper.c

index 0604a0efbe2c0f64b1962170c5aa8eb32c8c5459..5017fd67e1c3f14cc559d3e3bf687521be78e47f 100644 (file)
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -17,6 +17,7 @@
  #include "exec/semihost.h"
  #include "sysemu/kvm.h"
  #include "fpu/softfloat.h"
+#include "qemu/range.h"
  
  #define ARM_CPU_FREQ 1000000000 /* FIXME: 1 GHz, should be configurable */
  
@@ -55,6 +56,8 @@ static void v8m_security_lookup(CPUARMState *env, uint32_t address,
                                  V8M_SAttributes *sattrs);
  #endif
  
+static void switch_mode(CPUARMState *env, int mode);
+
  static int vfp_gdb_get_reg(CPUARMState *env, uint8_t *buf, int reg)
  {
      int nregs;
@@ -443,9 +446,11 @@ static CPAccessResult access_tdosa(CPUARMState *env, const ARMCPRegInfo *ri,
                                     bool isread)
  {
      int el = arm_current_el(env);
+    bool mdcr_el2_tdosa = (env->cp15.mdcr_el2 & MDCR_TDOSA) ||
+        (env->cp15.mdcr_el2 & MDCR_TDE) ||
+        (env->cp15.hcr_el2 & HCR_TGE);
  
-    if (el < 2 && (env->cp15.mdcr_el2 & MDCR_TDOSA)
-        && !arm_is_secure_below_el3(env)) {
+    if (el < 2 && mdcr_el2_tdosa && !arm_is_secure_below_el3(env)) {
          return CP_ACCESS_TRAP_EL2;
      }
      if (el < 3 && (env->cp15.mdcr_el3 & MDCR_TDOSA)) {
@@ -461,9 +466,11 @@ static CPAccessResult access_tdra(CPUARMState *env, const ARMCPRegInfo *ri,
                                    bool isread)
  {
      int el = arm_current_el(env);
+    bool mdcr_el2_tdra = (env->cp15.mdcr_el2 & MDCR_TDRA) ||
+        (env->cp15.mdcr_el2 & MDCR_TDE) ||
+        (env->cp15.hcr_el2 & HCR_TGE);
  
-    if (el < 2 && (env->cp15.mdcr_el2 & MDCR_TDRA)
-        && !arm_is_secure_below_el3(env)) {
+    if (el < 2 && mdcr_el2_tdra && !arm_is_secure_below_el3(env)) {
          return CP_ACCESS_TRAP_EL2;
      }
      if (el < 3 && (env->cp15.mdcr_el3 & MDCR_TDA)) {
@@ -479,9 +486,11 @@ static CPAccessResult access_tda(CPUARMState *env, const ARMCPRegInfo *ri,
                                    bool isread)
  {
      int el = arm_current_el(env);
+    bool mdcr_el2_tda = (env->cp15.mdcr_el2 & MDCR_TDA) ||
+        (env->cp15.mdcr_el2 & MDCR_TDE) ||
+        (env->cp15.hcr_el2 & HCR_TGE);
  
-    if (el < 2 && (env->cp15.mdcr_el2 & MDCR_TDA)
-        && !arm_is_secure_below_el3(env)) {
+    if (el < 2 && mdcr_el2_tda && !arm_is_secure_below_el3(env)) {
          return CP_ACCESS_TRAP_EL2;
      }
      if (el < 3 && (env->cp15.mdcr_el3 & MDCR_TDA)) {
@@ -545,12 +554,61 @@ static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri,
      raw_write(env, ri, value);
  }
  
+/*
+ * The IS variants of the TLB maintenance operations have to take
+ * effect on every core, not only on the one that issued them.
+ */
+static void tlbiall_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                             uint64_t value)
+{
+    /* Flush the whole TLB on all CPUs; neither ri nor value is consulted. */
+    tlb_flush_all_cpus_synced(ENV_GET_CPU(env));
+}
+
+static void tlbiasid_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                              uint64_t value)
+{
+    /*
+     * The written value is not examined: this issues the same
+     * all-CPU flush as tlbiall_is_write().
+     */
+    CPUState *cpu_state = ENV_GET_CPU(env);
+
+    tlb_flush_all_cpus_synced(cpu_state);
+}
+
+static void tlbimva_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                             uint64_t value)
+{
+    /* Only the page-aligned part of the written address is used. */
+    uint64_t page = value & TARGET_PAGE_MASK;
+
+    tlb_flush_page_all_cpus_synced(ENV_GET_CPU(env), page);
+}
+
+static void tlbimvaa_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                              uint64_t value)
+{
+    /* Page-granular flush of the written address, applied to all CPUs. */
+    CPUState *cpu_state = ENV_GET_CPU(env);
+    uint64_t page = value & TARGET_PAGE_MASK;
+
+    tlb_flush_page_all_cpus_synced(cpu_state, page);
+}
+
+/*
+ * Non-IS variants of TLB operations are upgraded to
+ * IS versions if we are at NS EL1 and HCR_EL2.FB is set to
+ * force broadcast of these operations.
+ *
+ * Returns true when the caller should perform the IS form of the
+ * operation instead of the local one.
+ */
+static bool tlb_force_broadcast(CPUARMState *env)
+{
+    /* FB only applies to Non-secure EL1, hence the negated secure check. */
+    return (env->cp15.hcr_el2 & HCR_FB) &&
+        arm_current_el(env) == 1 && !arm_is_secure_below_el3(env);
+}
+
  static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri,
                            uint64_t value)
  {
      /* Invalidate all (TLBIALL) */
      ARMCPU *cpu = arm_env_get_cpu(env);
  
+    if (tlb_force_broadcast(env)) { /* upgrade to the IS (all-CPU) form */
+        tlbiall_is_write(env, NULL, value);
+        return;
+    }
+
      tlb_flush(CPU(cpu));
  }
  
@@ -560,6 +618,11 @@ static void tlbimva_write(CPUARMState *env, const ARMCPRegInfo *ri,
      /* Invalidate single TLB entry by MVA and ASID (TLBIMVA) */
      ARMCPU *cpu = arm_env_get_cpu(env);
  
+    if (tlb_force_broadcast(env)) {
+        tlbimva_is_write(env, NULL, value);
+        return;
+    }
+
      tlb_flush_page(CPU(cpu), value & TARGET_PAGE_MASK);
  }
  
@@ -569,6 +632,11 @@ static void tlbiasid_write(CPUARMState *env, const ARMCPRegInfo *ri,
      /* Invalidate by ASID (TLBIASID) */
      ARMCPU *cpu = arm_env_get_cpu(env);
  
+    if (tlb_force_broadcast(env)) {
+        tlbiasid_is_write(env, NULL, value);
+        return;
+    }
+
      tlb_flush(CPU(cpu));
  }
  
@@ -578,40 +646,12 @@ static void tlbimvaa_write(CPUARMState *env, const ARMCPRegInfo *ri,
      /* Invalidate single entry by MVA, all ASIDs (TLBIMVAA) */
      ARMCPU *cpu = arm_env_get_cpu(env);
  
-    tlb_flush_page(CPU(cpu), value & TARGET_PAGE_MASK);
-}
-
-/* IS variants of TLB operations must affect all cores */
-static void tlbiall_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                             uint64_t value)
-{
-    CPUState *cs = ENV_GET_CPU(env);
-
-    tlb_flush_all_cpus_synced(cs);
-}
-
-static void tlbiasid_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                             uint64_t value)
-{
-    CPUState *cs = ENV_GET_CPU(env);
-
-    tlb_flush_all_cpus_synced(cs);
-}
-
-static void tlbimva_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                             uint64_t value)
-{
-    CPUState *cs = ENV_GET_CPU(env);
-
-    tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK);
-}
-
-static void tlbimvaa_is_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                             uint64_t value)
-{
-    CPUState *cs = ENV_GET_CPU(env);
+    if (tlb_force_broadcast(env)) {
+        tlbimvaa_is_write(env, NULL, value);
+        return;
+    }
  
-    tlb_flush_page_all_cpus_synced(cs, value & TARGET_PAGE_MASK);
+    tlb_flush_page(CPU(cpu), value & TARGET_PAGE_MASK);
  }
  
  static void tlbiall_nsnh_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -1172,6 +1212,7 @@ static void pmcntenclr_write(CPUARMState *env, const ARMCPRegInfo *ri,
  static void pmovsr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                           uint64_t value)
  {
+    value &= pmu_counter_mask(env); /* ignore bits outside the counter mask */
      env->cp15.c9_pmovsr &= ~value;
  }
  
@@ -1288,12 +1329,26 @@ static uint64_t isr_read(CPUARMState *env, const ARMCPRegInfo *ri)
      CPUState *cs = ENV_GET_CPU(env);
      uint64_t ret = 0;
  
-    if (cs->interrupt_request & CPU_INTERRUPT_HARD) {
-        ret |= CPSR_I;
+    if (arm_hcr_el2_imo(env)) {
+        if (cs->interrupt_request & CPU_INTERRUPT_VIRQ) {
+            ret |= CPSR_I;
+        }
+    } else {
+        if (cs->interrupt_request & CPU_INTERRUPT_HARD) {
+            ret |= CPSR_I;
+        }
      }
-    if (cs->interrupt_request & CPU_INTERRUPT_FIQ) {
-        ret |= CPSR_F;
+
+    if (arm_hcr_el2_fmo(env)) {
+        if (cs->interrupt_request & CPU_INTERRUPT_VFIQ) {
+            ret |= CPSR_F;
+        }
+    } else {
+        if (cs->interrupt_request & CPU_INTERRUPT_FIQ) {
+            ret |= CPSR_F;
+        }
      }
+
      /* External aborts are not possible in QEMU so A bit is always clear */
      return ret;
  }
@@ -1416,12 +1471,14 @@ static const ARMCPRegInfo v7_cp_reginfo[] = {
        .writefn = pmintenset_write, .raw_writefn = raw_write,
        .resetvalue = 0x0 },
      { .name = "PMINTENCLR", .cp = 15, .crn = 9, .crm = 14, .opc1 = 0, .opc2 = 2,
-      .access = PL1_RW, .accessfn = access_tpm, .type = ARM_CP_ALIAS,
+      .access = PL1_RW, .accessfn = access_tpm,
+      .type = ARM_CP_ALIAS | ARM_CP_IO,
        .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten),
        .writefn = pmintenclr_write, },
      { .name = "PMINTENCLR_EL1", .state = ARM_CP_STATE_AA64,
        .opc0 = 3, .opc1 = 0, .crn = 9, .crm = 14, .opc2 = 2,
-      .access = PL1_RW, .accessfn = access_tpm, .type = ARM_CP_ALIAS,
+      .access = PL1_RW, .accessfn = access_tpm,
+      .type = ARM_CP_ALIAS | ARM_CP_IO,
        .fieldoffset = offsetof(CPUARMState, cp15.c9_pminten),
        .writefn = pmintenclr_write },
      { .name = "CCSIDR", .state = ARM_CP_STATE_BOTH,
@@ -2260,13 +2317,15 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value,
           * * The Non-secure TTBCR.EAE bit is set to 1
           * * The implementation includes EL2, and the value of HCR.VM is 1
           *
+         * (Note that HCR.DC makes HCR.VM behave as if it is 1.)
+         *
           * ATS1Hx always uses the 64bit format (not supported yet).
           */
          format64 = arm_s1_regime_using_lpae_format(env, mmu_idx);
  
          if (arm_feature(env, ARM_FEATURE_EL2)) {
              if (mmu_idx == ARMMMUIdx_S12NSE0 || mmu_idx == ARMMMUIdx_S12NSE1) {
-                format64 |= env->cp15.hcr_el2 & HCR_VM;
+                format64 |= env->cp15.hcr_el2 & (HCR_VM | HCR_DC);
              } else {
                  format64 |= arm_current_el(env) == 2;
              }
@@ -3073,22 +3132,6 @@ static CPAccessResult aa64_cacheop_access(CPUARMState *env,
   * Page D4-1736 (DDI0487A.b)
   */
  
-static void tlbi_aa64_vmalle1_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                                    uint64_t value)
-{
-    CPUState *cs = ENV_GET_CPU(env);
-
-    if (arm_is_secure_below_el3(env)) {
-        tlb_flush_by_mmuidx(cs,
-                            ARMMMUIdxBit_S1SE1 |
-                            ARMMMUIdxBit_S1SE0);
-    } else {
-        tlb_flush_by_mmuidx(cs,
-                            ARMMMUIdxBit_S12NSE1 |
-                            ARMMMUIdxBit_S12NSE0);
-    }
-}
-
  static void tlbi_aa64_vmalle1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                        uint64_t value)
  {
@@ -3106,6 +3149,27 @@ static void tlbi_aa64_vmalle1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
      }
  }
  
+/*
+ * TLBI VMALLE1 handler.  Flushes this CPU's TLB for the S1SE1/S1SE0 MMU
+ * indexes when running Secure below EL3, otherwise for S12NSE1/S12NSE0.
+ * When tlb_force_broadcast() reports that the operation must be
+ * broadcast, the work is handed to the IS handler instead.
+ * ri is unused; value is only forwarded on the broadcast path.
+ */
+static void tlbi_aa64_vmalle1_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                                    uint64_t value)
+{
+    CPUState *cs = ENV_GET_CPU(env);
+
+    if (tlb_force_broadcast(env)) {
+        /*
+         * This must be the IS handler: calling ourselves here would
+         * recurse without end.
+         */
+        tlbi_aa64_vmalle1is_write(env, NULL, value);
+        return;
+    }
+
+    if (arm_is_secure_below_el3(env)) {
+        tlb_flush_by_mmuidx(cs,
+                            ARMMMUIdxBit_S1SE1 |
+                            ARMMMUIdxBit_S1SE0);
+    } else {
+        tlb_flush_by_mmuidx(cs,
+                            ARMMMUIdxBit_S12NSE1 |
+                            ARMMMUIdxBit_S12NSE0);
+    }
+}
+
  static void tlbi_aa64_alle1_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                    uint64_t value)
  {
@@ -3195,29 +3259,6 @@ static void tlbi_aa64_alle3is_write(CPUARMState *env, const ARMCPRegInfo *ri,
      tlb_flush_by_mmuidx_all_cpus_synced(cs, ARMMMUIdxBit_S1E3);
  }
  
-static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri,
-                                 uint64_t value)
-{
-    /* Invalidate by VA, EL1&0 (AArch64 version).
-     * Currently handles all of VAE1, VAAE1, VAALE1 and VALE1,
-     * since we don't support flush-for-specific-ASID-only or
-     * flush-last-level-only.
-     */
-    ARMCPU *cpu = arm_env_get_cpu(env);
-    CPUState *cs = CPU(cpu);
-    uint64_t pageaddr = sextract64(value << 12, 0, 56);
-
-    if (arm_is_secure_below_el3(env)) {
-        tlb_flush_page_by_mmuidx(cs, pageaddr,
-                                 ARMMMUIdxBit_S1SE1 |
-                                 ARMMMUIdxBit_S1SE0);
-    } else {
-        tlb_flush_page_by_mmuidx(cs, pageaddr,
-                                 ARMMMUIdxBit_S12NSE1 |
-                                 ARMMMUIdxBit_S12NSE0);
-    }
-}
-
  static void tlbi_aa64_vae2_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                   uint64_t value)
  {
@@ -3265,6 +3306,34 @@ static void tlbi_aa64_vae1is_write(CPUARMState *env, const ARMCPRegInfo *ri,
      }
  }
  
+static void tlbi_aa64_vae1_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                                 uint64_t value)
+{
+    /*
+     * TLBI by virtual address for the EL1&0 regime, AArch64 encoding.
+     * One handler serves VAE1, VAAE1, VALE1 and VAALE1 because neither
+     * flush-for-specific-ASID-only nor flush-last-level-only is
+     * supported.
+     */
+    CPUState *cs;
+    uint64_t pageaddr;
+    int idxmap;
+
+    /* Forced broadcast: let the IS handler do the work on all CPUs. */
+    if (tlb_force_broadcast(env)) {
+        tlbi_aa64_vae1is_write(env, NULL, value);
+        return;
+    }
+
+    cs = CPU(arm_env_get_cpu(env));
+    pageaddr = sextract64(value << 12, 0, 56);
+
+    /* Pick the pair of MMU indexes matching the current security state. */
+    if (arm_is_secure_below_el3(env)) {
+        idxmap = ARMMMUIdxBit_S1SE1 | ARMMMUIdxBit_S1SE0;
+    } else {
+        idxmap = ARMMMUIdxBit_S12NSE1 | ARMMMUIdxBit_S12NSE0;
+    }
+    tlb_flush_page_by_mmuidx(cs, pageaddr, idxmap);
+}
+
  static void tlbi_aa64_vae2is_write(CPUARMState *env, const ARMCPRegInfo *ri,
                                     uint64_t value)
  {
@@ -3743,15 +3812,19 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
  
  /* Used to describe the behaviour of EL2 regs when EL2 does not exist.  */
  static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = {
-    { .name = "VBAR_EL2", .state = ARM_CP_STATE_AA64,
+    { .name = "VBAR_EL2", .state = ARM_CP_STATE_BOTH,
        .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 0, .opc2 = 0,
        .access = PL2_RW,
        .readfn = arm_cp_read_zero, .writefn = arm_cp_write_ignore },
-    { .name = "HCR_EL2", .state = ARM_CP_STATE_AA64,
+    { .name = "HCR_EL2", .state = ARM_CP_STATE_BOTH,
        .type = ARM_CP_NO_RAW,
        .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0,
        .access = PL2_RW,
-      .readfn = arm_cp_read_zero, .writefn = arm_cp_write_ignore },
+      .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "ESR_EL2", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 2, .opc2 = 0,
+      .access = PL2_RW,
+      .type = ARM_CP_CONST, .resetvalue = 0 },
      { .name = "CPTR_EL2", .state = ARM_CP_STATE_BOTH,
        .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 2,
        .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
@@ -3760,14 +3833,14 @@ static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = {
        .access = PL2_RW, .type = ARM_CP_CONST,
        .resetvalue = 0 },
      { .name = "HMAIR1", .state = ARM_CP_STATE_AA32,
-      .opc1 = 4, .crn = 10, .crm = 2, .opc2 = 1,
+      .cp = 15, .opc1 = 4, .crn = 10, .crm = 2, .opc2 = 1,
        .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
      { .name = "AMAIR_EL2", .state = ARM_CP_STATE_BOTH,
        .opc0 = 3, .opc1 = 4, .crn = 10, .crm = 3, .opc2 = 0,
        .access = PL2_RW, .type = ARM_CP_CONST,
        .resetvalue = 0 },
-    { .name = "HMAIR1", .state = ARM_CP_STATE_AA32,
-      .opc1 = 4, .crn = 10, .crm = 3, .opc2 = 1,
+    { .name = "HAMAIR1", .state = ARM_CP_STATE_AA32,
+      .cp = 15, .opc1 = 4, .crn = 10, .crm = 3, .opc2 = 1,
        .access = PL2_RW, .type = ARM_CP_CONST,
        .resetvalue = 0 },
      { .name = "AFSR0_EL2", .state = ARM_CP_STATE_BOTH,
@@ -3836,12 +3909,29 @@ static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = {
      { .name = "HSTR_EL2", .state = ARM_CP_STATE_BOTH,
        .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 3,
        .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "FAR_EL2", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 0,
+      .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "HIFAR", .state = ARM_CP_STATE_AA32,
+      .type = ARM_CP_CONST,
+      .cp = 15, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 2,
+      .access = PL2_RW, .resetvalue = 0 },
+    REGINFO_SENTINEL
+};
+
+/*
+ * Like el3_no_el2_cp_reginfo (the behaviour of EL2 regs when EL2 does
+ * not exist), but for registers which exist in ARMv8 but not v7.
+ * HCR2 is registered as an ARM_CP_CONST register whose value is 0.
+ */
+static const ARMCPRegInfo el3_no_el2_v8_cp_reginfo[] = {
+    { .name = "HCR2", .state = ARM_CP_STATE_AA32,
+      .cp = 15, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 4,
+      .access = PL2_RW,
+      .type = ARM_CP_CONST, .resetvalue = 0 },
      REGINFO_SENTINEL
  };
  
  static void hcr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
  {
      ARMCPU *cpu = arm_env_get_cpu(env);
+    CPUState *cs = ENV_GET_CPU(env);
      uint64_t valid_mask = HCR_MASK;
  
      if (arm_feature(env, ARM_FEATURE_EL3)) {
@@ -3860,39 +3950,103 @@ static void hcr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
      /* Clear RES0 bits.  */
      value &= valid_mask;
  
+    /*
+     * VI and VF are kept in cs->interrupt_request. Modifying that
+     * requires that we have the iothread lock, which is done by
+     * marking the reginfo structs as ARM_CP_IO.
+     * Note that if a write to HCR pends a VIRQ or VFIQ it is never
+     * possible for it to be taken immediately, because VIRQ and
+     * VFIQ are masked unless running at EL0 or EL1, and HCR
+     * can only be written at EL2.
+     */
+    g_assert(qemu_mutex_iothread_locked());
+    if (value & HCR_VI) {
+        cs->interrupt_request |= CPU_INTERRUPT_VIRQ;
+    } else {
+        cs->interrupt_request &= ~CPU_INTERRUPT_VIRQ;
+    }
+    if (value & HCR_VF) {
+        cs->interrupt_request |= CPU_INTERRUPT_VFIQ;
+    } else {
+        cs->interrupt_request &= ~CPU_INTERRUPT_VFIQ;
+    }
+    value &= ~(HCR_VI | HCR_VF);
+
      /* These bits change the MMU setup:
       * HCR_VM enables stage 2 translation
       * HCR_PTW forbids certain page-table setups
       * HCR_DC Disables stage1 and enables stage2 translation
       */
-    if ((raw_read(env, ri) ^ value) & (HCR_VM | HCR_PTW | HCR_DC)) {
+    if ((env->cp15.hcr_el2 ^ value) & (HCR_VM | HCR_PTW | HCR_DC)) {
          tlb_flush(CPU(cpu));
      }
-    raw_write(env, ri, value);
+    env->cp15.hcr_el2 = value;
+}
+
+/*
+ * Handle HCR2 write, i.e. write to high half of HCR_EL2.
+ *
+ * 'value' replaces bits [63:32]; the low half is taken from the current
+ * register contents and the merged value is passed through hcr_write().
+ */
+static void hcr_writehigh(CPUARMState *env, const ARMCPRegInfo *ri,
+                          uint64_t value)
+{
+    CPUState *cs = ENV_GET_CPU(env);
+    uint64_t cur = env->cp15.hcr_el2;
+
+    /*
+     * hcr_write() strips VI and VF from the stored value and keeps them
+     * in cs->interrupt_request.  Put them back before merging, otherwise
+     * this write would make hcr_write() clear a pending VIRQ/VFIQ even
+     * though only the high half is being written.
+     */
+    if (cs->interrupt_request & CPU_INTERRUPT_VIRQ) {
+        cur |= HCR_VI;
+    }
+    if (cs->interrupt_request & CPU_INTERRUPT_VFIQ) {
+        cur |= HCR_VF;
+    }
+
+    hcr_write(env, NULL, deposit64(cur, 32, 32, value));
+}
+
+/*
+ * Write handler for the 32-bit HCR: the written value becomes the low
+ * half of HCR_EL2, the high half keeps its stored contents, and the
+ * combined value goes through hcr_write().
+ */
+static void hcr_writelow(CPUARMState *env, const ARMCPRegInfo *ri,
+                         uint64_t value)
+{
+    uint64_t merged = deposit64(env->cp15.hcr_el2, 0, 32, value);
+
+    hcr_write(env, NULL, merged);
+}
+
+static uint64_t hcr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    /*
+     * VI and VF are not taken from the stored register value; they
+     * mirror the VIRQ/VFIQ bits of cs->interrupt_request.
+     */
+    CPUState *cs = ENV_GET_CPU(env);
+    uint64_t hcr = env->cp15.hcr_el2 & ~(HCR_VI | HCR_VF);
+
+    hcr |= (cs->interrupt_request & CPU_INTERRUPT_VIRQ) ? HCR_VI : 0;
+    hcr |= (cs->interrupt_request & CPU_INTERRUPT_VFIQ) ? HCR_VF : 0;
+
+    return hcr;
  }
  
  static const ARMCPRegInfo el2_cp_reginfo[] = {
      { .name = "HCR_EL2", .state = ARM_CP_STATE_AA64,
+      .type = ARM_CP_IO,
        .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0,
        .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2),
-      .writefn = hcr_write },
+      .writefn = hcr_write, .readfn = hcr_read },
+    { .name = "HCR", .state = ARM_CP_STATE_AA32,
+      .type = ARM_CP_ALIAS | ARM_CP_IO,
+      .cp = 15, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0,
+      .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2),
+      .writefn = hcr_writelow, .readfn = hcr_read },
      { .name = "ELR_EL2", .state = ARM_CP_STATE_AA64,
        .type = ARM_CP_ALIAS,
        .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 1,
        .access = PL2_RW,
        .fieldoffset = offsetof(CPUARMState, elr_el[2]) },
-    { .name = "ESR_EL2", .state = ARM_CP_STATE_AA64,
+    { .name = "ESR_EL2", .state = ARM_CP_STATE_BOTH,
        .opc0 = 3, .opc1 = 4, .crn = 5, .crm = 2, .opc2 = 0,
        .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.esr_el[2]) },
-    { .name = "FAR_EL2", .state = ARM_CP_STATE_AA64,
+    { .name = "FAR_EL2", .state = ARM_CP_STATE_BOTH,
        .opc0 = 3, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 0,
        .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.far_el[2]) },
+    { .name = "HIFAR", .state = ARM_CP_STATE_AA32,
+      .type = ARM_CP_ALIAS,
+      .cp = 15, .opc1 = 4, .crn = 6, .crm = 0, .opc2 = 2,
+      .access = PL2_RW,
+      .fieldoffset = offsetofhigh32(CPUARMState, cp15.far_el[2]) },
      { .name = "SPSR_EL2", .state = ARM_CP_STATE_AA64,
        .type = ARM_CP_ALIAS,
        .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 0,
        .access = PL2_RW,
        .fieldoffset = offsetof(CPUARMState, banked_spsr[BANK_HYP]) },
-    { .name = "VBAR_EL2", .state = ARM_CP_STATE_AA64,
+    { .name = "VBAR_EL2", .state = ARM_CP_STATE_BOTH,
        .opc0 = 3, .opc1 = 4, .crn = 12, .crm = 0, .opc2 = 0,
        .access = PL2_RW, .writefn = vbar_write,
        .fieldoffset = offsetof(CPUARMState, cp15.vbar_el[2]),
@@ -3910,7 +4064,7 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
        .access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.mair_el[2]),
        .resetvalue = 0 },
      { .name = "HMAIR1", .state = ARM_CP_STATE_AA32,
-      .opc1 = 4, .crn = 10, .crm = 2, .opc2 = 1,
+      .cp = 15, .opc1 = 4, .crn = 10, .crm = 2, .opc2 = 1,
        .access = PL2_RW, .type = ARM_CP_ALIAS,
        .fieldoffset = offsetofhigh32(CPUARMState, cp15.mair_el[2]) },
      { .name = "AMAIR_EL2", .state = ARM_CP_STATE_BOTH,
@@ -3918,8 +4072,8 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
        .access = PL2_RW, .type = ARM_CP_CONST,
        .resetvalue = 0 },
      /* HAMAIR1 is mapped to AMAIR_EL2[63:32] */
-    { .name = "HMAIR1", .state = ARM_CP_STATE_AA32,
-      .opc1 = 4, .crn = 10, .crm = 3, .opc2 = 1,
+    { .name = "HAMAIR1", .state = ARM_CP_STATE_AA32,
+      .cp = 15, .opc1 = 4, .crn = 10, .crm = 3, .opc2 = 1,
        .access = PL2_RW, .type = ARM_CP_CONST,
        .resetvalue = 0 },
      { .name = "AFSR0_EL2", .state = ARM_CP_STATE_BOTH,
@@ -4105,6 +4259,16 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
      REGINFO_SENTINEL
  };
  
+/*
+ * HCR2 gives AArch32 access to the upper 32 bits of HCR_EL2.  Writes are
+ * routed through hcr_writehigh(); no readfn is given, so reads presumably
+ * come from the high word of cp15.hcr_el2 via .fieldoffset.
+ */
+static const ARMCPRegInfo el2_v8_cp_reginfo[] = {
+    {
+        .name = "HCR2",
+        .state = ARM_CP_STATE_AA32,
+        .cp = 15, .crn = 1, .crm = 1, .opc1 = 4, .opc2 = 4,
+        .access = PL2_RW,
+        .type = ARM_CP_IO | ARM_CP_ALIAS,
+        .writefn = hcr_writehigh,
+        .fieldoffset = offsetofhigh32(CPUARMState, cp15.hcr_el2),
+    },
+    REGINFO_SENTINEL
+};
+
  static CPAccessResult nsacr_access(CPUARMState *env, const ARMCPRegInfo *ri,
                                     bool isread)
  {
@@ -4337,78 +4501,105 @@ static const ARMCPRegInfo debug_lpae_cp_reginfo[] = {
      REGINFO_SENTINEL
  };
  
-/* Return the exception level to which SVE-disabled exceptions should
- * be taken, or 0 if SVE is enabled.
+/* Return the exception level to which exceptions should be taken
+ * via SVEAccessTrap.  If an exception should be routed through
+ * AArch64.AdvSIMDFPAccessTrap, return 0; fp_exception_el should
+ * take care of raising that exception.
+ * Cf. the ARM pseudocode function CheckSVEEnabled.
   */
-static int sve_exception_el(CPUARMState *env)
+int sve_exception_el(CPUARMState *env, int el)
  {
  #ifndef CONFIG_USER_ONLY
-    unsigned current_el = arm_current_el(env);
+    if (el <= 1) {
+        bool disabled = false;
  
-    /* The CPACR.ZEN controls traps to EL1:
-     * 0, 2 : trap EL0 and EL1 accesses
-     * 1    : trap only EL0 accesses
-     * 3    : trap no accesses
-     */
-    switch (extract32(env->cp15.cpacr_el1, 16, 2)) {
-    default:
-        if (current_el <= 1) {
-            /* Trap to PL1, which might be EL1 or EL3 */
-            if (arm_is_secure(env) && !arm_el_is_aa64(env, 3)) {
-                return 3;
-            }
-            return 1;
+        /* The CPACR.ZEN controls traps to EL1:
+         * 0, 2 : trap EL0 and EL1 accesses
+         * 1    : trap only EL0 accesses
+         * 3    : trap no accesses
+         */
+        if (!extract32(env->cp15.cpacr_el1, 16, 1)) {
+            disabled = true;
+        } else if (!extract32(env->cp15.cpacr_el1, 17, 1)) {
+            disabled = el == 0;
          }
-        break;
-    case 1:
-        if (current_el == 0) {
-            return 1;
+        if (disabled) {
+            /* route_to_el2 */
+            return (arm_feature(env, ARM_FEATURE_EL2)
+                    && !arm_is_secure(env)
+                    && (env->cp15.hcr_el2 & HCR_TGE) ? 2 : 1);
          }
-        break;
-    case 3:
-        break;
-    }
  
-    /* Similarly for CPACR.FPEN, after having checked ZEN.  */
-    switch (extract32(env->cp15.cpacr_el1, 20, 2)) {
-    default:
-        if (current_el <= 1) {
-            if (arm_is_secure(env) && !arm_el_is_aa64(env, 3)) {
-                return 3;
-            }
-            return 1;
+        /* Check CPACR.FPEN.  */
+        if (!extract32(env->cp15.cpacr_el1, 20, 1)) {
+            disabled = true;
+        } else if (!extract32(env->cp15.cpacr_el1, 21, 1)) {
+            disabled = el == 0;
          }
-        break;
-    case 1:
-        if (current_el == 0) {
-            return 1;
+        if (disabled) {
+            return 0;
          }
-        break;
-    case 3:
-        break;
      }
  
-    /* CPTR_EL2.  Check both TZ and TFP.  */
-    if (current_el <= 2
-        && (env->cp15.cptr_el[2] & (CPTR_TFP | CPTR_TZ))
-        && !arm_is_secure_below_el3(env)) {
-        return 2;
+    /* CPTR_EL2.  Since TZ and TFP are positive,
+     * they will be zero when EL2 is not present.
+     */
+    if (el <= 2 && !arm_is_secure_below_el3(env)) {
+        if (env->cp15.cptr_el[2] & CPTR_TZ) {
+            return 2;
+        }
+        if (env->cp15.cptr_el[2] & CPTR_TFP) {
+            return 0;
+        }
      }
  
-    /* CPTR_EL3.  Check both EZ and TFP.  */
-    if (!(env->cp15.cptr_el[3] & CPTR_EZ)
-        || (env->cp15.cptr_el[3] & CPTR_TFP)) {
+    /* CPTR_EL3.  Since EZ is negative we must check for EL3.  */
+    if (arm_feature(env, ARM_FEATURE_EL3)
+        && !(env->cp15.cptr_el[3] & CPTR_EZ)) {
          return 3;
      }
  #endif
      return 0;
  }
  
+/*
+ * Given that SVE is enabled, return the vector length for EL.
+ *
+ * The result uses the same encoding as the low four bits of ZCR_ELx:
+ * it starts from cpu->sve_max_vq - 1 and is clamped by each ZCR_ELx
+ * that applies to @el.  Callers add 1 to obtain a vq count (see
+ * zcr_write()).
+ */
+uint32_t sve_zcr_len_for_el(CPUARMState *env, int el)
+{
+    ARMCPU *cpu = arm_env_get_cpu(env);
+    uint32_t zcr_len = cpu->sve_max_vq - 1;
+
+    if (el <= 1) {
+        zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[1]);
+    }
+    /* ZCR_EL2 constrains EL2 itself as well as the levels below it. */
+    if (el <= 2 && arm_feature(env, ARM_FEATURE_EL2)) {
+        zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[2]);
+    }
+    /* ZCR_EL3 constrains every exception level, EL3 included. */
+    if (arm_feature(env, ARM_FEATURE_EL3)) {
+        zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[3]);
+    }
+    return zcr_len;
+}
+
  static void zcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
                        uint64_t value)
  {
+    int cur_el = arm_current_el(env);
+    int old_len = sve_zcr_len_for_el(env, cur_el);
+    int new_len;
+
      /* Bits other than [3:0] are RAZ/WI.  */
      raw_write(env, ri, value & 0xf);
+
+    /*
+     * Because we arrived here, we know both FP and SVE are enabled;
+     * otherwise we would have trapped access to the ZCR_ELn register.
+     *
+     * Recompute the length for the current EL after the write.  If it
+     * is now smaller than before, call aarch64_sve_narrow_vq() with
+     * the new length plus one.
+     */
+    new_len = sve_zcr_len_for_el(env, cur_el);
+    if (new_len < old_len) {
+        aarch64_sve_narrow_vq(env, new_len + 1);
+    }
  }
  
  static const ARMCPRegInfo zcr_el1_reginfo = {
@@ -4780,7 +4971,7 @@ static uint64_t id_pfr1_read(CPUARMState *env, const ARMCPRegInfo *ri)
  static uint64_t id_aa64pfr0_read(CPUARMState *env, const ARMCPRegInfo *ri)
  {
      ARMCPU *cpu = arm_env_get_cpu(env);
-    uint64_t pfr0 = cpu->id_aa64pfr0;
+    uint64_t pfr0 = cpu->isar.id_aa64pfr0;
  
      if (env->gicv3state) {
          pfr0 |= 1 << 24;
@@ -4847,27 +5038,27 @@ void register_cp_regs_for_features(ARMCPU *cpu)
              { .name = "ID_ISAR0", .state = ARM_CP_STATE_BOTH,
                .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 0,
                .access = PL1_R, .type = ARM_CP_CONST,
-              .resetvalue = cpu->id_isar0 },
+              .resetvalue = cpu->isar.id_isar0 },
              { .name = "ID_ISAR1", .state = ARM_CP_STATE_BOTH,
                .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 1,
                .access = PL1_R, .type = ARM_CP_CONST,
-              .resetvalue = cpu->id_isar1 },
+              .resetvalue = cpu->isar.id_isar1 },
              { .name = "ID_ISAR2", .state = ARM_CP_STATE_BOTH,
                .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 2,
                .access = PL1_R, .type = ARM_CP_CONST,
-              .resetvalue = cpu->id_isar2 },
+              .resetvalue = cpu->isar.id_isar2 },
              { .name = "ID_ISAR3", .state = ARM_CP_STATE_BOTH,
                .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 3,
                .access = PL1_R, .type = ARM_CP_CONST,
-              .resetvalue = cpu->id_isar3 },
+              .resetvalue = cpu->isar.id_isar3 },
              { .name = "ID_ISAR4", .state = ARM_CP_STATE_BOTH,
                .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 4,
                .access = PL1_R, .type = ARM_CP_CONST,
-              .resetvalue = cpu->id_isar4 },
+              .resetvalue = cpu->isar.id_isar4 },
              { .name = "ID_ISAR5", .state = ARM_CP_STATE_BOTH,
                .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 5,
                .access = PL1_R, .type = ARM_CP_CONST,
-              .resetvalue = cpu->id_isar5 },
+              .resetvalue = cpu->isar.id_isar5 },
              { .name = "ID_MMFR4", .state = ARM_CP_STATE_BOTH,
                .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 6,
                .access = PL1_R, .type = ARM_CP_CONST,
@@ -4875,7 +5066,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
              { .name = "ID_ISAR6", .state = ARM_CP_STATE_BOTH,
                .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 2, .opc2 = 7,
                .access = PL1_R, .type = ARM_CP_CONST,
-              .resetvalue = cpu->id_isar6 },
+              .resetvalue = cpu->isar.id_isar6 },
              REGINFO_SENTINEL
          };
          define_arm_cp_regs(cpu, v6_idregs);
@@ -4946,7 +5137,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
              { .name = "ID_AA64PFR1_EL1", .state = ARM_CP_STATE_AA64,
                .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 1,
                .access = PL1_R, .type = ARM_CP_CONST,
-              .resetvalue = cpu->id_aa64pfr1},
+              .resetvalue = cpu->isar.id_aa64pfr1},
              { .name = "ID_AA64PFR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
                .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 2,
                .access = PL1_R, .type = ARM_CP_CONST,
@@ -4955,9 +5146,10 @@ void register_cp_regs_for_features(ARMCPU *cpu)
                .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 3,
                .access = PL1_R, .type = ARM_CP_CONST,
                .resetvalue = 0 },
-            { .name = "ID_AA64PFR4_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
+            { .name = "ID_AA64ZFR0_EL1", .state = ARM_CP_STATE_AA64,
                .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 4,
                .access = PL1_R, .type = ARM_CP_CONST,
+              /* At present, only SVEver == 0 is defined anyway.  */
                .resetvalue = 0 },
              { .name = "ID_AA64PFR5_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
                .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 4, .opc2 = 5,
@@ -5006,11 +5198,11 @@ void register_cp_regs_for_features(ARMCPU *cpu)
              { .name = "ID_AA64ISAR0_EL1", .state = ARM_CP_STATE_AA64,
                .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 0,
                .access = PL1_R, .type = ARM_CP_CONST,
-              .resetvalue = cpu->id_aa64isar0 },
+              .resetvalue = cpu->isar.id_aa64isar0 },
              { .name = "ID_AA64ISAR1_EL1", .state = ARM_CP_STATE_AA64,
                .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 1,
                .access = PL1_R, .type = ARM_CP_CONST,
-              .resetvalue = cpu->id_aa64isar1 },
+              .resetvalue = cpu->isar.id_aa64isar1 },
              { .name = "ID_AA64ISAR2_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
                .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 6, .opc2 = 2,
                .access = PL1_R, .type = ARM_CP_CONST,
@@ -5070,15 +5262,15 @@ void register_cp_regs_for_features(ARMCPU *cpu)
              { .name = "MVFR0_EL1", .state = ARM_CP_STATE_AA64,
                .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 0,
                .access = PL1_R, .type = ARM_CP_CONST,
-              .resetvalue = cpu->mvfr0 },
+              .resetvalue = cpu->isar.mvfr0 },
              { .name = "MVFR1_EL1", .state = ARM_CP_STATE_AA64,
                .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 1,
                .access = PL1_R, .type = ARM_CP_CONST,
-              .resetvalue = cpu->mvfr1 },
+              .resetvalue = cpu->isar.mvfr1 },
              { .name = "MVFR2_EL1", .state = ARM_CP_STATE_AA64,
                .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 2,
                .access = PL1_R, .type = ARM_CP_CONST,
-              .resetvalue = cpu->mvfr2 },
+              .resetvalue = cpu->isar.mvfr2 },
              { .name = "MVFR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64,
                .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 3, .opc2 = 3,
                .access = PL1_R, .type = ARM_CP_CONST,
@@ -5156,6 +5348,9 @@ void register_cp_regs_for_features(ARMCPU *cpu)
          };
          define_arm_cp_regs(cpu, vpidr_regs);
          define_arm_cp_regs(cpu, el2_cp_reginfo);
+        if (arm_feature(env, ARM_FEATURE_V8)) {
+            define_arm_cp_regs(cpu, el2_v8_cp_reginfo);
+        }
          /* RVBAR_EL2 is only implemented if EL2 is the highest EL */
          if (!arm_feature(env, ARM_FEATURE_EL3)) {
              ARMCPRegInfo rvbar = {
@@ -5188,6 +5383,9 @@ void register_cp_regs_for_features(ARMCPU *cpu)
              };
              define_arm_cp_regs(cpu, vpidr_regs);
              define_arm_cp_regs(cpu, el3_no_el2_cp_reginfo);
+            if (arm_feature(env, ARM_FEATURE_V8)) {
+                define_arm_cp_regs(cpu, el3_no_el2_v8_cp_reginfo);
+            }
          }
      }
      if (arm_feature(env, ARM_FEATURE_EL3)) {
@@ -5436,6 +5634,16 @@ void register_cp_regs_for_features(ARMCPU *cpu)
              REGINFO_SENTINEL
          };
          define_arm_cp_regs(cpu, auxcr_reginfo);
+        if (arm_feature(env, ARM_FEATURE_V8)) {
+            /* HACTLR2 maps to ACTLR_EL2[63:32] and is not in ARMv7 */
+            ARMCPRegInfo hactlr2_reginfo = {
+                .name = "HACTLR2", .state = ARM_CP_STATE_AA32,
+                .cp = 15, .opc1 = 4, .crn = 1, .crm = 0, .opc2 = 3,
+                .access = PL2_RW, .type = ARM_CP_CONST,
+                .resetvalue = 0
+            };
+            define_one_arm_cp_reg(cpu, &hactlr2_reginfo);
+        }
      }
  
      if (arm_feature(env, ARM_FEATURE_CBAR)) {
@@ -5508,7 +5716,7 @@ void register_cp_regs_for_features(ARMCPU *cpu)
          define_one_arm_cp_reg(cpu, &sctlr);
      }
  
-    if (arm_feature(env, ARM_FEATURE_SVE)) {
+    if (cpu_isar_feature(aa64_sve, cpu)) {
          define_one_arm_cp_reg(cpu, &zcr_el1_reginfo);
          if (arm_feature(env, ARM_FEATURE_EL2)) {
              define_one_arm_cp_reg(cpu, &zcr_el2_reginfo);
@@ -5590,12 +5798,6 @@ void arm_cpu_list(FILE *f, fprintf_function cpu_fprintf)
      (*cpu_fprintf)(f, "Available CPUs:\n");
      g_slist_foreach(list, arm_cpu_list_entry, &s);
      g_slist_free(list);
-#ifdef CONFIG_KVM
-    /* The 'host' CPU type is dynamically registered only if KVM is
-     * enabled, so we have to special-case it here:
-     */
-    (*cpu_fprintf)(f, "  host (only available in KVM mode)\n");
-#endif
  }
  
  static void arm_cpu_add_definition(gpointer data, gpointer user_data)
@@ -6104,7 +6306,17 @@ void cpsr_write(CPUARMState *env, uint32_t val, uint32_t mask,
                  mask |= CPSR_IL;
                  val |= CPSR_IL;
              }
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "Illegal AArch32 mode switch attempt from %s to %s\n",
+                          aarch32_mode_name(env->uncached_cpsr),
+                          aarch32_mode_name(val));
          } else {
+            qemu_log_mask(CPU_LOG_INT, "%s %s to %s PC 0x%" PRIx32 "\n",
+                          write_type == CPSRWriteExceptionReturn ?
+                          "Exception return from AArch32" :
+                          "AArch32 mode switch from",
+                          aarch32_mode_name(env->uncached_cpsr),
+                          aarch32_mode_name(val), env->regs[15]);
              switch_mode(env, val & CPSR_M);
          }
      }
@@ -6202,7 +6414,7 @@ uint32_t HELPER(v7m_tt)(CPUARMState *env, uint32_t addr, uint32_t op)
      return 0;
  }
  
-void switch_mode(CPUARMState *env, int mode)
+static void switch_mode(CPUARMState *env, int mode)
  {
      ARMCPU *cpu = arm_env_get_cpu(env);
  
@@ -6224,7 +6436,7 @@ void aarch64_sync_64_to_32(CPUARMState *env)
  
  #else
  
-void switch_mode(CPUARMState *env, int mode)
+static void switch_mode(CPUARMState *env, int mode)
  {
      int old_mode;
      int i;
@@ -6335,15 +6547,15 @@ uint32_t arm_phys_excp_target_el(CPUState *cs, uint32_t excp_idx,
      switch (excp_idx) {
      case EXCP_IRQ:
          scr = ((env->cp15.scr_el3 & SCR_IRQ) == SCR_IRQ);
-        hcr = ((env->cp15.hcr_el2 & HCR_IMO) == HCR_IMO);
+        hcr = arm_hcr_el2_imo(env);
          break;
      case EXCP_FIQ:
          scr = ((env->cp15.scr_el3 & SCR_FIQ) == SCR_FIQ);
-        hcr = ((env->cp15.hcr_el2 & HCR_FMO) == HCR_FMO);
+        hcr = arm_hcr_el2_fmo(env);
          break;
      default:
          scr = ((env->cp15.scr_el3 & SCR_EA) == SCR_EA);
-        hcr = ((env->cp15.hcr_el2 & HCR_AMO) == HCR_AMO);
+        hcr = arm_hcr_el2_amo(env);
          break;
      };
  
@@ -6368,7 +6580,7 @@ static bool v7m_stack_write(ARMCPU *cpu, uint32_t addr, uint32_t value,
      target_ulong page_size;
      hwaddr physaddr;
      int prot;
-    ARMMMUFaultInfo fi;
+    ARMMMUFaultInfo fi = {};
      bool secure = mmu_idx & ARM_MMU_IDX_M_S;
      int exc;
      bool exc_secure;
@@ -6430,7 +6642,7 @@ static bool v7m_stack_read(ARMCPU *cpu, uint32_t *dest, uint32_t addr,
      target_ulong page_size;
      hwaddr physaddr;
      int prot;
-    ARMMMUFaultInfo fi;
+    ARMMMUFaultInfo fi = {};
      bool secure = mmu_idx & ARM_MMU_IDX_M_S;
      int exc;
      bool exc_secure;
@@ -6481,18 +6693,6 @@ pend_fault:
      return false;
  }
  
-/* Return true if we're using the process stack pointer (not the MSP) */
-static bool v7m_using_psp(CPUARMState *env)
-{
-    /* Handler mode always uses the main stack; for thread mode
-     * the CONTROL.SPSEL bit determines the answer.
-     * Note that in v7M it is not possible to be in Handler mode with
-     * CONTROL.SPSEL non-zero, but in v8M it is, so we must check both.
-     */
-    return !arm_v7m_is_handler_mode(env) &&
-        env->v7m.control[env->v7m.secure] & R_V7M_CONTROL_SPSEL_MASK;
-}
-
  /* Write to v7M CONTROL.SPSEL bit for the specified security bank.
   * This may change the current stack pointer between Main and Process
   * stack pointers if it is done for the CONTROL register for the current
@@ -6649,6 +6849,10 @@ void HELPER(v7m_blxns)(CPUARMState *env, uint32_t dest)
                        "BLXNS with misaligned SP is UNPREDICTABLE\n");
      }
  
+    if (sp < v7m_sp_limit(env)) {
+        raise_exception(env, EXCP_STKOF, 0, 1);
+    }
+
      saved_psr = env->v7m.exception;
      if (env->v7m.control[M_REG_S] & R_V7M_CONTROL_SFPA_MASK) {
          saved_psr |= XPSR_SFPA;
@@ -6778,6 +6982,8 @@ static bool v7m_push_callee_stack(ARMCPU *cpu, uint32_t lr, bool dotailchain,
      uint32_t frameptr;
      ARMMMUIdx mmu_idx;
      bool stacked_ok;
+    uint32_t limit;
+    bool want_psp;
  
      if (dotailchain) {
          bool mode = lr & R_V7M_EXCRET_MODE_MASK;
@@ -6787,12 +6993,34 @@ static bool v7m_push_callee_stack(ARMCPU *cpu, uint32_t lr, bool dotailchain,
          mmu_idx = arm_v7m_mmu_idx_for_secstate_and_priv(env, M_REG_S, priv);
          frame_sp_p = get_v7m_sp_ptr(env, M_REG_S, mode,
                                      lr & R_V7M_EXCRET_SPSEL_MASK);
+        want_psp = mode && (lr & R_V7M_EXCRET_SPSEL_MASK);
+        if (want_psp) {
+            limit = env->v7m.psplim[M_REG_S];
+        } else {
+            limit = env->v7m.msplim[M_REG_S];
+        }
      } else {
          mmu_idx = core_to_arm_mmu_idx(env, cpu_mmu_index(env, false));
          frame_sp_p = &env->regs[13];
+        limit = v7m_sp_limit(env);
      }
  
      frameptr = *frame_sp_p - 0x28;
+    if (frameptr < limit) {
+        /*
+         * Stack limit failure: set SP to the limit value, and generate
+         * STKOF UsageFault. Stack pushes below the limit must not be
+         * performed. It is IMPDEF whether pushes above the limit are
+         * performed; we choose not to.
+         */
+        qemu_log_mask(CPU_LOG_INT,
+                      "...STKOF during callee-saves register stacking\n");
+        env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_STKOF_MASK;
+        armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE,
+                                env->v7m.secure);
+        *frame_sp_p = limit;
+        return true;
+    }
  
      /* Write as much of the stack frame as we can. A write failure may
       * cause us to pend a derived exception.
@@ -6816,10 +7044,7 @@ static bool v7m_push_callee_stack(ARMCPU *cpu, uint32_t lr, bool dotailchain,
          v7m_stack_write(cpu, frameptr + 0x24, env->regs[11], mmu_idx,
                          ignore_faults);
  
-    /* Update SP regardless of whether any of the stack accesses failed.
-     * When we implement v8M stack limit checking then this attempt to
-     * update SP might also fail and result in a derived exception.
-     */
+    /* Update SP regardless of whether any of the stack accesses failed. */
      *frame_sp_p = frameptr;
  
      return !stacked_ok;
@@ -6839,6 +7064,8 @@ static void v7m_exception_taken(ARMCPU *cpu, uint32_t lr, bool dotailchain,
      bool push_failed = false;
  
      armv7m_nvic_get_pending_irq_info(env->nvic, &exc, &targets_secure);
+    qemu_log_mask(CPU_LOG_INT, "...taking pending %s exception %d\n",
+                  targets_secure ? "secure" : "nonsecure", exc);
  
      if (arm_feature(env, ARM_FEATURE_V8)) {
          if (arm_feature(env, ARM_FEATURE_M_SECURITY) &&
@@ -6863,7 +7090,7 @@ static void v7m_exception_taken(ARMCPU *cpu, uint32_t lr, bool dotailchain,
                   * not already saved.
                   */
                  if (lr & R_V7M_EXCRET_DCRS_MASK &&
-                    !(dotailchain && (lr & R_V7M_EXCRET_ES_MASK))) {
+                    !(dotailchain && !(lr & R_V7M_EXCRET_ES_MASK))) {
                      push_failed = v7m_push_callee_stack(cpu, lr, dotailchain,
                                                          ignore_stackfaults);
                  }
@@ -6912,12 +7139,15 @@ static void v7m_exception_taken(ARMCPU *cpu, uint32_t lr, bool dotailchain,
           * we might now want to take a different exception which
           * targets a different security state, so try again from the top.
           */
+        qemu_log_mask(CPU_LOG_INT,
+                      "...derived exception on callee-saves register stacking");
          v7m_exception_taken(cpu, lr, true, true);
          return;
      }
  
      if (!arm_v7m_load_vector(cpu, exc, targets_secure, &addr)) {
          /* Vector load failed: derived exception */
+        qemu_log_mask(CPU_LOG_INT, "...derived exception on vector table load");
          v7m_exception_taken(cpu, lr, true, true);
          return;
      }
@@ -6960,7 +7190,27 @@ static bool v7m_push_stack(ARMCPU *cpu)
          xpsr |= XPSR_SPREALIGN;
      }
  
-    frameptr -= 0x20;
+    frameptr -= 0x20;
+
+    if (arm_feature(env, ARM_FEATURE_V8)) {
+        uint32_t limit = v7m_sp_limit(env);
+
+        if (frameptr < limit) {
+            /*
+             * Stack limit failure: set SP to the limit value, and generate
+             * STKOF UsageFault. Stack pushes below the limit must not be
+             * performed. It is IMPDEF whether pushes above the limit are
+             * performed; we choose not to.
+             */
+            qemu_log_mask(CPU_LOG_INT,
+                          "...STKOF during stacking\n");
+            env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_STKOF_MASK;
+            armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE,
+                                    env->v7m.secure);
+            env->regs[13] = limit;
+            return true;
+        }
+    }
  
      /* Write as much of the stack frame as we can. If we fail a stack
       * write this will result in a derived exception being pended
@@ -6977,10 +7227,7 @@ static bool v7m_push_stack(ARMCPU *cpu)
          v7m_stack_write(cpu, frameptr + 24, env->regs[15], mmu_idx, false) &&
          v7m_stack_write(cpu, frameptr + 28, xpsr, mmu_idx, false);
  
-    /* Update SP regardless of whether any of the stack accesses failed.
-     * When we implement v8M stack limit checking then this attempt to
-     * update SP might also fail and result in a derived exception.
-     */
+    /* Update SP regardless of whether any of the stack accesses failed. */
      env->regs[13] = frameptr;
  
      return !stacked_ok;
@@ -7046,6 +7293,7 @@ static void do_v7m_exception_exit(ARMCPU *cpu)
              /* For all other purposes, treat ES as 0 (R_HXSR) */
              excret &= ~R_V7M_EXCRET_ES_MASK;
          }
+        exc_secure = excret & R_V7M_EXCRET_ES_MASK;
      }
  
      if (env->v7m.exception != ARMV7M_EXCP_NMI) {
@@ -7056,7 +7304,6 @@ static void do_v7m_exception_exit(ARMCPU *cpu)
           * which security state's faultmask to clear. (v8M ARM ARM R_KBNF.)
           */
          if (arm_feature(env, ARM_FEATURE_M_SECURITY)) {
-            exc_secure = excret & R_V7M_EXCRET_ES_MASK;
              if (armv7m_nvic_raw_execution_priority(env->nvic) >= 0) {
                  env->v7m.faultmask[exc_secure] = 0;
              }
@@ -7125,12 +7372,22 @@ static void do_v7m_exception_exit(ARMCPU *cpu)
          }
      }
  
+    /*
+     * Set CONTROL.SPSEL from excret.SPSEL. Since we're still in
+     * Handler mode (and will be until we write the new XPSR.Interrupt
+     * field) this does not switch around the current stack pointer.
+     * We must do this before we do any kind of tailchaining, including
+     * for the derived exceptions on integrity check failures, or we will
+     * give the guest an incorrect EXCRET.SPSEL value on exception entry.
+     */
+    write_v7m_control_spsel_for_secstate(env, return_to_sp_process, exc_secure);
+
      if (sfault) {
          env->v7m.sfsr |= R_V7M_SFSR_INVER_MASK;
          armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SECURE, false);
-        v7m_exception_taken(cpu, excret, true, false);
          qemu_log_mask(CPU_LOG_INT, "...taking SecureFault on existing "
                        "stackframe: failed EXC_RETURN.ES validity check\n");
+        v7m_exception_taken(cpu, excret, true, false);
          return;
      }
  
@@ -7140,17 +7397,27 @@ static void do_v7m_exception_exit(ARMCPU *cpu)
           */
          env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_INVPC_MASK;
          armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, env->v7m.secure);
-        v7m_exception_taken(cpu, excret, true, false);
          qemu_log_mask(CPU_LOG_INT, "...taking UsageFault on existing "
                        "stackframe: failed exception return integrity check\n");
+        v7m_exception_taken(cpu, excret, true, false);
          return;
      }
  
-    /* Set CONTROL.SPSEL from excret.SPSEL. Since we're still in
-     * Handler mode (and will be until we write the new XPSR.Interrupt
-     * field) this does not switch around the current stack pointer.
-     */
-    write_v7m_control_spsel_for_secstate(env, return_to_sp_process, exc_secure);
+    /*
+     * Tailchaining: if there is currently a pending exception that
+     * is high enough priority to preempt execution at the level we're
+     * about to return to, then just directly take that exception now,
+     * avoiding an unstack-and-then-stack. Note that now we have
+     * deactivated the previous exception by calling armv7m_nvic_complete_irq()
+     * our current execution priority is already the execution priority we are
+     * returning to -- none of the state we would unstack or set based on
+     * the EXCRET value affects it.
+     */
+    if (armv7m_nvic_can_take_pending_exception(env->nvic)) {
+        qemu_log_mask(CPU_LOG_INT, "...tailchaining to pending exception\n");
+        v7m_exception_taken(cpu, excret, true, false);
+        return;
+    }
  
      switch_v7m_security_state(env, return_to_secure);
  
@@ -7197,15 +7464,14 @@ static void do_v7m_exception_exit(ARMCPU *cpu)
                  /* Take a SecureFault on the current stack */
                  env->v7m.sfsr |= R_V7M_SFSR_INVIS_MASK;
                  armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SECURE, false);
-                v7m_exception_taken(cpu, excret, true, false);
                  qemu_log_mask(CPU_LOG_INT, "...taking SecureFault on existing "
                                "stackframe: failed exception return integrity "
                                "signature check\n");
+                v7m_exception_taken(cpu, excret, true, false);
                  return;
              }
  
              pop_ok = pop_ok &&
-                v7m_stack_read(cpu, &env->regs[4], frameptr + 0x8, mmu_idx) &&
                  v7m_stack_read(cpu, &env->regs[4], frameptr + 0x8, mmu_idx) &&
                  v7m_stack_read(cpu, &env->regs[5], frameptr + 0xc, mmu_idx) &&
                  v7m_stack_read(cpu, &env->regs[6], frameptr + 0x10, mmu_idx) &&
@@ -7233,6 +7499,7 @@ static void do_v7m_exception_exit(ARMCPU *cpu)
              /* v7m_stack_read() pended a fault, so take it (as a tail
               * chained exception on the same stack frame)
               */
+            qemu_log_mask(CPU_LOG_INT, "...derived exception on unstacking\n");
              v7m_exception_taken(cpu, excret, true, false);
              return;
          }
@@ -7269,10 +7536,10 @@ static void do_v7m_exception_exit(ARMCPU *cpu)
                  armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE,
                                          env->v7m.secure);
                  env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_INVPC_MASK;
-                v7m_exception_taken(cpu, excret, true, false);
                  qemu_log_mask(CPU_LOG_INT, "...taking UsageFault on existing "
                                "stackframe: failed exception return integrity "
                                "check\n");
+                v7m_exception_taken(cpu, excret, true, false);
                  return;
              }
          }
@@ -7308,9 +7575,9 @@ static void do_v7m_exception_exit(ARMCPU *cpu)
          armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, false);
          env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_INVPC_MASK;
          ignore_stackfaults = v7m_push_stack(cpu);
-        v7m_exception_taken(cpu, excret, false, ignore_stackfaults);
          qemu_log_mask(CPU_LOG_INT, "...taking UsageFault on new stackframe: "
                        "failed exception return integrity check\n");
+        v7m_exception_taken(cpu, excret, false, ignore_stackfaults);
          return;
      }
  
@@ -7413,6 +7680,7 @@ static void arm_log_exception(int idx)
              [EXCP_SEMIHOST] = "Semihosting call",
              [EXCP_NOCP] = "v7M NOCP UsageFault",
              [EXCP_INVSTATE] = "v7M INVSTATE UsageFault",
+            [EXCP_STKOF] = "v8M STKOF UsageFault",
          };
  
          if (idx >= 0 && idx < ARRAY_SIZE(excnames)) {
@@ -7568,6 +7836,10 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs)
          armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, env->v7m.secure);
          env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_INVSTATE_MASK;
          break;
+    case EXCP_STKOF:
+        armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_USAGE, env->v7m.secure);
+        env->v7m.cfsr[env->v7m.secure] |= R_V7M_CFSR_STKOF_MASK;
+        break;
      case EXCP_SWI:
          /* The PC already points to the next instruction.  */
          armv7m_nvic_set_pending(env->nvic, ARMV7M_EXCP_SVC, env->v7m.secure);
@@ -7726,7 +7998,6 @@ void arm_v7m_cpu_do_interrupt(CPUState *cs)
  
      ignore_stackfaults = v7m_push_stack(cpu);
      v7m_exception_taken(cpu, lr, false, ignore_stackfaults);
-    qemu_log_mask(CPU_LOG_INT, "... as %d\n", env->v7m.exception);
  }
  
  /* Function used to synchronize QEMU's AArch64 register set with AArch32
@@ -7935,6 +8206,125 @@ void aarch64_sync_64_to_32(CPUARMState *env)
      env->regs[15] = env->pc;
  }
  
+/*
+ * Shared final step of AArch32 exception entry.
+ *
+ * @env:      CPU state to update
+ * @new_mode: AArch32 mode (CPSR.M value) the exception is taken to
+ * @mask:     interrupt mask bits to OR into env->daif
+ * @offset:   value added to the current PC to form the LR return address
+ *            (not used when entering Hyp mode, which records ELR instead)
+ * @newpc:    address execution continues from
+ */
+static void take_aarch32_exception(CPUARMState *env, int new_mode,
+                                   uint32_t mask, uint32_t offset,
+                                   uint32_t newpc)
+{
+    /* Bank-switch first so that the following writes hit the new mode. */
+    switch_mode(env, new_mode);
+
+    /*
+     * SS has to read as zero both in PSTATE and in the value stored to
+     * SPSR_<mode>, so it is cleared before the SPSR snapshot is taken.
+     */
+    env->uncached_cpsr &= ~PSTATE_SS;
+    env->spsr = cpsr_read(env);
+
+    /* The handler starts with no IT state. */
+    env->condexec_bits = 0;
+
+    /* Install the new mode field. */
+    env->uncached_cpsr &= ~CPSR_M;
+    env->uncached_cpsr |= new_mode;
+
+    /* Data endianness of the handler follows SCTLR.EE of the new EL. */
+    env->uncached_cpsr &= ~CPSR_E;
+    if (env->cp15.sctlr_el[arm_current_el(env)] & SCTLR_EE) {
+        env->uncached_cpsr |= CPSR_E;
+    }
+
+    /* Exception entry never leaves IL or J set. */
+    env->uncached_cpsr &= ~CPSR_IL;
+    env->uncached_cpsr &= ~CPSR_J;
+
+    env->daif |= mask;
+
+    if (new_mode != ARM_CPU_MODE_HYP) {
+        /*
+         * Strictly there was no c1_sys on V4T/V5, so consulting SCTLR.TE
+         * there is a fiction; only the Thumb switch itself is guarded
+         * on V4T.
+         */
+        if (arm_feature(env, ARM_FEATURE_V4T)) {
+            env->thumb =
+                !!(A32_BANKED_CURRENT_REG_GET(env, sctlr) & SCTLR_TE);
+        }
+        env->regs[14] = env->regs[15] + offset;
+    } else {
+        /* Hyp entry: TE comes from the EL2 SCTLR, return address into ELR. */
+        env->thumb = !!(env->cp15.sctlr_el[2] & SCTLR_TE);
+        env->elr_el[2] = env->regs[15];
+    }
+
+    env->regs[15] = newpc;
+}
+
+/*
+ * Take the exception described by cs->exception_index to AArch32 Hyp mode.
+ *
+ * Picks the Hyp vector offset for the exception type, records the syndrome
+ * in esr_el[2] for everything except IRQ and FIQ, builds the A/I/F mask
+ * from the SCR bits, adds HVBAR and hands over to take_aarch32_exception().
+ * An exception type with no case below is reported through cpu_abort().
+ */
+static void arm_cpu_do_interrupt_aarch32_hyp(CPUState *cs)
+{
+    /*
+     * Handle exception entry to Hyp mode; this is sufficiently
+     * different to entry to other AArch32 modes that we handle it
+     * separately here.
+     *
+     * The vector table entry used is always the 0x14 Hyp mode entry point,
+     * unless this is an UNDEF/HVC/abort taken from Hyp to Hyp.
+     * The offset applied to the preferred return address is always zero
+     * (see DDI0487C.a section G1.12.3).
+     * PSTATE A/I/F masks are set based only on the SCR.EA/IRQ/FIQ values.
+     */
+    uint32_t addr, mask;
+    ARMCPU *cpu = ARM_CPU(cs);
+    CPUARMState *env = &cpu->env;
+
+    switch (cs->exception_index) {
+    case EXCP_UDEF:
+        addr = 0x04;
+        break;
+    case EXCP_SWI:
+        addr = 0x14;
+        break;
+    case EXCP_BKPT:
+        /* Fall through to prefetch abort.  */
+    case EXCP_PREFETCH_ABORT:
+        env->cp15.ifar_s = env->exception.vaddress;
+        qemu_log_mask(CPU_LOG_INT, "...with HIFAR 0x%x\n",
+                      (uint32_t)env->exception.vaddress);
+        addr = 0x0c;
+        break;
+    case EXCP_DATA_ABORT:
+        env->cp15.dfar_s = env->exception.vaddress;
+        qemu_log_mask(CPU_LOG_INT, "...with HDFAR 0x%x\n",
+                      (uint32_t)env->exception.vaddress);
+        addr = 0x10;
+        break;
+    case EXCP_IRQ:
+        addr = 0x18;
+        break;
+    case EXCP_FIQ:
+        addr = 0x1c;
+        break;
+    case EXCP_HVC:
+        addr = 0x08;
+        break;
+    case EXCP_HYP_TRAP:
+        addr = 0x14;
+        /* Must not fall into the default case, which reports an error. */
+        break;
+    default:
+        cpu_abort(cs, "Unhandled exception 0x%x\n", cs->exception_index);
+    }
+
+    /* IRQ and FIQ carry no syndrome; everything else updates the Hyp ESR. */
+    if (cs->exception_index != EXCP_IRQ && cs->exception_index != EXCP_FIQ) {
+        env->cp15.esr_el[2] = env->exception.syndrome;
+    }
+
+    /*
+     * The vectors below 0x14 are only used for exceptions taken from
+     * Hyp mode itself; from any other EL they collapse onto 0x14.
+     */
+    if (arm_current_el(env) != 2 && addr < 0x14) {
+        addr = 0x14;
+    }
+
+    /* Mask each of A/I/F unless the matching SCR routing bit is set. */
+    mask = 0;
+    if (!(env->cp15.scr_el3 & SCR_EA)) {
+        mask |= CPSR_A;
+    }
+    if (!(env->cp15.scr_el3 & SCR_IRQ)) {
+        mask |= CPSR_I;
+    }
+    if (!(env->cp15.scr_el3 & SCR_FIQ)) {
+        mask |= CPSR_F;
+    }
+
+    addr += env->cp15.hvbar;
+
+    take_aarch32_exception(env, ARM_CPU_MODE_HYP, mask, 0, addr);
+}
+
  static void arm_cpu_do_interrupt_aarch32(CPUState *cs)
  {
      ARMCPU *cpu = ARM_CPU(cs);
@@ -7970,6 +8360,11 @@ static void arm_cpu_do_interrupt_aarch32(CPUState *cs)
          env->cp15.mdscr_el1 = deposit64(env->cp15.mdscr_el1, 2, 4, moe);
      }
  
+    if (env->exception.target_el == 2) {
+        arm_cpu_do_interrupt_aarch32_hyp(cs);
+        return;
+    }
+
      /* TODO: Vectored interrupt controller.  */
      switch (cs->exception_index) {
      case EXCP_UDEF:
@@ -8077,29 +8472,7 @@ static void arm_cpu_do_interrupt_aarch32(CPUState *cs)
          env->cp15.scr_el3 &= ~SCR_NS;
      }
  
-    switch_mode (env, new_mode);
-    /* For exceptions taken to AArch32 we must clear the SS bit in both
-     * PSTATE and in the old-state value we save to SPSR_<mode>, so zero it now.
-     */
-    env->uncached_cpsr &= ~PSTATE_SS;
-    env->spsr = cpsr_read(env);
-    /* Clear IT bits.  */
-    env->condexec_bits = 0;
-    /* Switch to the new mode, and to the correct instruction set.  */
-    env->uncached_cpsr = (env->uncached_cpsr & ~CPSR_M) | new_mode;
-    /* Set new mode endianness */
-    env->uncached_cpsr &= ~CPSR_E;
-    if (env->cp15.sctlr_el[arm_current_el(env)] & SCTLR_EE) {
-        env->uncached_cpsr |= CPSR_E;
-    }
-    env->daif |= mask;
-    /* this is a lie, as the was no c1_sys on V4T/V5, but who cares
-     * and we should just guard the thumb mode on V4 */
-    if (arm_feature(env, ARM_FEATURE_V4T)) {
-        env->thumb = (A32_BANKED_CURRENT_REG_GET(env, sctlr) & SCTLR_TE) != 0;
-    }
-    env->regs[14] = env->regs[15] + offset;
-    env->regs[15] = addr;
+    take_aarch32_exception(env, new_mode, mask, offset, addr);
  }
  
  /* Handle exception entry to a target EL which is using AArch64 */
@@ -8110,8 +8483,15 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs)
      unsigned int new_el = env->exception.target_el;
      target_ulong addr = env->cp15.vbar_el[new_el];
      unsigned int new_mode = aarch64_pstate_mode(new_el, true);
+    unsigned int cur_el = arm_current_el(env);
+
+    /*
+     * Note that new_el can never be 0.  If cur_el is 0, then
+     * el0_a64 is is_a64(), else el0_a64 is ignored.
+     */
+    aarch64_sve_change_el(env, cur_el, new_el, is_a64(env));
  
-    if (arm_current_el(env) < new_el) {
+    if (cur_el < new_el) {
          /* Entry vector offset depends on whether the implemented EL
           * immediately lower than the target level is using AArch32 or AArch64
           */
@@ -8393,8 +8773,23 @@ static inline bool regime_translation_disabled(CPUARMState *env,
      }
  
      if (mmu_idx == ARMMMUIdx_S2NS) {
-        return (env->cp15.hcr_el2 & HCR_VM) == 0;
+        /* HCR.DC means HCR.VM behaves as 1 */
+        return (env->cp15.hcr_el2 & (HCR_DC | HCR_VM)) == 0;
+    }
+
+    if (env->cp15.hcr_el2 & HCR_TGE) {
+        /* TGE means that NS EL0/1 act as if SCTLR_EL1.M is zero */
+        if (!regime_is_secure(env, mmu_idx) && regime_el(env, mmu_idx) == 1) {
+            return true;
+        }
      }
+
+    if ((env->cp15.hcr_el2 & HCR_DC) &&
+        (mmu_idx == ARMMMUIdx_S1NSE0 || mmu_idx == ARMMMUIdx_S1NSE1)) {
+        /* HCR.DC means SCTLR_EL1.M behaves as 0 */
+        return true;
+    }
+
      return (regime_sctlr(env, mmu_idx) & SCTLR_M) == 0;
  }
  
@@ -9669,6 +10064,20 @@ static bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address,
              }
  
              if (address < base || address > base + rmask) {
+                /*
+                 * Address not in this region. We must check whether the
+                 * region covers addresses in the same page as our address.
+                 * In that case we must not report a size that covers the
+                 * whole page for a subsequent hit against a different MPU
+                 * region or the background region, because it would result in
+                 * incorrect TLB hits for subsequent accesses to addresses that
+                 * are in this MPU region.
+                 */
+                if (ranges_overlap(base, rmask,
+                                   address & TARGET_PAGE_MASK,
+                                   TARGET_PAGE_SIZE)) {
+                    *page_size = 1;
+                }
                  continue;
              }
  
@@ -9786,17 +10195,6 @@ static bool get_phys_addr_pmsav7(CPUARMState *env, uint32_t address,
  
      fi->type = ARMFault_Permission;
      fi->level = 1;
-    /*
-     * Core QEMU code can't handle execution from small pages yet, so
-     * don't try it. This way we'll get an MPU exception, rather than
-     * eventually causing QEMU to exit in get_page_addr_code().
-     */
-    if (*page_size < TARGET_PAGE_SIZE && (*prot & PAGE_EXEC)) {
-        qemu_log_mask(LOG_UNIMP,
-                      "MPU: No support for execution from regions "
-                      "smaller than 1K\n");
-        *prot &= ~PAGE_EXEC;
-    }
      return !(*prot & (1 << access_type));
  }
  
@@ -9888,6 +10286,22 @@ static void v8m_security_lookup(CPUARMState *env, uint32_t address,
                          sattrs->srvalid = true;
                          sattrs->sregion = r;
                      }
+                } else {
+                    /*
+                     * Address not in this region. We must check whether the
+                     * region covers addresses in the same page as our address.
+                     * In that case we must not report a size that covers the
+                     * whole page for a subsequent hit against a different MPU
+                     * region or the background region, because it would result
+                     * in incorrect TLB hits for subsequent accesses to
+                     * addresses that are in this MPU region.
+                     */
+                    if (limit >= base &&
+                        ranges_overlap(base, limit - base + 1,
+                                       addr_page_base,
+                                       TARGET_PAGE_SIZE)) {
+                        sattrs->subpage = true;
+                    }
                  }
              }
          }
@@ -9963,6 +10377,21 @@ static bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address,
              }
  
              if (address < base || address > limit) {
+                /*
+                 * Address not in this region. We must check whether the
+                 * region covers addresses in the same page as our address.
+                 * In that case we must not report a size that covers the
+                 * whole page for a subsequent hit against a different MPU
+                 * region or the background region, because it would result in
+                 * incorrect TLB hits for subsequent accesses to addresses that
+                 * are in this MPU region.
+                 */
+                if (limit >= base &&
+                    ranges_overlap(base, limit - base + 1,
+                                   addr_page_base,
+                                   TARGET_PAGE_SIZE)) {
+                    *is_subpage = true;
+                }
                  continue;
              }
  
@@ -10016,18 +10445,6 @@ static bool pmsav8_mpu_lookup(CPUARMState *env, uint32_t address,
  
      fi->type = ARMFault_Permission;
      fi->level = 1;
-    /*
-     * Core QEMU code can't handle execution from small pages yet, so
-     * don't try it. This means any attempted execution will generate
-     * an MPU exception, rather than eventually causing QEMU to exit in
-     * get_page_addr_code().
-     */
-    if (*is_subpage && (*prot & PAGE_EXEC)) {
-        qemu_log_mask(LOG_UNIMP,
-                      "MPU: No support for execution from regions "
-                      "smaller than 1K\n");
-        *prot &= ~PAGE_EXEC;
-    }
      return !(*prot & (1 << access_type));
  }
  
@@ -10353,6 +10770,16 @@ static bool get_phys_addr(CPUARMState *env, target_ulong address,
  
              /* Combine the S1 and S2 cache attributes, if needed */
              if (!ret && cacheattrs != NULL) {
+                if (env->cp15.hcr_el2 & HCR_DC) {
+                    /*
+                     * HCR.DC forces the first stage attributes to
+                     *  Normal Non-Shareable,
+                     *  Inner Write-Back Read-Allocate Write-Allocate,
+                     *  Outer Write-Back Read-Allocate Write-Allocate.
+                     */
+                    cacheattrs->attrs = 0xff;
+                    cacheattrs->shareability = 0;
+                }
                  *cacheattrs = combine_cacheattrs(*cacheattrs, cacheattrs2);
              }
  
@@ -10670,13 +11097,13 @@ void HELPER(v7m_msr)(CPUARMState *env, uint32_t maskreg, uint32_t val)
              env->v7m.primask[M_REG_NS] = val & 1;
              return;
          case 0x91: /* BASEPRI_NS */
-            if (!env->v7m.secure) {
+            if (!env->v7m.secure || !arm_feature(env, ARM_FEATURE_M_MAIN)) {
                  return;
              }
              env->v7m.basepri[M_REG_NS] = val & 0xff;
              return;
          case 0x93: /* FAULTMASK_NS */
-            if (!env->v7m.secure) {
+            if (!env->v7m.secure || !arm_feature(env, ARM_FEATURE_M_MAIN)) {
                  return;
              }
              env->v7m.faultmask[M_REG_NS] = val & 1;
@@ -10688,8 +11115,10 @@ void HELPER(v7m_msr)(CPUARMState *env, uint32_t maskreg, uint32_t val)
              write_v7m_control_spsel_for_secstate(env,
                                                   val & R_V7M_CONTROL_SPSEL_MASK,
                                                   M_REG_NS);
-            env->v7m.control[M_REG_NS] &= ~R_V7M_CONTROL_NPRIV_MASK;
-            env->v7m.control[M_REG_NS] |= val & R_V7M_CONTROL_NPRIV_MASK;
+            if (arm_feature(env, ARM_FEATURE_M_MAIN)) {
+                env->v7m.control[M_REG_NS] &= ~R_V7M_CONTROL_NPRIV_MASK;
+                env->v7m.control[M_REG_NS] |= val & R_V7M_CONTROL_NPRIV_MASK;
+            }
              return;
          case 0x98: /* SP_NS */
          {
@@ -10697,11 +11126,23 @@ void HELPER(v7m_msr)(CPUARMState *env, uint32_t maskreg, uint32_t val)
               * currently in handler mode or not, using the NS CONTROL.SPSEL.
               */
              bool spsel = env->v7m.control[M_REG_NS] & R_V7M_CONTROL_SPSEL_MASK;
+            bool is_psp = !arm_v7m_is_handler_mode(env) && spsel;
+            uint32_t limit;
  
              if (!env->v7m.secure) {
                  return;
              }
-            if (!arm_v7m_is_handler_mode(env) && spsel) {
+
+            limit = is_psp ? env->v7m.psplim[false] : env->v7m.msplim[false];
+
+            if (val < limit) {
+                CPUState *cs = CPU(arm_env_get_cpu(env));
+
+                cpu_restore_state(cs, GETPC(), true);
+                raise_exception(env, EXCP_STKOF, 0, 1);
+            }
+
+            if (is_psp) {
                  env->v7m.other_ss_psp = val;
              } else {
                  env->v7m.other_ss_msp = val;
@@ -10758,9 +11199,15 @@ void HELPER(v7m_msr)(CPUARMState *env, uint32_t maskreg, uint32_t val)
          env->v7m.primask[env->v7m.secure] = val & 1;
          break;
      case 17: /* BASEPRI */
+        if (!arm_feature(env, ARM_FEATURE_M_MAIN)) {
+            goto bad_reg;
+        }
          env->v7m.basepri[env->v7m.secure] = val & 0xff;
          break;
      case 18: /* BASEPRI_MAX */
+        if (!arm_feature(env, ARM_FEATURE_M_MAIN)) {
+            goto bad_reg;
+        }
          val &= 0xff;
          if (val != 0 && (val < env->v7m.basepri[env->v7m.secure]
                           || env->v7m.basepri[env->v7m.secure] == 0)) {
@@ -10768,6 +11215,9 @@ void HELPER(v7m_msr)(CPUARMState *env, uint32_t maskreg, uint32_t val)
          }
          break;
      case 19: /* FAULTMASK */
+        if (!arm_feature(env, ARM_FEATURE_M_MAIN)) {
+            goto bad_reg;
+        }
          env->v7m.faultmask[env->v7m.secure] = val & 1;
          break;
      case 20: /* CONTROL */
@@ -10782,8 +11232,10 @@ void HELPER(v7m_msr)(CPUARMState *env, uint32_t maskreg, uint32_t val)
              !arm_v7m_is_handler_mode(env)) {
              write_v7m_control_spsel(env, (val & R_V7M_CONTROL_SPSEL_MASK) != 0);
          }
-        env->v7m.control[env->v7m.secure] &= ~R_V7M_CONTROL_NPRIV_MASK;
-        env->v7m.control[env->v7m.secure] |= val & R_V7M_CONTROL_NPRIV_MASK;
+        if (arm_feature(env, ARM_FEATURE_M_MAIN)) {
+            env->v7m.control[env->v7m.secure] &= ~R_V7M_CONTROL_NPRIV_MASK;
+            env->v7m.control[env->v7m.secure] |= val & R_V7M_CONTROL_NPRIV_MASK;
+        }
          break;
      default:
      bad_reg:
@@ -11243,9 +11695,13 @@ uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
      fpscr = (env->vfp.xregs[ARM_VFP_FPSCR] & 0xffc8ffff)
              | (env->vfp.vec_len << 16)
              | (env->vfp.vec_stride << 20);
+
      i = get_float_exception_flags(&env->vfp.fp_status);
      i |= get_float_exception_flags(&env->vfp.standard_fp_status);
-    i |= get_float_exception_flags(&env->vfp.fp_status_f16);
+    /* FZ16 does not generate an input denormal exception.  */
+    i |= (get_float_exception_flags(&env->vfp.fp_status_f16)
+          & ~float_flag_input_denormal);
+
      fpscr |= vfp_exceptbits_from_host(i);
      return fpscr;
  }
@@ -11280,6 +11736,11 @@ void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
      int i;
      uint32_t changed;
  
+    /* When ARMv8.2-FP16 is not supported, FZ16 is RES0.  */
+    if (!cpu_isar_feature(aa64_fp16, arm_env_get_cpu(env))) {
+        val &= ~FPCR_FZ16;
+    }
+
      changed = env->vfp.xregs[ARM_VFP_FPSCR];
      env->vfp.xregs[ARM_VFP_FPSCR] = (val & 0xffc8ffff);
      env->vfp.vec_len = (val >> 16) & 7;
@@ -11470,45 +11931,30 @@ float32 VFP_HELPER(fcvts, d)(float64 x, CPUARMState *env)
  #define VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype) \
  float##fsz HELPER(vfp_##name##to##p)(uint##isz##_t  x, uint32_t shift, \
                                       void *fpstp) \
-{ \
-    float_status *fpst = fpstp; \
-    float##fsz tmp; \
-    tmp = itype##_to_##float##fsz(x, fpst); \
-    return float##fsz##_scalbn(tmp, -(int)shift, fpst); \
-}
+{ return itype##_to_##float##fsz##_scalbn(x, -shift, fpstp); }
  
-/* Notice that we want only input-denormal exception flags from the
- * scalbn operation: the other possible flags (overflow+inexact if
- * we overflow to infinity, output-denormal) aren't correct for the
- * complete scale-and-convert operation.
- */
-#define VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, round) \
-uint##isz##_t HELPER(vfp_to##name##p##round)(float##fsz x, \
-                                             uint32_t shift, \
-                                             void *fpstp) \
-{ \
-    float_status *fpst = fpstp; \
-    int old_exc_flags = get_float_exception_flags(fpst); \
-    float##fsz tmp; \
-    if (float##fsz##_is_any_nan(x)) { \
-        float_raise(float_flag_invalid, fpst); \
-        return 0; \
-    } \
-    tmp = float##fsz##_scalbn(x, shift, fpst); \
-    old_exc_flags |= get_float_exception_flags(fpst) \
-        & float_flag_input_denormal; \
-    set_float_exception_flags(old_exc_flags, fpst); \
-    return float##fsz##_to_##itype##round(tmp, fpst); \
+#define VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, ROUND, suff)   \
+uint##isz##_t HELPER(vfp_to##name##p##suff)(float##fsz x, uint32_t shift, \
+                                            void *fpst)                   \
+{                                                                         \
+    if (unlikely(float##fsz##_is_any_nan(x))) {                           \
+        float_raise(float_flag_invalid, fpst);                            \
+        return 0;                                                         \
+    }                                                                     \
+    return float##fsz##_to_##itype##_scalbn(x, ROUND, shift, fpst);       \
  }
  
  #define VFP_CONV_FIX(name, p, fsz, isz, itype)                   \
  VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype)                     \
-VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, _round_to_zero) \
-VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, )
+VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype,               \
+                         float_round_to_zero, _round_to_zero)    \
+VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype,               \
+                         get_float_rounding_mode(fpst), )
  
  #define VFP_CONV_FIX_A64(name, p, fsz, isz, itype)               \
  VFP_CONV_FIX_FLOAT(name, p, fsz, isz, itype)                     \
-VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype, )
+VFP_CONV_FLOAT_FIX_ROUND(name, p, fsz, isz, itype,               \
+                         get_float_rounding_mode(fpst), )
  
  VFP_CONV_FIX(sh, d, 64, 64, int16)
  VFP_CONV_FIX(sl, d, 64, 64, int32)
@@ -11528,87 +11974,84 @@ VFP_CONV_FIX_A64(uq, s, 32, 64, uint64)
  #undef VFP_CONV_FLOAT_FIX_ROUND
  #undef VFP_CONV_FIX_A64
  
-/* Conversion to/from f16 can overflow to infinity before/after scaling.
- * Therefore we convert to f64, scale, and then convert f64 to f16; or
- * vice versa for conversion to integer.
- *
- * For 16- and 32-bit integers, the conversion to f64 never rounds.
- * For 64-bit integers, any integer that would cause rounding will also
- * overflow to f16 infinity, so there is no double rounding problem.
- */
-
-static float16 do_postscale_fp16(float64 f, int shift, float_status *fpst)
-{
-    return float64_to_float16(float64_scalbn(f, -shift, fpst), true, fpst);
-}
-
  uint32_t HELPER(vfp_sltoh)(uint32_t x, uint32_t shift, void *fpst)
  {
-    return do_postscale_fp16(int32_to_float64(x, fpst), shift, fpst);
+    return int32_to_float16_scalbn(x, -shift, fpst);
  }
  
  uint32_t HELPER(vfp_ultoh)(uint32_t x, uint32_t shift, void *fpst)
  {
-    return do_postscale_fp16(uint32_to_float64(x, fpst), shift, fpst);
+    return uint32_to_float16_scalbn(x, -shift, fpst);
  }
  
  uint32_t HELPER(vfp_sqtoh)(uint64_t x, uint32_t shift, void *fpst)
  {
-    return do_postscale_fp16(int64_to_float64(x, fpst), shift, fpst);
+    return int64_to_float16_scalbn(x, -shift, fpst);
  }
  
  uint32_t HELPER(vfp_uqtoh)(uint64_t x, uint32_t shift, void *fpst)
  {
-    return do_postscale_fp16(uint64_to_float64(x, fpst), shift, fpst);
+    return uint64_to_float16_scalbn(x, -shift, fpst);
  }
  
-static float64 do_prescale_fp16(float16 f, int shift, float_status *fpst)
+uint32_t HELPER(vfp_toshh)(uint32_t x, uint32_t shift, void *fpst)
  {
-    if (unlikely(float16_is_any_nan(f))) {
+    if (unlikely(float16_is_any_nan(x))) {
          float_raise(float_flag_invalid, fpst);
          return 0;
-    } else {
-        int old_exc_flags = get_float_exception_flags(fpst);
-        float64 ret;
-
-        ret = float16_to_float64(f, true, fpst);
-        ret = float64_scalbn(ret, shift, fpst);
-        old_exc_flags |= get_float_exception_flags(fpst)
-            & float_flag_input_denormal;
-        set_float_exception_flags(old_exc_flags, fpst);
-
-        return ret;
      }
-}
-
-uint32_t HELPER(vfp_toshh)(uint32_t x, uint32_t shift, void *fpst)
-{
-    return float64_to_int16(do_prescale_fp16(x, shift, fpst), fpst);
+    return float16_to_int16_scalbn(x, get_float_rounding_mode(fpst),
+                                   shift, fpst);
  }
  
  uint32_t HELPER(vfp_touhh)(uint32_t x, uint32_t shift, void *fpst)
  {
-    return float64_to_uint16(do_prescale_fp16(x, shift, fpst), fpst);
+    if (unlikely(float16_is_any_nan(x))) {
+        float_raise(float_flag_invalid, fpst);
+        return 0;
+    }
+    return float16_to_uint16_scalbn(x, get_float_rounding_mode(fpst),
+                                    shift, fpst);
  }
  
  uint32_t HELPER(vfp_toslh)(uint32_t x, uint32_t shift, void *fpst)
  {
-    return float64_to_int32(do_prescale_fp16(x, shift, fpst), fpst);
+    if (unlikely(float16_is_any_nan(x))) {
+        float_raise(float_flag_invalid, fpst);
+        return 0;
+    }
+    return float16_to_int32_scalbn(x, get_float_rounding_mode(fpst),
+                                   shift, fpst);
  }
  
  uint32_t HELPER(vfp_toulh)(uint32_t x, uint32_t shift, void *fpst)
  {
-    return float64_to_uint32(do_prescale_fp16(x, shift, fpst), fpst);
+    if (unlikely(float16_is_any_nan(x))) {
+        float_raise(float_flag_invalid, fpst);
+        return 0;
+    }
+    return float16_to_uint32_scalbn(x, get_float_rounding_mode(fpst),
+                                    shift, fpst);
  }
  
  uint64_t HELPER(vfp_tosqh)(uint32_t x, uint32_t shift, void *fpst)
  {
-    return float64_to_int64(do_prescale_fp16(x, shift, fpst), fpst);
+    if (unlikely(float16_is_any_nan(x))) {
+        float_raise(float_flag_invalid, fpst);
+        return 0;
+    }
+    return float16_to_int64_scalbn(x, get_float_rounding_mode(fpst),
+                                   shift, fpst);
  }
  
  uint64_t HELPER(vfp_touqh)(uint32_t x, uint32_t shift, void *fpst)
  {
-    return float64_to_uint64(do_prescale_fp16(x, shift, fpst), fpst);
+    if (unlikely(float16_is_any_nan(x))) {
+        float_raise(float_flag_invalid, fpst);
+        return 0;
+    }
+    return float16_to_uint64_scalbn(x, get_float_rounding_mode(fpst),
+                                    shift, fpst);
  }
  
  /* Set the current fp rounding mode and return the old one.
@@ -12237,6 +12680,7 @@ int arm_rmode_to_sf(int rmode)
          /* FIXME: add support for TIEAWAY and ODD */
          qemu_log_mask(LOG_UNIMP, "arm: unimplemented rounding mode: %d\n",
                        rmode);
+        /* fall through for now */
      case FPROUNDING_TIEEVEN:
      default:
          rmode = float_round_nearest_even;
@@ -12281,11 +12725,10 @@ uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes)
  /* Return the exception level to which FP-disabled exceptions should
   * be taken, or 0 if FP is enabled.
   */
-static inline int fp_exception_el(CPUARMState *env)
+int fp_exception_el(CPUARMState *env, int cur_el)
  {
  #ifndef CONFIG_USER_ONLY
      int fpen;
-    int cur_el = arm_current_el(env);
  
      /* CPACR and the CPTR registers don't exist before v6, so FP is
       * always accessible
@@ -12348,37 +12791,34 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
                            target_ulong *cs_base, uint32_t *pflags)
  {
      ARMMMUIdx mmu_idx = core_to_arm_mmu_idx(env, cpu_mmu_index(env, false));
-    int fp_el = fp_exception_el(env);
+    int current_el = arm_current_el(env);
+    int fp_el = fp_exception_el(env, current_el);
      uint32_t flags;
  
      if (is_a64(env)) {
-        int sve_el = sve_exception_el(env);
-        uint32_t zcr_len;
+        ARMCPU *cpu = arm_env_get_cpu(env);
  
          *pc = env->pc;
          flags = ARM_TBFLAG_AARCH64_STATE_MASK;
          /* Get control bits for tagged addresses */
          flags |= (arm_regime_tbi0(env, mmu_idx) << ARM_TBFLAG_TBI0_SHIFT);
          flags |= (arm_regime_tbi1(env, mmu_idx) << ARM_TBFLAG_TBI1_SHIFT);
-        flags |= sve_el << ARM_TBFLAG_SVEEXC_EL_SHIFT;
  
-        /* If SVE is disabled, but FP is enabled,
-           then the effective len is 0.  */
-        if (sve_el != 0 && fp_el == 0) {
-            zcr_len = 0;
-        } else {
-            int current_el = arm_current_el(env);
+        if (cpu_isar_feature(aa64_sve, cpu)) {
+            int sve_el = sve_exception_el(env, current_el);
+            uint32_t zcr_len;
  
-            zcr_len = env->vfp.zcr_el[current_el <= 1 ? 1 : current_el];
-            zcr_len &= 0xf;
-            if (current_el < 2 && arm_feature(env, ARM_FEATURE_EL2)) {
-                zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[2]);
-            }
-            if (current_el < 3 && arm_feature(env, ARM_FEATURE_EL3)) {
-                zcr_len = MIN(zcr_len, 0xf & (uint32_t)env->vfp.zcr_el[3]);
+            /* If SVE is disabled, but FP is enabled,
+             * then the effective len is 0.
+             */
+            if (sve_el != 0 && fp_el == 0) {
+                zcr_len = 0;
+            } else {
+                zcr_len = sve_zcr_len_for_el(env, current_el);
              }
+            flags |= sve_el << ARM_TBFLAG_SVEEXC_EL_SHIFT;
+            flags |= zcr_len << ARM_TBFLAG_ZCR_LEN_SHIFT;
          }
-        flags |= zcr_len << ARM_TBFLAG_ZCR_LEN_SHIFT;
      } else {
          *pc = env->regs[15];
          flags = (env->thumb << ARM_TBFLAG_THUMB_SHIFT)
@@ -12427,6 +12867,103 @@ void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
          flags |= ARM_TBFLAG_HANDLER_MASK;
      }
  
+    /* v8M always applies stack limit checks unless CCR.STKOFHFNMIGN is
+     * suppressing them because the requested execution priority is less than 0.
+     */
+    if (arm_feature(env, ARM_FEATURE_V8) &&
+        arm_feature(env, ARM_FEATURE_M) &&
+        !((mmu_idx  & ARM_MMU_IDX_M_NEGPRI) &&
+          (env->v7m.ccr[env->v7m.secure] & R_V7M_CCR_STKOFHFNMIGN_MASK))) {
+        flags |= ARM_TBFLAG_STACKCHECK_MASK;
+    }
+
      *pflags = flags;
      *cs_base = 0;
  }
+
+#ifdef TARGET_AARCH64
+/*
+ * Narrow the SVE vector length to @vq, zeroing what becomes inaccessible.
+ *
+ * The manual says that when SVE is enabled and VQ is widened the
+ * implementation is allowed to zero the previously inaccessible
+ * portion of the registers.  The corollary is that when SVE is
+ * enabled and VQ is narrowed we may zero the now inaccessible portion.
+ *
+ * The intent is that no predicate bit beyond VQ is ever set, so that
+ * operations on predicate registers may work on whole uint64_t words
+ * or be unrolled across the maximum uint64_t[4]: unconditional host
+ * arithmetic may well be cheaper than conditionals restricting the
+ * operation to the relevant portion of a uint16_t[16].
+ */
+void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq)
+{
+    int i, j;
+    uint64_t pmask;
+
+    assert(vq >= 1 && vq <= ARM_MAX_VQ);
+    assert(vq <= arm_env_get_cpu(env)->sve_max_vq);
+
+    /* Zero each of the 32 zregs from quadword vq (d[2 * vq]) upwards.  */
+    for (i = 0; i < 32; i++) {
+        memset(&env->vfp.zregs[i].d[2 * vq], 0, 16 * (ARM_MAX_VQ - vq));
+    }
+
+    /* Zap the high bits of the pregs and ffr: 16 predicate bits per vq.  */
+    pmask = 0;
+    if (vq & 3) {
+        pmask = ~(-1ULL << (16 * (vq & 3)));
+    }
+    for (j = vq / 4; j < ARM_MAX_VQ / 4; j++) {
+        for (i = 0; i < 17; ++i) {
+            env->vfp.pregs[i].p[j] &= pmask;
+        }
+        pmask = 0;      /* words after the first are cleared whole */
+    }
+}
+
+/* Notice a change in SVE vector size when moving from @old_el to @new_el;
+ * @el0_a64 says whether EL0 is AArch64, and is used when either EL is 0.
+ */
+void aarch64_sve_change_el(CPUARMState *env, int old_el,
+                           int new_el, bool el0_a64)
+{
+    ARMCPU *cpu = arm_env_get_cpu(env);
+    int old_len, new_len;
+    bool old_a64, new_a64;
+
+    /* Nothing to do if no SVE.  */
+    if (!cpu_isar_feature(aa64_sve, cpu)) {
+        return;
+    }
+
+    /* Nothing to do if FP is disabled in either EL.  */
+    if (fp_exception_el(env, old_el) || fp_exception_el(env, new_el)) {
+        return;
+    }
+
+    /*
+     * DDI0584A.d sec 3.2: "If SVE instructions are disabled or trapped
+     * at ELx, or not available because the EL is in AArch32 state, then
+     * for all purposes other than a direct read, the ZCR_ELx.LEN field
+     * has an effective value of 0".
+     *
+     * Consider EL2 (aa64, vq=4) -> EL0 (aa32) -> EL1 (aa64, vq=0).
+     * If we ignore aa32 state, we would fail to see the vq4->vq0 transition
+     * from EL2->EL1.  Thus we go ahead and narrow when entering aa32 so that
+     * we already have the correct register contents when encountering the
+     * vq0->vq0 transition between EL0->EL1.
+     */
+    old_a64 = old_el ? arm_el_is_aa64(env, old_el) : el0_a64;
+    old_len = (old_a64 && !sve_exception_el(env, old_el)
+               ? sve_zcr_len_for_el(env, old_el) : 0);
+    new_a64 = new_el ? arm_el_is_aa64(env, new_el) : el0_a64;
+    new_len = (new_a64 && !sve_exception_el(env, new_el)
+               ? sve_zcr_len_for_el(env, new_el) : 0);
+
+    /* Only a narrowing needs work: clear the newly inaccessible state.  */
+    if (new_len < old_len) {
+        aarch64_sve_narrow_vq(env, new_len + 1);
+    }
+}
+#endif