+    env->xregs[rs] = int128_getlo(oldv);
+    env->xregs[rs + 1] = int128_gethi(oldv);
+}
+
+void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
+                              uint64_t new_hi, uint64_t new_lo)
+{
+    Int128 oldv, cmpv, newv;
+    uintptr_t ra = GETPC();
+    int mem_idx;
+    TCGMemOpIdx oi;
+
+    assert(HAVE_CMPXCHG128);
+
+    mem_idx = cpu_mmu_index(env, false);
+    oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
+
+    cmpv = int128_make128(env->xregs[rs + 1], env->xregs[rs]);
+    newv = int128_make128(new_hi, new_lo);
+    oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
+
+    env->xregs[rs + 1] = int128_getlo(oldv);
+    env->xregs[rs] = int128_gethi(oldv);
+}
+
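+/* Note that the big-endian helper above uses the register pair in the
+ * opposite order to the little-endian helper: Xs receives the high
+ * half of the 128-bit result and Xs+1 the low half, and the compare
+ * value is assembled the same way.
+ */
+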
+/*
+ * AdvSIMD half-precision
+ */
+
+#define ADVSIMD_HELPER(name, suffix) HELPER(glue(glue(advsimd_, name), suffix))
+
+#define ADVSIMD_HALFOP(name) \
+uint32_t ADVSIMD_HELPER(name, h)(uint32_t a, uint32_t b, void *fpstp) \
+{ \
+    float_status *fpst = fpstp; \
+    return float16_ ## name(a, b, fpst); \
+}
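+
+/*
+ * For example, ADVSIMD_HELPER(add, h) glues together to
+ * HELPER(advsimd_addh), so ADVSIMD_HALFOP(add) below defines
+ * helper_advsimd_addh(), a thin wrapper around softfloat's
+ * float16_add().
+ */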
+
+ADVSIMD_HALFOP(add)
+ADVSIMD_HALFOP(sub)
+ADVSIMD_HALFOP(mul)
+ADVSIMD_HALFOP(div)
+ADVSIMD_HALFOP(min)
+ADVSIMD_HALFOP(max)
+ADVSIMD_HALFOP(minnum)
+ADVSIMD_HALFOP(maxnum)
+
+#define ADVSIMD_TWOHALFOP(name) \
+uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b, void *fpstp) \
+{ \
+    float16 a1, a2, b1, b2; \
+    uint32_t r1, r2; \
+    float_status *fpst = fpstp; \
+    a1 = extract32(two_a, 0, 16); \
+    a2 = extract32(two_a, 16, 16); \
+    b1 = extract32(two_b, 0, 16); \
+    b2 = extract32(two_b, 16, 16); \
+    r1 = float16_ ## name(a1, b1, fpst); \
+    r2 = float16_ ## name(a2, b2, fpst); \
+    return deposit32(r1, 16, 16, r2); \
+}
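+
+/*
+ * The 2h variants operate on two half-precision lanes packed into one
+ * 32-bit word; for example ADVSIMD_TWOHALFOP(add) defines
+ * helper_advsimd_add2h(), applying float16_add() to each lane
+ * independently.
+ */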
+
+ADVSIMD_TWOHALFOP(add)
+ADVSIMD_TWOHALFOP(sub)
+ADVSIMD_TWOHALFOP(mul)
+ADVSIMD_TWOHALFOP(div)
+ADVSIMD_TWOHALFOP(min)
+ADVSIMD_TWOHALFOP(max)
+ADVSIMD_TWOHALFOP(minnum)
+ADVSIMD_TWOHALFOP(maxnum)
+
+/* Data processing - scalar floating-point and advanced SIMD */
+static float16 float16_mulx(float16 a, float16 b, void *fpstp)
+{
+    float_status *fpst = fpstp;
+
+    a = float16_squash_input_denormal(a, fpst);
+    b = float16_squash_input_denormal(b, fpst);
+
+    if ((float16_is_zero(a) && float16_is_infinity(b)) ||
+        (float16_is_infinity(a) && float16_is_zero(b))) {
+        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
+        return make_float16((1U << 14) |
+                            ((float16_val(a) ^ float16_val(b)) & (1U << 15)));
+    }
+    return float16_mul(a, b, fpst);
+}
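+
+/*
+ * Worked example: FMULX of zero and infinity (in either order)
+ * returns 2.0 with the XORed sign, where a plain FMUL would instead
+ * raise Invalid Operation and return the default NaN.
+ */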
+
+ADVSIMD_HALFOP(mulx)
+ADVSIMD_TWOHALFOP(mulx)
+
+/* fused multiply-accumulate */
+uint32_t HELPER(advsimd_muladdh)(uint32_t a, uint32_t b, uint32_t c,
+                                 void *fpstp)
+{
+    float_status *fpst = fpstp;
+    return float16_muladd(a, b, c, 0, fpst);
+}
+
+uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b,
+                                  uint32_t two_c, void *fpstp)
+{
+    float_status *fpst = fpstp;
+    float16 a1, a2, b1, b2, c1, c2;
+    uint32_t r1, r2;
+    a1 = extract32(two_a, 0, 16);
+    a2 = extract32(two_a, 16, 16);
+    b1 = extract32(two_b, 0, 16);
+    b2 = extract32(two_b, 16, 16);
+    c1 = extract32(two_c, 0, 16);
+    c2 = extract32(two_c, 16, 16);
+    r1 = float16_muladd(a1, b1, c1, 0, fpst);
+    r2 = float16_muladd(a2, b2, c2, 0, fpst);
+    return deposit32(r1, 16, 16, r2);
+}
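+
+/*
+ * Note that float16_muladd() performs the multiply and add with a
+ * single final rounding, which is what distinguishes these fused
+ * helpers from a separate multiply followed by an add.
+ */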
+
+/*
+ * Floating point comparisons produce an integer result. Softfloat
+ * routines return float_relation types which we convert to the 0/-1
+ * result that Neon requires.
+ */
+
+#define ADVSIMD_CMPRES(test) ((test) ? 0xffff : 0)
+
+uint32_t HELPER(advsimd_ceq_f16)(uint32_t a, uint32_t b, void *fpstp)
+{
+    float_status *fpst = fpstp;
+    int compare = float16_compare_quiet(a, b, fpst);
+    return ADVSIMD_CMPRES(compare == float_relation_equal);
+}
+
+uint32_t HELPER(advsimd_cge_f16)(uint32_t a, uint32_t b, void *fpstp)
+{
+    float_status *fpst = fpstp;
+    int compare = float16_compare(a, b, fpst);
+    return ADVSIMD_CMPRES(compare == float_relation_greater ||
+                          compare == float_relation_equal);
+}
+
+uint32_t HELPER(advsimd_cgt_f16)(uint32_t a, uint32_t b, void *fpstp)
+{
+    float_status *fpst = fpstp;
+    int compare = float16_compare(a, b, fpst);
+    return ADVSIMD_CMPRES(compare == float_relation_greater);
+}
+
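+/*
+ * The "absolute compare" helpers below compare the operands by
+ * magnitude: both inputs have their sign bits cleared with
+ * float16_abs() before the ordinary ordered comparison.
+ */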
+uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, void *fpstp)
+{
+    float_status *fpst = fpstp;
+    float16 f0 = float16_abs(a);
+    float16 f1 = float16_abs(b);
+    int compare = float16_compare(f0, f1, fpst);
+    return ADVSIMD_CMPRES(compare == float_relation_greater ||
+                          compare == float_relation_equal);
+}
+
+uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, void *fpstp)
+{
+    float_status *fpst = fpstp;
+    float16 f0 = float16_abs(a);
+    float16 f1 = float16_abs(b);
+    int compare = float16_compare(f0, f1, fpst);
+    return ADVSIMD_CMPRES(compare == float_relation_greater);
+}
+
+/* round to integral */
+uint32_t HELPER(advsimd_rinth_exact)(uint32_t x, void *fp_status)
+{
+    return float16_round_to_int(x, fp_status);
+}
+
+uint32_t HELPER(advsimd_rinth)(uint32_t x, void *fp_status)
+{
+    int old_flags = get_float_exception_flags(fp_status), new_flags;
+    float16 ret;
+
+    ret = float16_round_to_int(x, fp_status);
+
+    /* Suppress any inexact exceptions the conversion produced */
+    if (!(old_flags & float_flag_inexact)) {
+        new_flags = get_float_exception_flags(fp_status);
+        set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
+    }
+
+    return ret;
+}
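+
+/*
+ * Of the two helpers above, advsimd_rinth_exact leaves any Inexact
+ * flag raised by the rounding visible (the FRINTX behaviour), while
+ * advsimd_rinth restores the previous Inexact state for the other
+ * round-to-integral variants.
+ */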
+
+/*
+ * Half-precision floating point conversion functions
+ *
+ * There are a multitude of conversion functions with various
+ * different rounding modes. This is dealt with by the calling code
+ * setting the mode appropriately before calling the helper.
+ */
+
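+/*
+ * As an illustrative (hypothetical) caller sequence, a conversion
+ * with round-towards-zero semantics might look like:
+ *
+ *     set_float_rounding_mode(float_round_to_zero, fpst);
+ *     ret = helper_advsimd_f16tosinth(x, fpst);
+ *
+ * with fpst pointing at the float_status the helper should use.
+ */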
+uint32_t HELPER(advsimd_f16tosinth)(uint32_t a, void *fpstp)
+{
+    float_status *fpst = fpstp;
+
+    /* Invalid if we are passed a NaN */
+    if (float16_is_any_nan(a)) {
+        float_raise(float_flag_invalid, fpst);
+        return 0;
+    }
+    return float16_to_int16(a, fpst);
+}
+
+uint32_t HELPER(advsimd_f16touinth)(uint32_t a, void *fpstp)
+{
+    float_status *fpst = fpstp;
+
+    /* Invalid if we are passed a NaN */
+    if (float16_is_any_nan(a)) {
+        float_raise(float_flag_invalid, fpst);
+        return 0;
+    }
+    return float16_to_uint16(a, fpst);
+}
+
+static int el_from_spsr(uint32_t spsr)
+{
+    /* Return the exception level that this SPSR is requesting a return to,
+     * or -1 if it is invalid (an illegal return)
+     */
+    if (spsr & PSTATE_nRW) {
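+        /* The nRW bit is set: the SPSR describes an AArch32 mode */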
+        switch (spsr & CPSR_M) {
+        case ARM_CPU_MODE_USR:
+            return 0;
+        case ARM_CPU_MODE_HYP:
+            return 2;
+        case ARM_CPU_MODE_FIQ:
+        case ARM_CPU_MODE_IRQ:
+        case ARM_CPU_MODE_SVC:
+        case ARM_CPU_MODE_ABT:
+        case ARM_CPU_MODE_UND:
+        case ARM_CPU_MODE_SYS:
+            return 1;
+        case ARM_CPU_MODE_MON:
+            /* Returning to Mon from AArch64 is never possible,
+             * so this is an illegal return.
+             */
+        default:
+            return -1;
+        }