d[i] = ror32(n[i] ^ m[i], shr);
}
}
+
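+/*
+ * FMMLA: floating-point matrix multiply-accumulate.  Each 16-byte
+ * segment of the vectors holds a 2x2 matrix of float32 in row-major
+ * order; for each segment, D = A + N * M^T, i.e.
+ * D[i][j] = A[i][j] + N[i][0] * M[j][0] + N[i][1] * M[j][1].
+ */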
+void HELPER(fmmla_s)(void *vd, void *vn, void *vm, void *va,
+ void *status, uint32_t desc)
+{
+ intptr_t s, opr_sz = simd_oprsz(desc) / (sizeof(float32) * 4);
+
+ for (s = 0; s < opr_sz; ++s) {
+ float32 *n = vn + s * sizeof(float32) * 4;
+ float32 *m = vm + s * sizeof(float32) * 4;
+ float32 *a = va + s * sizeof(float32) * 4;
+ float32 *d = vd + s * sizeof(float32) * 4;
+ float32 n00 = n[H4(0)], n01 = n[H4(1)];
+ float32 n10 = n[H4(2)], n11 = n[H4(3)];
+ float32 m00 = m[H4(0)], m01 = m[H4(1)];
+ float32 m10 = m[H4(2)], m11 = m[H4(3)];
+ float32 p0, p1;
+
+ /* i = 0, j = 0 */
+ p0 = float32_mul(n00, m00, status);
+ p1 = float32_mul(n01, m01, status);
+ d[H4(0)] = float32_add(a[H4(0)], float32_add(p0, p1, status), status);
+
+ /* i = 0, j = 1 */
+ p0 = float32_mul(n00, m10, status);
+ p1 = float32_mul(n01, m11, status);
+ d[H4(1)] = float32_add(a[H4(1)], float32_add(p0, p1, status), status);
+
+ /* i = 1, j = 0 */
+ p0 = float32_mul(n10, m00, status);
+ p1 = float32_mul(n11, m01, status);
+ d[H4(2)] = float32_add(a[H4(2)], float32_add(p0, p1, status), status);
+
+ /* i = 1, j = 1 */
+ p0 = float32_mul(n10, m10, status);
+ p1 = float32_mul(n11, m11, status);
+ d[H4(3)] = float32_add(a[H4(3)], float32_add(p0, p1, status), status);
+ }
+}
+
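+/* As above, but each 32-byte segment holds a 2x2 matrix of float64. */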
+void HELPER(fmmla_d)(void *vd, void *vn, void *vm, void *va,
+ void *status, uint32_t desc)
+{
+ intptr_t s, opr_sz = simd_oprsz(desc) / (sizeof(float64) * 4);
+
+ for (s = 0; s < opr_sz; ++s) {
+ float64 *n = vn + s * sizeof(float64) * 4;
+ float64 *m = vm + s * sizeof(float64) * 4;
+ float64 *a = va + s * sizeof(float64) * 4;
+ float64 *d = vd + s * sizeof(float64) * 4;
+ float64 n00 = n[0], n01 = n[1], n10 = n[2], n11 = n[3];
+ float64 m00 = m[0], m01 = m[1], m10 = m[2], m11 = m[3];
+ float64 p0, p1;
+
+ /* i = 0, j = 0 */
+ p0 = float64_mul(n00, m00, status);
+ p1 = float64_mul(n01, m01, status);
+ d[0] = float64_add(a[0], float64_add(p0, p1, status), status);
+
+ /* i = 0, j = 1 */
+ p0 = float64_mul(n00, m10, status);
+ p1 = float64_mul(n01, m11, status);
+ d[1] = float64_add(a[1], float64_add(p0, p1, status), status);
+
+ /* i = 1, j = 0 */
+ p0 = float64_mul(n10, m00, status);
+ p1 = float64_mul(n11, m01, status);
+ d[2] = float64_add(a[2], float64_add(p0, p1, status), status);
+
+ /* i = 1, j = 1 */
+ p0 = float64_mul(n10, m10, status);
+ p1 = float64_mul(n11, m11, status);
+ d[3] = float64_add(a[3], float64_add(p0, p1, status), status);
+ }
+}
* SVE Integer Multiply-Add (unpredicated)
*/
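+/*
+ * Select the FMMLA helper from the element size; each size is gated
+ * by its own ID register field (F32MM for .S, F64MM for .D).
+ */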
+static bool trans_FMMLA(DisasContext *s, arg_rrrr_esz *a)
+{
+ gen_helper_gvec_4_ptr *fn;
+
+ switch (a->esz) {
+ case MO_32:
+ if (!dc_isar_feature(aa64_sve_f32mm, s)) {
+ return false;
+ }
+ fn = gen_helper_fmmla_s;
+ break;
+ case MO_64:
+ if (!dc_isar_feature(aa64_sve_f64mm, s)) {
+ return false;
+ }
+ fn = gen_helper_fmmla_d;
+ break;
+ default:
+ return false;
+ }
+
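+    /*
+     * Expand as a 4-operand gvec helper call with the FP status
+     * pointer; oprsz and maxsz are both the full vector length.
+     */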
+ if (sve_access_check(s)) {
+ unsigned vsz = vec_full_reg_size(s);
+ TCGv_ptr status = fpstatus_ptr(FPST_FPCR);
+ tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ vec_full_reg_offset(s, a->rm),
+ vec_full_reg_offset(s, a->ra),
+ status, vsz, vsz, 0, fn);
+ tcg_temp_free_ptr(status);
+ }
+ return true;
+}
+
static bool do_sqdmlal_zzzw(DisasContext *s, arg_rrrr_esz *a,
bool sel1, bool sel2)
{