1 // SPDX-License-Identifier: GPL-2.0-only
3 * IEEE754 floating point arithmetic
4 * double precision: MADDF.f (Fused Multiply Add)
5 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
7 * MIPS floating point support
8 * Copyright (C) 2015 Imagination Technologies, Ltd.
12 #include "ieee754dp.h"
15 /* 128 bits shift right logical with rounding. */
16 static void srl128(u64 *hptr, u64 *lptr, int count)
21 *lptr = *hptr != 0 || *lptr != 0;
23 } else if (count >= 64) {
25 *lptr = *hptr | (*lptr != 0);
28 *lptr = *hptr >> (count - 64);
29 *lptr |= (*hptr << (128 - count)) != 0 || low != 0;
34 *lptr = low >> count | *hptr << (64 - count);
35 *lptr |= (low << (64 - count)) != 0;
36 *hptr = *hptr >> count;
40 static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
41 union ieee754dp y, enum maddf_flags flags)
72 * Handle the cases when at least one of x, y or z is a NaN.
73 * Order of precedence is sNaN, qNaN and z, x, y.
75 if (zc == IEEE754_CLASS_SNAN)
76 return ieee754dp_nanxcpt(z);
77 if (xc == IEEE754_CLASS_SNAN)
78 return ieee754dp_nanxcpt(x);
79 if (yc == IEEE754_CLASS_SNAN)
80 return ieee754dp_nanxcpt(y);
81 if (zc == IEEE754_CLASS_QNAN)
83 if (xc == IEEE754_CLASS_QNAN)
85 if (yc == IEEE754_CLASS_QNAN)
88 if (zc == IEEE754_CLASS_DNORM)
90 /* ZERO z cases are handled separately below */
92 switch (CLPAIR(xc, yc)) {
97 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
98 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
99 ieee754_setcx(IEEE754_INVALID_OPERATION);
100 return ieee754dp_indef();
102 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
103 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
104 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
105 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
106 case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
107 if ((zc == IEEE754_CLASS_INF) &&
108 ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) ||
109 ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) {
111 * Cases of addition of infinities with opposite signs
112 * or subtraction of infinities with same signs.
114 ieee754_setcx(IEEE754_INVALID_OPERATION);
115 return ieee754dp_indef();
118 * z is here either not an infinity, or an infinity having the
119 * same sign as product (x*y) (in case of MADDF.D instruction)
120 * or product -(x*y) (in MSUBF.D case). The result must be an
121 * infinity, and its sign is determined only by the value of
122 * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y.
124 if (flags & MADDF_NEGATE_PRODUCT)
125 return ieee754dp_inf(1 ^ (xs ^ ys));
127 return ieee754dp_inf(xs ^ ys);
129 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
130 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
131 case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
132 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
133 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
134 if (zc == IEEE754_CLASS_INF)
135 return ieee754dp_inf(zs);
136 if (zc == IEEE754_CLASS_ZERO) {
137 /* Handle cases +0 + (-0) and similar ones. */
138 if ((!(flags & MADDF_NEGATE_PRODUCT)
139 && (zs == (xs ^ ys))) ||
140 ((flags & MADDF_NEGATE_PRODUCT)
141 && (zs != (xs ^ ys))))
143 * Cases of addition of zeros of equal signs
144 * or subtraction of zeroes of opposite signs.
145 * The sign of the resulting zero is in any
146 * such case determined only by the sign of z.
150 return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
152 /* x*y is here 0, and z is not 0, so just return z */
155 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
159 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
160 if (zc == IEEE754_CLASS_INF)
161 return ieee754dp_inf(zs);
165 case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
166 if (zc == IEEE754_CLASS_INF)
167 return ieee754dp_inf(zs);
171 case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
172 if (zc == IEEE754_CLASS_INF)
173 return ieee754dp_inf(zs);
174 /* continue to real computations */
177 /* Finally get to do some computation */
180 * Do the multiplication bit first
182 * rm = xm * ym, re = xe + ye basically
184 * At this point xm and ym should have been normalized.
186 assert(xm & DP_HIDDEN_BIT);
187 assert(ym & DP_HIDDEN_BIT);
191 if (flags & MADDF_NEGATE_PRODUCT)
194 /* shunt to top of word */
195 xm <<= 64 - (DP_FBITS + 1);
196 ym <<= 64 - (DP_FBITS + 1);
199 * Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm.
207 lrm = DPXMULT(lxm, lym);
208 hrm = DPXMULT(hxm, hym);
210 t = DPXMULT(lxm, hym);
212 at = lrm + (t << 32);
216 hrm = hrm + (t >> 32);
218 t = DPXMULT(hxm, lym);
220 at = lrm + (t << 32);
224 hrm = hrm + (t >> 32);
226 /* Put explicit bit at bit 126 if necessary */
227 if ((int64_t)hrm < 0) {
228 lrm = (hrm << 63) | (lrm >> 1);
233 assert(hrm & (1 << 62));
235 if (zc == IEEE754_CLASS_ZERO) {
237 * Move explicit bit from bit 126 to bit 55 since the
238 * ieee754dp_format code expects the mantissa to be
239 * 56 bits wide (53 + 3 rounding bits).
241 srl128(&hrm, &lrm, (126 - 55));
242 return ieee754dp_format(rs, re, lrm);
245 /* Move explicit bit from bit 52 to bit 126 */
248 assert(hzm & (1 << 62));
250 /* Make the exponents the same */
253 * Have to shift y fraction right to align.
256 srl128(&hrm, &lrm, s);
258 } else if (re > ze) {
260 * Have to shift x fraction right to align.
263 srl128(&hzm, &lzm, s);
267 assert(ze <= DP_EMAX);
269 /* Do the addition */
272 * Generate 128 bit result by adding two 127 bit numbers
273 * leaving result in hzm:lzm, zs and ze.
275 hzm = hzm + hrm + (lzm > (lzm + lrm));
277 if ((int64_t)hzm < 0) { /* carry out */
278 srl128(&hzm, &lzm, 1);
282 if (hzm > hrm || (hzm == hrm && lzm >= lrm)) {
283 hzm = hzm - hrm - (lzm < lrm);
286 hzm = hrm - hzm - (lrm < lzm);
290 if (lzm == 0 && hzm == 0)
291 return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
294 * Put explicit bit at bit 126 if necessary.
297 /* left shift by 63 or 64 bits */
298 if ((int64_t)lzm < 0) {
299 /* MSB of lzm is the explicit bit */
311 while ((hzm >> (62 - t)) == 0)
316 hzm = hzm << t | lzm >> (64 - t);
323 * Move explicit bit from bit 126 to bit 55 since the
324 * ieee754dp_format code expects the mantissa to be
325 * 56 bits wide (53 + 3 rounding bits).
327 srl128(&hzm, &lzm, (126 - 55));
329 return ieee754dp_format(zs, ze, lzm);
332 union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
335 return _dp_maddf(z, x, y, 0);
338 union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x,
341 return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);