softfloat: Fix division

author Richard Henderson <[email protected]>

Wed, 3 Oct 2018 14:35:51 +0000 (09:35 -0500)

committer Richard Henderson <[email protected]>

Fri, 5 Oct 2018 17:57:41 +0000 (12:57 -0500)
author Richard Henderson <[email protected]>
Wed, 3 Oct 2018 14:35:51 +0000 (09:35 -0500)
committer Richard Henderson <[email protected]>
Fri, 5 Oct 2018 17:57:41 +0000 (12:57 -0500)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c

index 71da0f68bbffa4358d0c0a7a101dbad310a41b47..46ae2061724406c93a05364e77f4d311e19606e7 100644 (file)
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -1112,19 +1112,38 @@ static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
      bool sign = a.sign ^ b.sign;
  
      if (a.cls == float_class_normal && b.cls == float_class_normal) {
-        uint64_t temp_lo, temp_hi;
+        uint64_t n0, n1, q, r;
          int exp = a.exp - b.exp;
+
+        /*
+         * We want a 2*N / N-bit division to produce exactly an N-bit
+         * result, so that we do not lose any precision and so that we
+         * do not have to renormalize afterward.  If A.frac < B.frac,
+         * then division would produce an (N-1)-bit result; shift A left
+         * by one to produce the an N-bit result, and decrement the
+         * exponent to match.
+         *
+         * The udiv_qrnnd algorithm that we're using requires normalization,
+         * i.e. the msb of the denominator must be set.  Since we know that
+         * DECOMPOSED_BINARY_POINT is msb-1, the inputs must be shifted left
+         * by one (more), and the remainder must be shifted right by one.
+         */
          if (a.frac < b.frac) {
              exp -= 1;
-            shortShift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1,
-                              &temp_hi, &temp_lo);
+            shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 2, &n1, &n0);
          } else {
-            shortShift128Left(0, a.frac, DECOMPOSED_BINARY_POINT,
-                              &temp_hi, &temp_lo);
+            shift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1, &n1, &n0);
          }
-        /* LSB of quot is set if inexact which roundandpack will use
-         * to set flags. Yet again we re-use a for the result */
-        a.frac = div128To64(temp_lo, temp_hi, b.frac);
+        q = udiv_qrnnd(&r, n1, n0, b.frac << 1);
+
+        /*
+         * Set lsb if there is a remainder, to set inexact.
+         * As mentioned above, to find the actual value of the remainder we
+         * would need to shift right, but (1) we are only concerned about
+         * non-zero-ness, and (2) the remainder will always be even because
+         * both inputs to the division primitive are even.
+         */
+        a.frac = q | (r != 0);
          a.sign = sign;
          a.exp = exp;
          return a;
diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h

index edc682139e49b3c93b9d577b2be843c9a47b71d8..a1d99c730d3fd218d4386ee2af0afdf2e018eb36 100644 (file)
--- a/include/fpu/softfloat-macros.h
+++ b/include/fpu/softfloat-macros.h
@@ -329,15 +329,30 @@ static inline void
  | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
  *----------------------------------------------------------------------------*/
  
-static inline void
- shortShift128Left(
-     uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
+static inline void shortShift128Left(uint64_t a0, uint64_t a1, int count,
+                                     uint64_t *z0Ptr, uint64_t *z1Ptr)
  {
+    *z1Ptr = a1 << count;
+    *z0Ptr = count == 0 ? a0 : (a0 << count) | (a1 >> (-count & 63));
+}
  
-    *z1Ptr = a1<<count;
-    *z0Ptr =
-        ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
+| number of bits given in `count'.  Any bits shifted off are lost.  The value
+| of `count' may be greater than 64.  The result is broken into two 64-bit
+| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
  
+static inline void shift128Left(uint64_t a0, uint64_t a1, int count,
+                                uint64_t *z0Ptr, uint64_t *z1Ptr)
+{
+    if (count < 64) {
+        *z1Ptr = a1 << count;
+        *z0Ptr = count == 0 ? a0 : (a0 << count) | (a1 >> (-count & 63));
+    } else {
+        *z1Ptr = 0;
+        *z0Ptr = a1 << (count - 64);
+    }
  }
  
  /*----------------------------------------------------------------------------
@@ -619,7 +634,8 @@ static inline uint64_t estimateDiv128To64(uint64_t a0, uint64_t a1, uint64_t b)
   *
   * Licensed under the GPLv2/LGPLv3
   */
-static inline uint64_t div128To64(uint64_t n0, uint64_t n1, uint64_t d)
+static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
+                                  uint64_t n0, uint64_t d)
  {
      uint64_t d0, d1, q0, q1, r1, r0, m;
  
@@ -658,8 +674,8 @@ static inline uint64_t div128To64(uint64_t n0, uint64_t n1, uint64_t d)
      }
      r0 -= m;
  
-    /* Return remainder in LSB */
-    return (q1 << 32) | q0 | (r0 != 0);
+    *r = r0;
+    return (q1 << 32) | q0;
  }
  
  /*----------------------------------------------------------------------------
author	Richard Henderson <[email protected]>
	Wed, 3 Oct 2018 14:35:51 +0000 (09:35 -0500)
committer	Richard Henderson <[email protected]>
	Fri, 5 Oct 2018 17:57:41 +0000 (12:57 -0500)
fpu/softfloat.c		patch \| blob \| blame \| history
include/fpu/softfloat-macros.h		patch \| blob \| blame \| history