softfloat: Provide complete set of accessors for fp state

[qemu.git] / fpu / softfloat.c
diff --git a/fpu/softfloat.c b/fpu/softfloat.c

index b29256a8ebe3bb9aab7ac33f0b4fcd563ecebce0..4abcd36b1570ef71c3b04c72a94f5534f90449e8 100644 (file)
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -40,7 +40,7 @@ these four paragraphs for those parts of this code that are retained.
   */
  #include "config.h"
  
-#include "softfloat.h"
+#include "fpu/softfloat.h"
  
  /*----------------------------------------------------------------------------
  | Primitive arithmetic functions, including multi-word arithmetic, and
@@ -59,21 +59,6 @@ these four paragraphs for those parts of this code that are retained.
  *----------------------------------------------------------------------------*/
  #include "softfloat-specialize.h"
  
-void set_float_rounding_mode(int val STATUS_PARAM)
-{
-    STATUS(float_rounding_mode) = val;
-}
-
-void set_float_exception_flags(int val STATUS_PARAM)
-{
-    STATUS(float_exception_flags) = val;
-}
-
-void set_floatx80_rounding_precision(int val STATUS_PARAM)
-{
-    STATUS(floatx80_rounding_precision) = val;
-}
-
  /*----------------------------------------------------------------------------
  | Returns the fraction bits of the half-precision floating-point value `a'.
  *----------------------------------------------------------------------------*/
@@ -203,6 +188,56 @@ static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATU
  
  }
  
+/*----------------------------------------------------------------------------
+| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
+| `absZ1', with binary point between bits 63 and 64 (between the input words),
+| and returns the properly rounded 64-bit unsigned integer corresponding to the
+| input.  `zSign' is the sign of the value being converted.  Ordinarily, the
+| fixed-point input is simply rounded to an integer according to the current
+| rounding mode, with the inexact exception raised if the input cannot be
+| represented exactly as an integer.  However, if rounding carries out of
+| 64 bits, the invalid exception is raised and the largest unsigned integer
+| is returned.  If `zSign' is set and the rounded magnitude is nonzero, the
+| invalid exception is raised and zero is returned.
+*----------------------------------------------------------------------------*/
+
+static uint64_t roundAndPackUint64(flag zSign, uint64_t absZ0,
+                                   uint64_t absZ1 STATUS_PARAM)
+{
+    int8 roundingMode;
+    flag roundNearestEven, increment;
+
+    roundingMode = STATUS(float_rounding_mode);
+    roundNearestEven = (roundingMode == float_round_nearest_even);
+    /* Nearest-even default: round up when the fraction is at least 1/2,
+     * i.e. when the top bit of the fraction word is set.
+     */
+    increment = ((int64_t)absZ1 < 0);
+    if (!roundNearestEven) {
+        if (roundingMode == float_round_to_zero) {
+            increment = 0;
+        } else if (absZ1) {
+            /* Directed rounding only moves away from zero when there is a
+             * nonzero fraction and the direction matches the sign.
+             */
+            if (zSign) {
+                increment = (roundingMode == float_round_down);
+            } else {
+                increment = (roundingMode == float_round_up);
+            }
+        }
+    }
+    if (increment) {
+        ++absZ0;
+        if (absZ0 == 0) {
+            /* The increment wrapped: the result does not fit in 64 bits. */
+            float_raise(float_flag_invalid STATUS_VAR);
+            return LIT64(0xFFFFFFFFFFFFFFFF);
+        }
+        /* Exact tie under nearest-even: clear the LSB to make it even. */
+        absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
+    }
+
+    if (zSign && absZ0) {
+        /* A negative value that did not round to zero is unrepresentable. */
+        float_raise(float_flag_invalid STATUS_VAR);
+        return 0;
+    }
+
+    if (absZ1) {
+        STATUS(float_exception_flags) |= float_flag_inexact;
+    }
+    return absZ0;
+}
+
  /*----------------------------------------------------------------------------
  | Returns the fraction bits of the single-precision floating-point value `a'.
  *----------------------------------------------------------------------------*/
@@ -1121,7 +1156,7 @@ static float128
  | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  *----------------------------------------------------------------------------*/
  
-float32 int32_to_float32( int32 a STATUS_PARAM )
+float32 int32_to_float32(int32_t a STATUS_PARAM)
  {
      flag zSign;
  
@@ -1138,7 +1173,7 @@ float32 int32_to_float32( int32 a STATUS_PARAM )
  | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  *----------------------------------------------------------------------------*/
  
-float64 int32_to_float64( int32 a STATUS_PARAM )
+float64 int32_to_float64(int32_t a STATUS_PARAM)
  {
      flag zSign;
      uint32 absA;
@@ -1161,7 +1196,7 @@ float64 int32_to_float64( int32 a STATUS_PARAM )
  | Arithmetic.
  *----------------------------------------------------------------------------*/
  
-floatx80 int32_to_floatx80( int32 a STATUS_PARAM )
+floatx80 int32_to_floatx80(int32_t a STATUS_PARAM)
  {
      flag zSign;
      uint32 absA;
@@ -1183,7 +1218,7 @@ floatx80 int32_to_floatx80( int32 a STATUS_PARAM )
  | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  *----------------------------------------------------------------------------*/
  
-float128 int32_to_float128( int32 a STATUS_PARAM )
+float128 int32_to_float128(int32_t a STATUS_PARAM)
  {
      flag zSign;
      uint32 absA;
@@ -1205,7 +1240,7 @@ float128 int32_to_float128( int32 a STATUS_PARAM )
  | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  *----------------------------------------------------------------------------*/
  
-float32 int64_to_float32( int64 a STATUS_PARAM )
+float32 int64_to_float32(int64_t a STATUS_PARAM)
  {
      flag zSign;
      uint64 absA;
@@ -1231,14 +1266,14 @@ float32 int64_to_float32( int64 a STATUS_PARAM )
  
  }
  
-float32 uint64_to_float32( uint64 a STATUS_PARAM )
+float32 uint64_to_float32(uint64_t a STATUS_PARAM)
  {
      int8 shiftCount;
  
      if ( a == 0 ) return float32_zero;
      shiftCount = countLeadingZeros64( a ) - 40;
      if ( 0 <= shiftCount ) {
-        return packFloat32( 1 > 0, 0x95 - shiftCount, a<<shiftCount );
+        return packFloat32(0, 0x95 - shiftCount, a<<shiftCount);
      }
      else {
          shiftCount += 7;
@@ -1248,7 +1283,7 @@ float32 uint64_to_float32( uint64 a STATUS_PARAM )
          else {
              a <<= shiftCount;
          }
-        return roundAndPackFloat32( 1 > 0, 0x9C - shiftCount, a STATUS_VAR );
+        return roundAndPackFloat32(0, 0x9C - shiftCount, a STATUS_VAR);
      }
  }
  
@@ -1258,7 +1293,7 @@ float32 uint64_to_float32( uint64 a STATUS_PARAM )
  | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  *----------------------------------------------------------------------------*/
  
-float64 int64_to_float64( int64 a STATUS_PARAM )
+float64 int64_to_float64(int64_t a STATUS_PARAM)
  {
      flag zSign;
  
@@ -1271,11 +1306,18 @@ float64 int64_to_float64( int64 a STATUS_PARAM )
  
  }
  
-float64 uint64_to_float64( uint64 a STATUS_PARAM )
+/* Converts the 64-bit unsigned integer `a' to double precision; the final
+ * normalization and rounding are done by normalizeRoundAndPackFloat64().
+ */
+float64 uint64_to_float64(uint64_t a STATUS_PARAM)
  {
-    if ( a == 0 ) return float64_zero;
-    return normalizeRoundAndPackFloat64( 0, 0x43C, a STATUS_VAR );
+    int exp =  0x43C;
  
+    if (a == 0) {
+        return float64_zero;
+    }
+    /* Bit 63 set: shift the significand right by one and bump the exponent
+     * to compensate.  NOTE(review): shift64RightJamming presumably folds the
+     * shifted-out bit into the LSB so rounding still sees it -- confirm.
+     */
+    if ((int64_t)a < 0) {
+        shift64RightJamming(a, 1, &a);
+        exp += 1;
+    }
+    return normalizeRoundAndPackFloat64(0, exp, a STATUS_VAR);
  }
  
  /*----------------------------------------------------------------------------
@@ -1285,7 +1327,7 @@ float64 uint64_to_float64( uint64 a STATUS_PARAM )
  | Arithmetic.
  *----------------------------------------------------------------------------*/
  
-floatx80 int64_to_floatx80( int64 a STATUS_PARAM )
+floatx80 int64_to_floatx80(int64_t a STATUS_PARAM)
  {
      flag zSign;
      uint64 absA;
@@ -1305,7 +1347,7 @@ floatx80 int64_to_floatx80( int64 a STATUS_PARAM )
  | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
  *----------------------------------------------------------------------------*/
  
-float128 int64_to_float128( int64 a STATUS_PARAM )
+float128 int64_to_float128(int64_t a STATUS_PARAM)
  {
      flag zSign;
      uint64 absA;
@@ -1332,6 +1374,14 @@ float128 int64_to_float128( int64 a STATUS_PARAM )
  
  }
  
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit unsigned integer `a' to the
+| quadruple-precision floating-point format.  Zero is returned as
+| float128_zero; any other value is normalized and packed with a positive
+| sign.
+*----------------------------------------------------------------------------*/
+
+float128 uint64_to_float128(uint64_t a STATUS_PARAM)
+{
+    if (a == 0) {
+        return float128_zero;
+    }
+    /* normalizeRoundAndPackFloat128() takes the significand as (high word,
+     * low word).  With exponent 0x406E the integer belongs in the low word;
+     * passing it as the high word yields a result scaled by 2^64.
+     */
+    return normalizeRoundAndPackFloat128(0, 0x406E, 0, a STATUS_VAR);
+}
+
  /*----------------------------------------------------------------------------
  | Returns the result of converting the single-precision floating-point value
  | `a' to the 32-bit two's complement integer format.  The conversion is
@@ -1492,6 +1542,52 @@ int64 float32_to_int64( float32 a STATUS_PARAM )
  
  }
  
+/*----------------------------------------------------------------------------
+| Converts the single-precision floating-point value `a' to the 64-bit
+| unsigned integer format, rounding according to the current rounding mode.
+| A NaN, or a value too large to represent, raises the invalid exception and
+| yields the largest unsigned integer.  A negative input whose magnitude is
+| at least one (including negative infinity) raises the invalid exception and
+| yields zero.  Smaller negative inputs are rounded by roundAndPackUint64():
+| those that round to zero return zero, those that do not raise the invalid
+| exception and also return zero.
+*----------------------------------------------------------------------------*/
+
+uint64 float32_to_uint64(float32 a STATUS_PARAM)
+{
+    flag negative;
+    int_fast16_t exponent, rshift;
+    uint64_t sig, sigExtra;
+
+    a = float32_squash_input_denormal(a STATUS_VAR);
+    sig = extractFloat32Frac(a);
+    exponent = extractFloat32Exp(a);
+    negative = extractFloat32Sign(a);
+
+    /* Negative with biased exponent >= 127: magnitude >= 1, Inf or NaN. */
+    if (negative && exponent >= 127) {
+        float_raise(float_flag_invalid STATUS_VAR);
+        return float32_is_any_nan(a) ? LIT64(0xFFFFFFFFFFFFFFFF) : 0;
+    }
+
+    /* Right shift that lines the binary point up with bit 0 of the result;
+     * a negative count means the value needs more than 64 integer bits.
+     */
+    rshift = 0xBE - exponent;
+    if (rshift < 0) {
+        float_raise(float_flag_invalid STATUS_VAR);
+        return LIT64(0xFFFFFFFFFFFFFFFF);
+    }
+
+    if (exponent != 0) {
+        /* Normal number: restore the implicit leading one. */
+        sig |= 0x00800000;
+    }
+    /* Move the 24-bit significand to the top of the 64-bit word. */
+    sig <<= 40;
+    shift64ExtraRightJamming(sig, 0, rshift, &sig, &sigExtra);
+    return roundAndPackUint64(negative, sig, sigExtra STATUS_VAR);
+}
+
  /*----------------------------------------------------------------------------
  | Returns the result of converting the single-precision floating-point value
  | `a' to the 64-bit two's complement integer format.  The conversion is
@@ -2219,7 +2315,7 @@ float32 float32_muladd(float32 a, float32 b, float32 c, int flags STATUS_PARAM)
              }
          }
          /* Zero plus something non-zero : just return the something */
-        return make_float32(float32_val(c) ^ (signflip << 31));
+        return packFloat32(cSign ^ signflip, cExp, cSig);
      }
  
      if (aExp == 0) {
@@ -3007,7 +3103,7 @@ float32 float16_to_float32(float16 a, flag ieee STATUS_PARAM)
          if (aSig) {
              return commonNaNToFloat32(float16ToCommonNaN(a STATUS_VAR) STATUS_VAR);
          }
-        return packFloat32(aSign, 0xff, aSig << 13);
+        return packFloat32(aSign, 0xff, 0);
      }
      if (aExp == 0) {
          int8 shiftCount;
@@ -3031,6 +3127,10 @@ float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM)
      uint32_t mask;
      uint32_t increment;
      int8 roundingMode;
+    int maxexp = ieee ? 15 : 16;
+    bool rounding_bumps_exp;
+    bool is_tiny = false;
+
      a = float32_squash_input_denormal(a STATUS_VAR);
  
      aSig = extractFloat32Frac( a );
@@ -3039,11 +3139,12 @@ float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM)
      if ( aExp == 0xFF ) {
          if (aSig) {
              /* Input is a NaN */
-            float16 r = commonNaNToFloat16( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
              if (!ieee) {
+                float_raise(float_flag_invalid STATUS_VAR);
                  return packFloat16(aSign, 0, 0);
              }
-            return r;
+            return commonNaNToFloat16(
+                float32ToCommonNaN(a STATUS_VAR) STATUS_VAR);
          }
          /* Infinity */
          if (!ieee) {
@@ -3055,58 +3156,80 @@ float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM)
      if (aExp == 0 && aSig == 0) {
          return packFloat16(aSign, 0, 0);
      }
-    /* Decimal point between bits 22 and 23.  */
+    /* Decimal point between bits 22 and 23. Note that we add the 1 bit
+     * even if the input is denormal; however this is harmless because
+     * the largest possible single-precision denormal is still smaller
+     * than the smallest representable half-precision denormal, and so we
+     * will end up ignoring aSig and returning via the "always return zero"
+     * codepath.
+     */
      aSig |= 0x00800000;
      aExp -= 0x7f;
+    /* Calculate the mask of bits of the mantissa which are not
+     * representable in half-precision and will be lost.
+     */
      if (aExp < -14) {
+        /* Will be denormal in halfprec */
          mask = 0x00ffffff;
          if (aExp >= -24) {
              mask >>= 25 + aExp;
          }
      } else {
+        /* Normal number in halfprec */
          mask = 0x00001fff;
      }
-    if (aSig & mask) {
-        float_raise( float_flag_underflow STATUS_VAR );
-        roundingMode = STATUS(float_rounding_mode);
-        switch (roundingMode) {
-        case float_round_nearest_even:
-            increment = (mask + 1) >> 1;
-            if ((aSig & mask) == increment) {
-                increment = aSig & (increment << 1);
-            }
-            break;
-        case float_round_up:
-            increment = aSign ? 0 : mask;
-            break;
-        case float_round_down:
-            increment = aSign ? mask : 0;
-            break;
-        default: /* round_to_zero */
-            increment = 0;
-            break;
-        }
-        aSig += increment;
-        if (aSig >= 0x01000000) {
-            aSig >>= 1;
-            aExp++;
-        }
-    } else if (aExp < -14
-          && STATUS(float_detect_tininess) == float_tininess_before_rounding) {
-        float_raise( float_flag_underflow STATUS_VAR);
-    }
  
-    if (ieee) {
-        if (aExp > 15) {
-            float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
+    roundingMode = STATUS(float_rounding_mode);
+    switch (roundingMode) {
+    case float_round_nearest_even:
+        increment = (mask + 1) >> 1;
+        if ((aSig & mask) == increment) {
+            increment = aSig & (increment << 1);
+        }
+        break;
+    case float_round_up:
+        increment = aSign ? 0 : mask;
+        break;
+    case float_round_down:
+        increment = aSign ? mask : 0;
+        break;
+    default: /* round_to_zero */
+        increment = 0;
+        break;
+    }
+
+    rounding_bumps_exp = (aSig + increment >= 0x01000000);
+
+    if (aExp > maxexp || (aExp == maxexp && rounding_bumps_exp)) {
+        if (ieee) {
+            float_raise(float_flag_overflow | float_flag_inexact STATUS_VAR);
              return packFloat16(aSign, 0x1f, 0);
-        }
-    } else {
-        if (aExp > 16) {
-            float_raise(float_flag_invalid | float_flag_inexact STATUS_VAR);
+        } else {
+            float_raise(float_flag_invalid STATUS_VAR);
              return packFloat16(aSign, 0x1f, 0x3ff);
          }
      }
+
+    if (aExp < -14) {
+        /* Note that flush-to-zero does not affect half-precision results */
+        is_tiny =
+            (STATUS(float_detect_tininess) == float_tininess_before_rounding)
+            || (aExp < -15)
+            || (!rounding_bumps_exp);
+    }
+    if (aSig & mask) {
+        float_raise(float_flag_inexact STATUS_VAR);
+        if (is_tiny) {
+            float_raise(float_flag_underflow STATUS_VAR);
+        }
+    }
+
+    aSig += increment;
+    if (rounding_bumps_exp) {
+        aSig >>= 1;
+        aExp++;
+    }
+
      if (aExp < -24) {
          return packFloat16(aSign, 0, 0);
      }
@@ -3772,7 +3895,7 @@ float64 float64_muladd(float64 a, float64 b, float64 c, int flags STATUS_PARAM)
              }
          }
          /* Zero plus something non-zero : just return the something */
-        return make_float64(float64_val(c) ^ ((uint64_t)signflip << 63));
+        return packFloat64(cSign ^ signflip, cExp, cSig);
      }
  
      if (aExp == 0) {
@@ -3883,9 +4006,15 @@ float64 float64_muladd(float64 a, float64 b, float64 c, int flags STATUS_PARAM)
              }
              zExp -= shiftcount;
          } else {
-            shiftcount = countLeadingZeros64(zSig1) - 1;
-            zSig0 = zSig1 << shiftcount;
-            zExp -= (shiftcount + 64);
+            shiftcount = countLeadingZeros64(zSig1);
+            if (shiftcount == 0) {
+                zSig0 = (zSig1 >> 1) | (zSig1 & 1);
+                zExp -= 63;
+            } else {
+                shiftcount--;
+                zSig0 = zSig1 << shiftcount;
+                zExp -= (shiftcount + 64);
+            }
          }
          return roundAndPackFloat64(zSign, zExp, zSig0 STATUS_VAR);
      }
@@ -6397,12 +6526,12 @@ int float128_unordered_quiet( float128 a, float128 b STATUS_PARAM )
  }
  
  /* misc functions */
-float32 uint32_to_float32( uint32 a STATUS_PARAM )
+/* Converts the 32-bit unsigned integer `a' to single precision by passing it
+ * to int64_to_float32(); every uint32_t value fits in the int64_t parameter.
+ */
+float32 uint32_to_float32(uint32_t a STATUS_PARAM)
  {
      return int64_to_float32(a STATUS_VAR);
  }
  
-float64 uint32_to_float64( uint32 a STATUS_PARAM )
+/* Converts the 32-bit unsigned integer `a' to double precision by passing it
+ * to int64_to_float64(); every uint32_t value fits in the int64_t parameter.
+ */
+float64 uint32_to_float64(uint32_t a STATUS_PARAM)
  {
      return int64_to_float64(a STATUS_VAR);
  }
@@ -6411,17 +6540,18 @@ uint32 float32_to_uint32( float32 a STATUS_PARAM )
  {
      int64_t v;
      uint32 res;
+    int old_exc_flags = get_float_exception_flags(status);
  
      v = float32_to_int64(a STATUS_VAR);
      if (v < 0) {
          res = 0;
-        float_raise( float_flag_invalid STATUS_VAR);
      } else if (v > 0xffffffff) {
          res = 0xffffffff;
-        float_raise( float_flag_invalid STATUS_VAR);
      } else {
-        res = v;
+        return v;
      }
+    set_float_exception_flags(old_exc_flags, status);
+    float_raise(float_flag_invalid STATUS_VAR);
      return res;
  }
  
@@ -6429,17 +6559,58 @@ uint32 float32_to_uint32_round_to_zero( float32 a STATUS_PARAM )
  {
      int64_t v;
      uint32 res;
+    int old_exc_flags = get_float_exception_flags(status);
  
      v = float32_to_int64_round_to_zero(a STATUS_VAR);
      if (v < 0) {
          res = 0;
-        float_raise( float_flag_invalid STATUS_VAR);
      } else if (v > 0xffffffff) {
          res = 0xffffffff;
-        float_raise( float_flag_invalid STATUS_VAR);
      } else {
-        res = v;
+        return v;
+    }
+    set_float_exception_flags(old_exc_flags, status);
+    float_raise(float_flag_invalid STATUS_VAR);
+    return res;
+}
+
+/* Converts `a' to a signed 16-bit integer by way of float32_to_int32().
+ * A result outside -0x8000..0x7fff is saturated to the nearer bound; in that
+ * case the exception flags are reset to their value on entry and the invalid
+ * flag is then raised.
+ */
+int_fast16_t float32_to_int16(float32 a STATUS_PARAM)
+{
+    const int saved_flags = get_float_exception_flags(status);
+    int32_t wide = float32_to_int32(a STATUS_VAR);
+
+    if (wide >= -0x8000 && wide <= 0x7fff) {
+        return wide;
+    }
+
+    /* Out of range: drop whatever the 32-bit conversion flagged. */
+    set_float_exception_flags(saved_flags, status);
+    float_raise(float_flag_invalid STATUS_VAR);
+    return (wide < 0) ? -0x8000 : 0x7fff;
+}
+
+/* Converts `a' to an unsigned 16-bit integer by way of float32_to_int32().
+ * A result outside 0..0xffff is saturated to the nearer bound; in that case
+ * the exception flags are reset to their value on entry and the invalid flag
+ * is then raised.
+ */
+uint_fast16_t float32_to_uint16(float32 a STATUS_PARAM)
+{
+    int32_t v;
+    uint_fast16_t res;
+    int old_exc_flags = get_float_exception_flags(status);
+
+    v = float32_to_int32(a STATUS_VAR);
+    if (v < 0) {
+        res = 0;
+    } else if (v > 0xffff) {
+        res = 0xffff;
+    } else {
+        return v;
      }
+
+    /* Saturated: drop whatever the 32-bit conversion flagged. */
+    set_float_exception_flags(old_exc_flags, status);
+    float_raise(float_flag_invalid STATUS_VAR);
      return res;
  }
  
@@ -6447,53 +6618,92 @@ uint_fast16_t float32_to_uint16_round_to_zero(float32 a STATUS_PARAM)
  {
      int64_t v;
      uint_fast16_t res;
+    int old_exc_flags = get_float_exception_flags(status);
  
      v = float32_to_int64_round_to_zero(a STATUS_VAR);
      if (v < 0) {
          res = 0;
-        float_raise( float_flag_invalid STATUS_VAR);
      } else if (v > 0xffff) {
          res = 0xffff;
-        float_raise( float_flag_invalid STATUS_VAR);
      } else {
-        res = v;
+        return v;
      }
+    set_float_exception_flags(old_exc_flags, status);
+    float_raise(float_flag_invalid STATUS_VAR);
      return res;
  }
  
  uint32 float64_to_uint32( float64 a STATUS_PARAM )
  {
-    int64_t v;
+    uint64_t v;
      uint32 res;
+    int old_exc_flags = get_float_exception_flags(status);
  
-    v = float64_to_int64(a STATUS_VAR);
-    if (v < 0) {
-        res = 0;
-        float_raise( float_flag_invalid STATUS_VAR);
-    } else if (v > 0xffffffff) {
+    /* Convert through float64_to_uint64(); results that fit in 32 bits are
+     * returned with whatever flags that conversion raised.
+     */
+    v = float64_to_uint64(a STATUS_VAR);
+    if (v > 0xffffffff) {
          res = 0xffffffff;
-        float_raise( float_flag_invalid STATUS_VAR);
      } else {
-        res = v;
+        return v;
      }
+    /* Saturated: reset the flags to their value on entry, then raise invalid. */
+    set_float_exception_flags(old_exc_flags, status);
+    float_raise(float_flag_invalid STATUS_VAR);
      return res;
  }
  
  uint32 float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
  {
-    int64_t v;
+    uint64_t v;
      uint32 res;
+    int old_exc_flags = get_float_exception_flags(status);
  
-    v = float64_to_int64_round_to_zero(a STATUS_VAR);
+    /* Convert through float64_to_uint64_round_to_zero(); results that fit in
+     * 32 bits are returned with whatever flags that conversion raised.
+     */
+    v = float64_to_uint64_round_to_zero(a STATUS_VAR);
+    if (v > 0xffffffff) {
+        res = 0xffffffff;
+    } else {
+        return v;
+    }
+    /* Saturated: reset the flags to their value on entry, then raise invalid. */
+    set_float_exception_flags(old_exc_flags, status);
+    float_raise(float_flag_invalid STATUS_VAR);
+    return res;
+}
+
+/* Converts `a' to a signed 16-bit integer by way of float64_to_int32().
+ * A result outside -0x8000..0x7fff is saturated to the nearer bound; in that
+ * case the exception flags are reset to their value on entry and the invalid
+ * flag is then raised.
+ */
+int_fast16_t float64_to_int16(float64 a STATUS_PARAM)
+{
+    const int saved_flags = get_float_exception_flags(status);
+    int64_t wide = float64_to_int32(a STATUS_VAR);
+
+    if (wide >= -0x8000 && wide <= 0x7fff) {
+        return wide;
+    }
+
+    /* Out of range: drop whatever the 32-bit conversion flagged. */
+    set_float_exception_flags(saved_flags, status);
+    float_raise(float_flag_invalid STATUS_VAR);
+    return (wide < 0) ? -0x8000 : 0x7fff;
+}
+
+/* Converts `a' to an unsigned 16-bit integer by way of float64_to_int32().
+ * A result outside 0..0xffff is saturated to the nearer bound; in that case
+ * the exception flags are reset to their value on entry and the invalid flag
+ * is then raised.
+ */
+uint_fast16_t float64_to_uint16(float64 a STATUS_PARAM)
+{
+    int64_t v;
+    uint_fast16_t res;
+    int old_exc_flags = get_float_exception_flags(status);
+
+    v = float64_to_int32(a STATUS_VAR);
      if (v < 0) {
          res = 0;
-        float_raise( float_flag_invalid STATUS_VAR);
-    } else if (v > 0xffffffff) {
-        res = 0xffffffff;
-        float_raise( float_flag_invalid STATUS_VAR);
+    } else if (v > 0xffff) {
+        res = 0xffff;
      } else {
-        res = v;
+        return v;
      }
+
+    /* Saturated: drop whatever the 32-bit conversion flagged. */
+    set_float_exception_flags(old_exc_flags, status);
+    float_raise(float_flag_invalid STATUS_VAR);
      return res;
  }
  
@@ -6501,41 +6711,75 @@ uint_fast16_t float64_to_uint16_round_to_zero(float64 a STATUS_PARAM)
  {
      int64_t v;
      uint_fast16_t res;
+    int old_exc_flags = get_float_exception_flags(status);
  
      v = float64_to_int64_round_to_zero(a STATUS_VAR);
      if (v < 0) {
          res = 0;
-        float_raise( float_flag_invalid STATUS_VAR);
      } else if (v > 0xffff) {
          res = 0xffff;
-        float_raise( float_flag_invalid STATUS_VAR);
      } else {
-        res = v;
+        return v;
      }
+    set_float_exception_flags(old_exc_flags, status);
+    float_raise(float_flag_invalid STATUS_VAR);
      return res;
  }
  
-/* FIXME: This looks broken.  */
-uint64_t float64_to_uint64 (float64 a STATUS_PARAM)
-{
-    int64_t v;
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 64-bit unsigned integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN, the largest
+| unsigned integer is returned.  If the conversion overflows, the
+| largest unsigned integer is returned.  If `a' is negative, the value is
+| rounded and zero is returned; negative values that do not round to zero
+| will raise the invalid exception.
+*----------------------------------------------------------------------------*/
  
-    v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
-    v += float64_val(a);
-    v = float64_to_int64(make_float64(v) STATUS_VAR);
+uint64_t float64_to_uint64(float64 a STATUS_PARAM)
+{
+    flag aSign;
+    int_fast16_t aExp, shiftCount;
+    uint64_t aSig, aSigExtra;
+    a = float64_squash_input_denormal(a STATUS_VAR);
  
-    return v - INT64_MIN;
+    aSig = extractFloat64Frac(a);
+    aExp = extractFloat64Exp(a);
+    aSign = extractFloat64Sign(a);
+    /* Negative with biased exponent > 1022: magnitude >= 1, Inf or NaN. */
+    if (aSign && (aExp > 1022)) {
+        float_raise(float_flag_invalid STATUS_VAR);
+        if (float64_is_any_nan(a)) {
+            return LIT64(0xFFFFFFFFFFFFFFFF);
+        } else {
+            return 0;
+        }
+    }
+    /* Normal number: restore the implicit leading one. */
+    if (aExp) {
+        aSig |= LIT64(0x0010000000000000);
+    }
+    /* Positive count: fraction bits to shift out; otherwise shift left. */
+    shiftCount = 0x433 - aExp;
+    if (shiftCount <= 0) {
+        /* Biased exponent above 0x43E: the value needs more than 64 bits. */
+        if (0x43E < aExp) {
+            float_raise(float_flag_invalid STATUS_VAR);
+            return LIT64(0xFFFFFFFFFFFFFFFF);
+        }
+        aSigExtra = 0;
+        aSig <<= -shiftCount;
+    } else {
+        shift64ExtraRightJamming(aSig, 0, shiftCount, &aSig, &aSigExtra);
+    }
+    return roundAndPackUint64(aSign, aSig, aSigExtra STATUS_VAR);
  }
  
  uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
  {
-    int64_t v;
-
-    v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
-    v += float64_val(a);
-    v = float64_to_int64_round_to_zero(make_float64(v) STATUS_VAR);
-
-    return v - INT64_MIN;
+    signed char current_rounding_mode = STATUS(float_rounding_mode);
+    set_float_rounding_mode(float_round_to_zero STATUS_VAR);
+    int64_t v = float64_to_uint64(a STATUS_VAR);
+    set_float_rounding_mode(current_rounding_mode STATUS_VAR);
+    return v;
  }
  
  #define COMPARE(s, nan_exp)                                                  \
@@ -6684,10 +6928,17 @@ int float128_compare_quiet( float128 a, float128 b STATUS_PARAM )
  /* min() and max() functions. These can't be implemented as
   * 'compare and pick one input' because that would mishandle
   * NaNs and +0 vs -0.
+ *
+ * minnum() and maxnum() functions. These are similar to the min()
+ * and max() functions but if one of the arguments is a QNaN and
+ * the other is numerical then the numerical argument is returned.
+ * minnum() and maxnum() correspond to the IEEE 754-2008 minNum()
+ * and maxNum() operations. min() and max() are the typical min/max
+ * semantics provided by many CPUs which predate that specification.
   */
-#define MINMAX(s, nan_exp)                                              \
+#define MINMAX(s)                                                       \
  INLINE float ## s float ## s ## _minmax(float ## s a, float ## s b,     \
-                                        int ismin STATUS_PARAM )        \
+                                        int ismin, int isieee STATUS_PARAM) \
  {                                                                       \
      flag aSign, bSign;                                                  \
      uint ## s ## _t av, bv;                                             \
@@ -6695,6 +6946,15 @@ INLINE float ## s float ## s ## _minmax(float ## s a, float ## s b,     \
      b = float ## s ## _squash_input_denormal(b STATUS_VAR);             \
      if (float ## s ## _is_any_nan(a) ||                                 \
          float ## s ## _is_any_nan(b)) {                                 \
+        if (isieee) {                                                   \
+            if (float ## s ## _is_quiet_nan(a) &&                       \
+                !float ## s ##_is_any_nan(b)) {                         \
+                return b;                                               \
+            } else if (float ## s ## _is_quiet_nan(b) &&                \
+                       !float ## s ## _is_any_nan(a)) {                 \
+                return a;                                               \
+            }                                                           \
+        }                                                               \
          return propagateFloat ## s ## NaN(a, b STATUS_VAR);             \
      }                                                                   \
      aSign = extractFloat ## s ## Sign(a);                               \
@@ -6718,16 +6978,26 @@ INLINE float ## s float ## s ## _minmax(float ## s a, float ## s b,     \
                                                                          \
  float ## s float ## s ## _min(float ## s a, float ## s b STATUS_PARAM)  \
  {                                                                       \
-    return float ## s ## _minmax(a, b, 1 STATUS_VAR);                   \
+    return float ## s ## _minmax(a, b, 1, 0 STATUS_VAR);                \
  }                                                                       \
                                                                          \
  float ## s float ## s ## _max(float ## s a, float ## s b STATUS_PARAM)  \
  {                                                                       \
-    return float ## s ## _minmax(a, b, 0 STATUS_VAR);                   \
+    return float ## s ## _minmax(a, b, 0, 0 STATUS_VAR);                \
+}                                                                       \
+                                                                        \
+float ## s float ## s ## _minnum(float ## s a, float ## s b STATUS_PARAM) \
+{                                                                       \
+    return float ## s ## _minmax(a, b, 1, 1 STATUS_VAR);                \
+}                                                                       \
+                                                                        \
+float ## s float ## s ## _maxnum(float ## s a, float ## s b STATUS_PARAM) \
+{                                                                       \
+    return float ## s ## _minmax(a, b, 0, 1 STATUS_VAR);                \
  }
  
-MINMAX(32, 0xff)
-MINMAX(64, 0x7ff)
+MINMAX(32)
+MINMAX(64)
  
  
  /* Multiply A by 2 raised to the power N.  */
@@ -6748,10 +7018,13 @@ float32 float32_scalbn( float32 a, int n STATUS_PARAM )
          }
          return a;
      }
-    if ( aExp != 0 )
+    if (aExp != 0) {
          aSig |= 0x00800000;
-    else if ( aSig == 0 )
+    } else if (aSig == 0) {
          return a;
+    } else {
+        aExp++;
+    }
  
      if (n > 0x200) {
          n = 0x200;
@@ -6781,10 +7054,13 @@ float64 float64_scalbn( float64 a, int n STATUS_PARAM )
          }
          return a;
      }
-    if ( aExp != 0 )
+    if (aExp != 0) {
          aSig |= LIT64( 0x0010000000000000 );
-    else if ( aSig == 0 )
+    } else if (aSig == 0) {
          return a;
+    } else {
+        aExp++;
+    }
  
      if (n > 0x1000) {
          n = 0x1000;
@@ -6814,8 +7090,12 @@ floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM )
          return a;
      }
  
-    if (aExp == 0 && aSig == 0)
-        return a;
+    if (aExp == 0) {
+        if (aSig == 0) {
+            return a;
+        }
+        aExp++;
+    }
  
      if (n > 0x10000) {
          n = 0x10000;
@@ -6844,10 +7124,13 @@ float128 float128_scalbn( float128 a, int n STATUS_PARAM )
          }
          return a;
      }
-    if ( aExp != 0 )
+    if (aExp != 0) {
          aSig0 |= LIT64( 0x0001000000000000 );
-    else if ( aSig0 == 0 && aSig1 == 0 )
+    } else if (aSig0 == 0 && aSig1 == 0) {
          return a;
+    } else {
+        aExp++;
+    }
  
      if (n > 0x10000) {
          n = 0x10000;