*/
#include "config.h"
-#include "softfloat.h"
+#include "fpu/softfloat.h"
/*----------------------------------------------------------------------------
| Primitive arithmetic functions, including multi-word arithmetic, and
*----------------------------------------------------------------------------*/
#include "softfloat-specialize.h"
-void set_float_rounding_mode(int val STATUS_PARAM)
-{
- STATUS(float_rounding_mode) = val;
-}
-
-void set_float_exception_flags(int val STATUS_PARAM)
-{
- STATUS(float_exception_flags) = val;
-}
-
-void set_floatx80_rounding_precision(int val STATUS_PARAM)
-{
- STATUS(floatx80_rounding_precision) = val;
-}
-
/*----------------------------------------------------------------------------
| Returns the fraction bits of the half-precision floating-point value `a'.
*----------------------------------------------------------------------------*/
}
+/*----------------------------------------------------------------------------
+| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
+| `absZ1', with binary point between bits 63 and 64 (between the input words),
+| and returns the properly rounded 64-bit unsigned integer corresponding to the
+| input. `zSign' is the sign of the value being converted. Ordinarily, the
+| fixed-point input is simply rounded to an integer according to the current
+| rounding mode, with the inexact exception raised if the input cannot be
+| represented exactly as an integer. However, if the fixed-point input is too
+| large, the invalid exception is raised and the largest unsigned integer is
+| returned. If `zSign' is set and the rounded magnitude is nonzero, the
+| invalid exception is raised and zero is returned.
+*----------------------------------------------------------------------------*/
+
+static uint64_t roundAndPackUint64(flag zSign, uint64_t absZ0,
+                                   uint64_t absZ1 STATUS_PARAM)
+{
+    int8 roundingMode;
+    flag roundNearestEven, increment;
+
+    roundingMode = STATUS(float_rounding_mode);
+    roundNearestEven = (roundingMode == float_round_nearest_even);
+    /* Default (nearest-even): round up when the fraction word has its top
+     * bit set, i.e. the discarded fraction is at least one half.
+     */
+    increment = ((int64_t)absZ1 < 0);
+    if (!roundNearestEven) {
+        if (roundingMode == float_round_to_zero) {
+            increment = 0;
+        } else if (absZ1) {
+            /* Directed rounding with a nonzero fraction: the magnitude grows
+             * only when rounding towards the sign of the value.
+             */
+            if (zSign) {
+                increment = (roundingMode == float_round_down);
+            } else {
+                increment = (roundingMode == float_round_up);
+            }
+        }
+    }
+    if (increment) {
+        ++absZ0;
+        if (absZ0 == 0) {
+            /* The increment wrapped around: the result does not fit. */
+            float_raise(float_flag_invalid STATUS_VAR);
+            return LIT64(0xFFFFFFFFFFFFFFFF);
+        }
+        /* Exact tie under nearest-even: clear the low bit to land on even. */
+        absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
+    }
+
+    if (zSign && absZ0) {
+        /* A negative value that did not round to zero is not representable. */
+        float_raise(float_flag_invalid STATUS_VAR);
+        return 0;
+    }
+
+    if (absZ1) {
+        STATUS(float_exception_flags) |= float_flag_inexact;
+    }
+    return absZ0;
+}
+
/*----------------------------------------------------------------------------
| Returns the fraction bits of the single-precision floating-point value `a'.
*----------------------------------------------------------------------------*/
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
-float32 int32_to_float32( int32 a STATUS_PARAM )
+float32 int32_to_float32(int32_t a STATUS_PARAM)
{
flag zSign;
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
-float64 int32_to_float64( int32 a STATUS_PARAM )
+float64 int32_to_float64(int32_t a STATUS_PARAM)
{
flag zSign;
uint32 absA;
| Arithmetic.
*----------------------------------------------------------------------------*/
-floatx80 int32_to_floatx80( int32 a STATUS_PARAM )
+floatx80 int32_to_floatx80(int32_t a STATUS_PARAM)
{
flag zSign;
uint32 absA;
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
-float128 int32_to_float128( int32 a STATUS_PARAM )
+float128 int32_to_float128(int32_t a STATUS_PARAM)
{
flag zSign;
uint32 absA;
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
-float32 int64_to_float32( int64 a STATUS_PARAM )
+float32 int64_to_float32(int64_t a STATUS_PARAM)
{
flag zSign;
uint64 absA;
}
-float32 uint64_to_float32( uint64 a STATUS_PARAM )
+float32 uint64_to_float32(uint64_t a STATUS_PARAM)
{
int8 shiftCount;
if ( a == 0 ) return float32_zero;
shiftCount = countLeadingZeros64( a ) - 40;
if ( 0 <= shiftCount ) {
- return packFloat32( 1 > 0, 0x95 - shiftCount, a<<shiftCount );
+ return packFloat32(0, 0x95 - shiftCount, a<<shiftCount);
}
else {
shiftCount += 7;
else {
a <<= shiftCount;
}
- return roundAndPackFloat32( 1 > 0, 0x9C - shiftCount, a STATUS_VAR );
+ return roundAndPackFloat32(0, 0x9C - shiftCount, a STATUS_VAR);
}
}
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
-float64 int64_to_float64( int64 a STATUS_PARAM )
+float64 int64_to_float64(int64_t a STATUS_PARAM)
{
flag zSign;
}
-float64 uint64_to_float64( uint64 a STATUS_PARAM )
+float64 uint64_to_float64(uint64_t a STATUS_PARAM) /* unsigned 64-bit integer -> float64 */
{
- if ( a == 0 ) return float64_zero;
- return normalizeRoundAndPackFloat64( 0, 0x43C, a STATUS_VAR );
+ int exp = 0x43C; /* exponent argument handed to normalizeRoundAndPackFloat64 */
+ if (a == 0) {
+ return float64_zero;
+ }
+ if ((int64_t)a < 0) { /* bit 63 of `a' is set */
+ shift64RightJamming(a, 1, &a); /* shift right one bit; presumably the lost bit stays sticky -- confirm in helper */
+ exp += 1; /* compensate for the one-bit right shift */
+ }
+ return normalizeRoundAndPackFloat64(0, exp, a STATUS_VAR); /* first argument 0: result is non-negative */
}
/*----------------------------------------------------------------------------
| Arithmetic.
*----------------------------------------------------------------------------*/
-floatx80 int64_to_floatx80( int64 a STATUS_PARAM )
+floatx80 int64_to_floatx80(int64_t a STATUS_PARAM)
{
flag zSign;
uint64 absA;
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
-float128 int64_to_float128( int64 a STATUS_PARAM )
+float128 int64_to_float128(int64_t a STATUS_PARAM)
{
flag zSign;
uint64 absA;
}
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit unsigned integer `a' to the
+| quadruple-precision floating-point format. Zero is returned as
+| float128_zero; any other value is normalized and packed with a positive
+| sign.
+*----------------------------------------------------------------------------*/
+
+float128 uint64_to_float128(uint64_t a STATUS_PARAM)
+{
+    if (a == 0) {
+        return float128_zero;
+    }
+    /* The integer is the LOW word of the 128-bit significand pair; passing
+     * it as the high word with this exponent would scale the result by
+     * 2^64.  NOTE(review): relies on normalizeRoundAndPackFloat128 taking
+     * (zSig0, zSig1) as (high, low) -- confirm against its definition.
+     */
+    return normalizeRoundAndPackFloat128(0, 0x406E, 0, a STATUS_VAR);
+}
+
/*----------------------------------------------------------------------------
| Returns the result of converting the single-precision floating-point value
| `a' to the 32-bit two's complement integer format. The conversion is
}
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 64-bit unsigned integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode. If `a' is a NaN, the largest
+| unsigned integer is returned. Otherwise, if the conversion overflows, the
+| largest unsigned integer is returned. If `a' is negative, the result
+| is rounded and zero is returned; negative values that do not round to
+| zero will raise the invalid exception flag.
+*----------------------------------------------------------------------------*/
+
+uint64 float32_to_uint64(float32 a STATUS_PARAM)
+{
+    flag aSign;
+    int_fast16_t aExp, shiftCount;
+    uint32_t aSig;
+    uint64_t aSig64, aSigExtra;
+
+    a = float32_squash_input_denormal(a STATUS_VAR);
+    aSig = extractFloat32Frac(a);
+    aExp = extractFloat32Exp(a);
+    aSign = extractFloat32Sign(a);
+
+    /* Sign set and biased exponent above 126: never representable. */
+    if (aSign && aExp > 126) {
+        float_raise(float_flag_invalid STATUS_VAR);
+        return float32_is_any_nan(a) ? LIT64(0xFFFFFFFFFFFFFFFF) : 0;
+    }
+
+    /* Exponent too large for 64 bits (this also catches positive NaN/inf). */
+    shiftCount = 0xBE - aExp;
+    if (shiftCount < 0) {
+        float_raise(float_flag_invalid STATUS_VAR);
+        return LIT64(0xFFFFFFFFFFFFFFFF);
+    }
+
+    /* A nonzero exponent field means bit 23 of the significand is set. */
+    if (aExp) {
+        aSig |= 0x00800000;
+    }
+
+    /* Move the 24-bit significand to the top of a 64-bit word, then shift
+     * it down into place, keeping the shifted-out bits for rounding.
+     */
+    aSig64 = (uint64_t)aSig << 40;
+    shift64ExtraRightJamming(aSig64, 0, shiftCount, &aSig64, &aSigExtra);
+    return roundAndPackUint64(aSign, aSig64, aSigExtra STATUS_VAR);
+}
+
/*----------------------------------------------------------------------------
| Returns the result of converting the single-precision floating-point value
| `a' to the 64-bit two's complement integer format. The conversion is
}
}
/* Zero plus something non-zero : just return the something */
- return make_float32(float32_val(c) ^ (signflip << 31));
+ return packFloat32(cSign ^ signflip, cExp, cSig);
}
if (aExp == 0) {
if (aSig) {
return commonNaNToFloat32(float16ToCommonNaN(a STATUS_VAR) STATUS_VAR);
}
- return packFloat32(aSign, 0xff, aSig << 13);
+ return packFloat32(aSign, 0xff, 0);
}
if (aExp == 0) {
int8 shiftCount;
uint32_t mask;
uint32_t increment;
int8 roundingMode;
+ int maxexp = ieee ? 15 : 16;
+ bool rounding_bumps_exp;
+ bool is_tiny = false;
+
a = float32_squash_input_denormal(a STATUS_VAR);
aSig = extractFloat32Frac( a );
if ( aExp == 0xFF ) {
if (aSig) {
/* Input is a NaN */
- float16 r = commonNaNToFloat16( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
if (!ieee) {
+ float_raise(float_flag_invalid STATUS_VAR);
return packFloat16(aSign, 0, 0);
}
- return r;
+ return commonNaNToFloat16(
+ float32ToCommonNaN(a STATUS_VAR) STATUS_VAR);
}
/* Infinity */
if (!ieee) {
if (aExp == 0 && aSig == 0) {
return packFloat16(aSign, 0, 0);
}
- /* Decimal point between bits 22 and 23. */
+ /* Decimal point between bits 22 and 23. Note that we add the 1 bit
+ * even if the input is denormal; however this is harmless because
+ * the largest possible single-precision denormal is still smaller
+ * than the smallest representable half-precision denormal, and so we
+ * will end up ignoring aSig and returning via the "always return zero"
+ * codepath.
+ */
aSig |= 0x00800000;
aExp -= 0x7f;
+ /* Calculate the mask of bits of the mantissa which are not
+ * representable in half-precision and will be lost.
+ */
if (aExp < -14) {
+ /* Will be denormal in halfprec */
mask = 0x00ffffff;
if (aExp >= -24) {
mask >>= 25 + aExp;
}
} else {
+ /* Normal number in halfprec */
mask = 0x00001fff;
}
- if (aSig & mask) {
- float_raise( float_flag_underflow STATUS_VAR );
- roundingMode = STATUS(float_rounding_mode);
- switch (roundingMode) {
- case float_round_nearest_even:
- increment = (mask + 1) >> 1;
- if ((aSig & mask) == increment) {
- increment = aSig & (increment << 1);
- }
- break;
- case float_round_up:
- increment = aSign ? 0 : mask;
- break;
- case float_round_down:
- increment = aSign ? mask : 0;
- break;
- default: /* round_to_zero */
- increment = 0;
- break;
- }
- aSig += increment;
- if (aSig >= 0x01000000) {
- aSig >>= 1;
- aExp++;
- }
- } else if (aExp < -14
- && STATUS(float_detect_tininess) == float_tininess_before_rounding) {
- float_raise( float_flag_underflow STATUS_VAR);
- }
- if (ieee) {
- if (aExp > 15) {
- float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
+ roundingMode = STATUS(float_rounding_mode);
+ switch (roundingMode) {
+ case float_round_nearest_even:
+ increment = (mask + 1) >> 1;
+ if ((aSig & mask) == increment) {
+ increment = aSig & (increment << 1);
+ }
+ break;
+ case float_round_up:
+ increment = aSign ? 0 : mask;
+ break;
+ case float_round_down:
+ increment = aSign ? mask : 0;
+ break;
+ default: /* round_to_zero */
+ increment = 0;
+ break;
+ }
+
+ rounding_bumps_exp = (aSig + increment >= 0x01000000);
+
+ if (aExp > maxexp || (aExp == maxexp && rounding_bumps_exp)) {
+ if (ieee) {
+ float_raise(float_flag_overflow | float_flag_inexact STATUS_VAR);
return packFloat16(aSign, 0x1f, 0);
- }
- } else {
- if (aExp > 16) {
- float_raise(float_flag_invalid | float_flag_inexact STATUS_VAR);
+ } else {
+ float_raise(float_flag_invalid STATUS_VAR);
return packFloat16(aSign, 0x1f, 0x3ff);
}
}
+
+ if (aExp < -14) {
+ /* Note that flush-to-zero does not affect half-precision results */
+ is_tiny =
+ (STATUS(float_detect_tininess) == float_tininess_before_rounding)
+ || (aExp < -15)
+ || (!rounding_bumps_exp);
+ }
+ if (aSig & mask) {
+ float_raise(float_flag_inexact STATUS_VAR);
+ if (is_tiny) {
+ float_raise(float_flag_underflow STATUS_VAR);
+ }
+ }
+
+ aSig += increment;
+ if (rounding_bumps_exp) {
+ aSig >>= 1;
+ aExp++;
+ }
+
if (aExp < -24) {
return packFloat16(aSign, 0, 0);
}
}
}
/* Zero plus something non-zero : just return the something */
- return make_float64(float64_val(c) ^ ((uint64_t)signflip << 63));
+ return packFloat64(cSign ^ signflip, cExp, cSig);
}
if (aExp == 0) {
}
zExp -= shiftcount;
} else {
- shiftcount = countLeadingZeros64(zSig1) - 1;
- zSig0 = zSig1 << shiftcount;
- zExp -= (shiftcount + 64);
+ shiftcount = countLeadingZeros64(zSig1);
+ if (shiftcount == 0) {
+ zSig0 = (zSig1 >> 1) | (zSig1 & 1);
+ zExp -= 63;
+ } else {
+ shiftcount--;
+ zSig0 = zSig1 << shiftcount;
+ zExp -= (shiftcount + 64);
+ }
}
return roundAndPackFloat64(zSign, zExp, zSig0 STATUS_VAR);
}
}
/* misc functions */
-float32 uint32_to_float32( uint32 a STATUS_PARAM )
+float32 uint32_to_float32(uint32_t a STATUS_PARAM) /* uint32 -> float32, delegating to int64_to_float32() */
{
return int64_to_float32(a STATUS_VAR);
}
-float64 uint32_to_float64( uint32 a STATUS_PARAM )
+float64 uint32_to_float64(uint32_t a STATUS_PARAM) /* uint32 -> float64, delegating to int64_to_float64() */
{
return int64_to_float64(a STATUS_VAR);
}
{
int64_t v;
uint32 res;
+ int old_exc_flags = get_float_exception_flags(status);
v = float32_to_int64(a STATUS_VAR);
if (v < 0) {
res = 0;
- float_raise( float_flag_invalid STATUS_VAR);
} else if (v > 0xffffffff) {
res = 0xffffffff;
- float_raise( float_flag_invalid STATUS_VAR);
} else {
- res = v;
+ return v;
}
+ set_float_exception_flags(old_exc_flags, status);
+ float_raise(float_flag_invalid STATUS_VAR);
return res;
}
{
int64_t v;
uint32 res;
+ int old_exc_flags = get_float_exception_flags(status);
v = float32_to_int64_round_to_zero(a STATUS_VAR);
if (v < 0) {
res = 0;
- float_raise( float_flag_invalid STATUS_VAR);
} else if (v > 0xffffffff) {
res = 0xffffffff;
- float_raise( float_flag_invalid STATUS_VAR);
} else {
- res = v;
+ return v;
+ }
+ set_float_exception_flags(old_exc_flags, status);
+ float_raise(float_flag_invalid STATUS_VAR);
+ return res;
+}
+
+/* Converts `a' to a signed 16-bit integer by way of float32_to_int32().
+ * An in-range result is returned with whatever flags the conversion raised;
+ * an out-of-range result is clamped to -0x8000 / 0x7fff, the flags are
+ * restored to their value on entry, and only invalid is raised.
+ */
+int_fast16_t float32_to_int16(float32 a STATUS_PARAM)
+{
+    int old_exc_flags = get_float_exception_flags(status);
+    int32_t v = float32_to_int32(a STATUS_VAR);
+    int_fast16_t res;
+
+    if (v >= -0x8000 && v <= 0x7fff) {
+        return v;
+    }
+
+    res = (v < -0x8000) ? -0x8000 : 0x7fff;
+    set_float_exception_flags(old_exc_flags, status);
+    float_raise(float_flag_invalid STATUS_VAR);
+    return res;
+}
+
+uint_fast16_t float32_to_uint16(float32 a STATUS_PARAM) /* float32 -> unsigned 16-bit, clamping out-of-range results */
+{
+ int32_t v;
+ uint_fast16_t res;
+ int old_exc_flags = get_float_exception_flags(status); /* snapshot of the flags on entry */
+
+ v = float32_to_int32(a STATUS_VAR); /* convert through the signed 32-bit path, then range-check */
+ if (v < 0) {
+ res = 0; /* negative: clamp to 0 */
+ } else if (v > 0xffff) {
+ res = 0xffff; /* too large: clamp to 0xffff */
+ } else {
+ return v; /* in range: keep the flags the conversion raised */
}
+
+ set_float_exception_flags(old_exc_flags, status); /* drop any flags raised by the conversion */
+ float_raise(float_flag_invalid STATUS_VAR); /* out of range reports invalid only */
return res;
}
{
int64_t v;
uint_fast16_t res;
+ int old_exc_flags = get_float_exception_flags(status);
v = float32_to_int64_round_to_zero(a STATUS_VAR);
if (v < 0) {
res = 0;
- float_raise( float_flag_invalid STATUS_VAR);
} else if (v > 0xffff) {
res = 0xffff;
- float_raise( float_flag_invalid STATUS_VAR);
} else {
- res = v;
+ return v;
}
+ set_float_exception_flags(old_exc_flags, status);
+ float_raise(float_flag_invalid STATUS_VAR);
return res;
}
uint32 float64_to_uint32( float64 a STATUS_PARAM )
{
- int64_t v;
+ uint64_t v; /* holds the unsigned 64-bit intermediate result */
uint32 res;
+ int old_exc_flags = get_float_exception_flags(status); /* snapshot of the flags on entry */
- v = float64_to_int64(a STATUS_VAR);
- if (v < 0) {
- res = 0;
- float_raise( float_flag_invalid STATUS_VAR);
- } else if (v > 0xffffffff) {
+ v = float64_to_uint64(a STATUS_VAR); /* convert through the unsigned 64-bit path, then range-check */
+ if (v > 0xffffffff) { /* does not fit in 32 bits: clamp */
res = 0xffffffff;
- float_raise( float_flag_invalid STATUS_VAR);
} else {
- res = v;
+ return v; /* in range: keep the flags the conversion raised */
}
+ set_float_exception_flags(old_exc_flags, status); /* drop any flags raised by the conversion */
+ float_raise(float_flag_invalid STATUS_VAR); /* out of range reports invalid only */
return res;
}
uint32 float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
{
- int64_t v;
+ uint64_t v; /* holds the unsigned 64-bit intermediate result */
uint32 res;
+ int old_exc_flags = get_float_exception_flags(status); /* snapshot of the flags on entry */
- v = float64_to_int64_round_to_zero(a STATUS_VAR);
+ v = float64_to_uint64_round_to_zero(a STATUS_VAR); /* convert through the unsigned 64-bit round-to-zero path */
+ if (v > 0xffffffff) { /* does not fit in 32 bits: clamp */
+ res = 0xffffffff;
+ } else {
+ return v; /* in range: keep the flags the conversion raised */
+ }
+ set_float_exception_flags(old_exc_flags, status); /* drop any flags raised by the conversion */
+ float_raise(float_flag_invalid STATUS_VAR); /* out of range reports invalid only */
+ return res;
+}
+
+/* Converts `a' to a signed 16-bit integer by way of float64_to_int32().
+ * An in-range result is returned with whatever flags the conversion raised;
+ * an out-of-range result is clamped to -0x8000 / 0x7fff, the flags are
+ * restored to their value on entry, and only invalid is raised.
+ */
+int_fast16_t float64_to_int16(float64 a STATUS_PARAM)
+{
+    int old_exc_flags = get_float_exception_flags(status);
+    int64_t v = float64_to_int32(a STATUS_VAR);
+    int_fast16_t res;
+
+    if (v >= -0x8000 && v <= 0x7fff) {
+        return v;
+    }
+
+    res = (v < -0x8000) ? -0x8000 : 0x7fff;
+    set_float_exception_flags(old_exc_flags, status);
+    float_raise(float_flag_invalid STATUS_VAR);
+    return res;
+}
+
+uint_fast16_t float64_to_uint16(float64 a STATUS_PARAM) /* float64 -> unsigned 16-bit, clamping out-of-range results */
+{
+ int64_t v;
+ uint_fast16_t res;
+ int old_exc_flags = get_float_exception_flags(status); /* snapshot of the flags on entry */
+
+ v = float64_to_int32(a STATUS_VAR); /* convert through the signed 32-bit path, then range-check */
if (v < 0) {
res = 0;
- float_raise( float_flag_invalid STATUS_VAR);
- } else if (v > 0xffffffff) {
- res = 0xffffffff;
- float_raise( float_flag_invalid STATUS_VAR);
+ } else if (v > 0xffff) { /* too large for 16 bits */
+ res = 0xffff; /* clamp to 0xffff */
} else {
- res = v;
+ return v; /* in range: keep the flags the conversion raised */
}
+
+ set_float_exception_flags(old_exc_flags, status); /* drop any flags raised by the conversion */
+ float_raise(float_flag_invalid STATUS_VAR); /* out of range reports invalid only */
return res;
}
{
int64_t v;
uint_fast16_t res;
+ int old_exc_flags = get_float_exception_flags(status);
v = float64_to_int64_round_to_zero(a STATUS_VAR);
if (v < 0) {
res = 0;
- float_raise( float_flag_invalid STATUS_VAR);
} else if (v > 0xffff) {
res = 0xffff;
- float_raise( float_flag_invalid STATUS_VAR);
} else {
- res = v;
+ return v;
}
+ set_float_exception_flags(old_exc_flags, status);
+ float_raise(float_flag_invalid STATUS_VAR);
return res;
}
-/* FIXME: This looks broken. */
-uint64_t float64_to_uint64 (float64 a STATUS_PARAM)
-{
- int64_t v;
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 64-bit unsigned integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode. If `a' is a NaN, the largest
+| unsigned integer is returned. If the conversion overflows, the
+| largest unsigned integer is returned. If `a' is negative, the value is
+| rounded and zero is returned; negative values that do not round to zero
+| will raise the invalid exception.
+*----------------------------------------------------------------------------*/
- v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
- v += float64_val(a);
- v = float64_to_int64(make_float64(v) STATUS_VAR);
+uint64_t float64_to_uint64(float64 a STATUS_PARAM) /* float64 -> unsigned 64-bit integer */
+{
+ flag aSign;
+ int_fast16_t aExp, shiftCount;
+ uint64_t aSig, aSigExtra;
+ a = float64_squash_input_denormal(a STATUS_VAR);
- return v - INT64_MIN;
+ aSig = extractFloat64Frac(a);
+ aExp = extractFloat64Exp(a);
+ aSign = extractFloat64Sign(a);
+ if (aSign && (aExp > 1022)) { /* sign set and biased exponent above 1022: never representable */
+ float_raise(float_flag_invalid STATUS_VAR);
+ if (float64_is_any_nan(a)) {
+ return LIT64(0xFFFFFFFFFFFFFFFF); /* NaN: all-ones result */
+ } else {
+ return 0; /* other negative inputs: zero */
+ }
+ }
+ if (aExp) { /* nonzero exponent field */
+ aSig |= LIT64(0x0010000000000000); /* set bit 52 of the significand */
+ }
+ shiftCount = 0x433 - aExp; /* right-shift distance; <= 0 means shift left instead */
+ if (shiftCount <= 0) {
+ if (0x43E < aExp) { /* exponent too large for 64 bits */
+ float_raise(float_flag_invalid STATUS_VAR);
+ return LIT64(0xFFFFFFFFFFFFFFFF);
+ }
+ aSigExtra = 0; /* a left shift discards nothing */
+ aSig <<= -shiftCount;
+ } else {
+ shift64ExtraRightJamming(aSig, 0, shiftCount, &aSig, &aSigExtra); /* shifted-out bits are captured in aSigExtra */
+ }
+ return roundAndPackUint64(aSign, aSig, aSigExtra STATUS_VAR); /* rounding and final flag handling happen there */
}
+/* Returns the result of converting the double-precision value `a' to the
+ * 64-bit unsigned integer format, rounding towards zero regardless of the
+ * current rounding mode.  The rounding mode in the status is switched to
+ * round-to-zero for the duration of the float64_to_uint64() call and then
+ * restored.
+ */
uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
{
- int64_t v;
-
- v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
- v += float64_val(a);
- v = float64_to_int64_round_to_zero(make_float64(v) STATUS_VAR);
-
- return v - INT64_MIN;
+    signed char current_rounding_mode = STATUS(float_rounding_mode);
+    /* Unsigned holder: results of 2^63 and above must not pass through a
+     * signed type on their way back to the caller.
+     */
+    uint64_t v;
+
+    set_float_rounding_mode(float_round_to_zero STATUS_VAR);
+    v = float64_to_uint64(a STATUS_VAR);
+    set_float_rounding_mode(current_rounding_mode STATUS_VAR);
+    return v;
}
#define COMPARE(s, nan_exp) \
/* min() and max() functions. These can't be implemented as
* 'compare and pick one input' because that would mishandle
* NaNs and +0 vs -0.
+ *
+ * minnum() and maxnum() functions. These are similar to the min()
+ * and max() functions but if one of the arguments is a QNaN and
+ * the other is numerical then the numerical argument is returned.
+ * minnum() and maxnum() correspond to the IEEE 754-2008 minNum()
+ * and maxNum() operations. min() and max() are the typical min/max
+ * semantics provided by many CPUs which predate that specification.
*/
-#define MINMAX(s, nan_exp) \
+#define MINMAX(s) \
INLINE float ## s float ## s ## _minmax(float ## s a, float ## s b, \
- int ismin STATUS_PARAM ) \
+ int ismin, int isieee STATUS_PARAM) \
{ \
flag aSign, bSign; \
uint ## s ## _t av, bv; \
b = float ## s ## _squash_input_denormal(b STATUS_VAR); \
if (float ## s ## _is_any_nan(a) || \
float ## s ## _is_any_nan(b)) { \
+ if (isieee) { \
+ if (float ## s ## _is_quiet_nan(a) && \
+ !float ## s ##_is_any_nan(b)) { \
+ return b; \
+ } else if (float ## s ## _is_quiet_nan(b) && \
+ !float ## s ## _is_any_nan(a)) { \
+ return a; \
+ } \
+ } \
return propagateFloat ## s ## NaN(a, b STATUS_VAR); \
} \
aSign = extractFloat ## s ## Sign(a); \
\
float ## s float ## s ## _min(float ## s a, float ## s b STATUS_PARAM) \
{ \
- return float ## s ## _minmax(a, b, 1 STATUS_VAR); \
+ return float ## s ## _minmax(a, b, 1, 0 STATUS_VAR); \
} \
\
float ## s float ## s ## _max(float ## s a, float ## s b STATUS_PARAM) \
{ \
- return float ## s ## _minmax(a, b, 0 STATUS_VAR); \
+ return float ## s ## _minmax(a, b, 0, 0 STATUS_VAR); \
+} \
+ \
+float ## s float ## s ## _minnum(float ## s a, float ## s b STATUS_PARAM) \
+{ \
+ return float ## s ## _minmax(a, b, 1, 1 STATUS_VAR); \
+} \
+ \
+float ## s float ## s ## _maxnum(float ## s a, float ## s b STATUS_PARAM) \
+{ \
+ return float ## s ## _minmax(a, b, 0, 1 STATUS_VAR); \
}
-MINMAX(32, 0xff)
-MINMAX(64, 0x7ff)
+MINMAX(32)
+MINMAX(64)
/* Multiply A by 2 raised to the power N. */
}
return a;
}
- if ( aExp != 0 )
+ if (aExp != 0) {
aSig |= 0x00800000;
- else if ( aSig == 0 )
+ } else if (aSig == 0) {
return a;
+ } else {
+ aExp++;
+ }
if (n > 0x200) {
n = 0x200;
}
return a;
}
- if ( aExp != 0 )
+ if (aExp != 0) {
aSig |= LIT64( 0x0010000000000000 );
- else if ( aSig == 0 )
+ } else if (aSig == 0) {
return a;
+ } else {
+ aExp++;
+ }
if (n > 0x1000) {
n = 0x1000;
return a;
}
- if (aExp == 0 && aSig == 0)
- return a;
+ if (aExp == 0) {
+ if (aSig == 0) {
+ return a;
+ }
+ aExp++;
+ }
if (n > 0x10000) {
n = 0x10000;
}
return a;
}
- if ( aExp != 0 )
+ if (aExp != 0) {
aSig0 |= LIT64( 0x0001000000000000 );
- else if ( aSig0 == 0 && aSig1 == 0 )
+ } else if (aSig0 == 0 && aSig1 == 0) {
return a;
+ } else {
+ aExp++;
+ }
if (n > 0x10000) {
n = 0x10000;