return float64_is_infinity(a.s);
}
-/* Note: @fast_test and @post can be NULL */
static inline float32
float32_gen2(float32 xa, float32 xb, float_status *s,
hard_f32_op2_fn hard, soft_f32_op2_fn soft,
- f32_check_fn pre, f32_check_fn post,
- f32_check_fn fast_test, soft_f32_op2_fn fast_op)
+ f32_check_fn pre, f32_check_fn post)
{
union_float32 ua, ub, ur;
if (unlikely(!pre(ua, ub))) {
goto soft;
}
- if (fast_test && fast_test(ua, ub)) {
- return fast_op(ua.s, ub.s, s);
- }
ur.h = hard(ua.h, ub.h);
if (unlikely(f32_is_inf(ur))) {
s->float_exception_flags |= float_flag_overflow;
- } else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
- if (post == NULL || post(ua, ub)) {
- goto soft;
- }
+ } else if (unlikely(fabsf(ur.h) <= FLT_MIN) && post(ua, ub)) {
+ goto soft;
}
return ur.s;
static inline float64
float64_gen2(float64 xa, float64 xb, float_status *s,
hard_f64_op2_fn hard, soft_f64_op2_fn soft,
- f64_check_fn pre, f64_check_fn post,
- f64_check_fn fast_test, soft_f64_op2_fn fast_op)
+ f64_check_fn pre, f64_check_fn post)
{
union_float64 ua, ub, ur;
if (unlikely(!pre(ua, ub))) {
goto soft;
}
- if (fast_test && fast_test(ua, ub)) {
- return fast_op(ua.s, ub.s, s);
- }
ur.h = hard(ua.h, ub.h);
if (unlikely(f64_is_inf(ur))) {
s->float_exception_flags |= float_flag_overflow;
- } else if (unlikely(fabs(ur.h) <= DBL_MIN)) {
- if (post == NULL || post(ua, ub)) {
- goto soft;
- }
+ } else if (unlikely(fabs(ur.h) <= DBL_MIN) && post(ua, ub)) {
+ goto soft;
}
return ur.s;
return soft(ua.s, ub.s, s);
}
-/*----------------------------------------------------------------------------
-| Returns the fraction bits of the half-precision floating-point value `a'.
-*----------------------------------------------------------------------------*/
-
-static inline uint32_t extractFloat16Frac(float16 a)
-{
- return float16_val(a) & 0x3ff;
-}
-
-/*----------------------------------------------------------------------------
-| Returns the exponent bits of the half-precision floating-point value `a'.
-*----------------------------------------------------------------------------*/
-
-static inline int extractFloat16Exp(float16 a)
-{
- return (float16_val(a) >> 10) & 0x1f;
-}
-
/*----------------------------------------------------------------------------
| Returns the fraction bits of the single-precision floating-point value `a'.
*----------------------------------------------------------------------------*/
| Returns the sign bit of the single-precision floating-point value `a'.
*----------------------------------------------------------------------------*/
-static inline flag extractFloat32Sign(float32 a)
+static inline bool extractFloat32Sign(float32 a)
{
return float32_val(a) >> 31;
}
static inline uint64_t extractFloat64Frac(float64 a)
{
- return float64_val(a) & LIT64(0x000FFFFFFFFFFFFF);
+ return float64_val(a) & UINT64_C(0x000FFFFFFFFFFFFF);
}
/*----------------------------------------------------------------------------
| Returns the sign bit of the double-precision floating-point value `a'.
*----------------------------------------------------------------------------*/
-static inline flag extractFloat64Sign(float64 a)
+static inline bool extractFloat64Sign(float64 a)
{
return float64_val(a) >> 63;
}
| are propagated from function inputs to output. These details are target-
| specific.
*----------------------------------------------------------------------------*/
-#include "softfloat-specialize.h"
+#include "softfloat-specialize.inc.c"
/* Canonicalize EXP and FRAC, setting CLS. */
static FloatParts sf_canonicalize(FloatParts part, const FloatFmt *parm,
p.cls = float_class_zero;
goto do_zero;
} else {
- bool is_tiny = (s->float_detect_tininess
- == float_tininess_before_rounding)
+ bool is_tiny = s->tininess_before_rounding
|| (exp < 0)
|| !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);
case float_round_to_odd:
inc = frac & frac_lsb ? 0 : round_mask;
break;
+ default:
+ break;
}
flags |= float_flag_inexact;
frac += inc;
return a - b;
}
-static bool f32_addsub_post(union_float32 a, union_float32 b)
+static bool f32_addsubmul_post(union_float32 a, union_float32 b)
{
if (QEMU_HARDFLOAT_2F32_USE_FP) {
return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
return !(float32_is_zero(a.s) && float32_is_zero(b.s));
}
-static bool f64_addsub_post(union_float64 a, union_float64 b)
+static bool f64_addsubmul_post(union_float64 a, union_float64 b)
{
if (QEMU_HARDFLOAT_2F64_USE_FP) {
return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
hard_f32_op2_fn hard, soft_f32_op2_fn soft)
{
return float32_gen2(a, b, s, hard, soft,
- f32_is_zon2, f32_addsub_post, NULL, NULL);
+ f32_is_zon2, f32_addsubmul_post);
}
static float64 float64_addsub(float64 a, float64 b, float_status *s,
hard_f64_op2_fn hard, soft_f64_op2_fn soft)
{
return float64_gen2(a, b, s, hard, soft,
- f64_is_zon2, f64_addsub_post, NULL, NULL);
+ f64_is_zon2, f64_addsubmul_post);
}
float32 QEMU_FLATTEN
return a * b;
}
-static bool f32_mul_fast_test(union_float32 a, union_float32 b)
-{
- return float32_is_zero(a.s) || float32_is_zero(b.s);
-}
-
-static bool f64_mul_fast_test(union_float64 a, union_float64 b)
-{
- return float64_is_zero(a.s) || float64_is_zero(b.s);
-}
-
-static float32 f32_mul_fast_op(float32 a, float32 b, float_status *s)
-{
- bool signbit = float32_is_neg(a) ^ float32_is_neg(b);
-
- return float32_set_sign(float32_zero, signbit);
-}
-
-static float64 f64_mul_fast_op(float64 a, float64 b, float_status *s)
-{
- bool signbit = float64_is_neg(a) ^ float64_is_neg(b);
-
- return float64_set_sign(float64_zero, signbit);
-}
-
float32 QEMU_FLATTEN
float32_mul(float32 a, float32 b, float_status *s)
{
return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
- f32_is_zon2, NULL, f32_mul_fast_test, f32_mul_fast_op);
+ f32_is_zon2, f32_addsubmul_post);
}
float64 QEMU_FLATTEN
float64_mul(float64 a, float64 b, float_status *s)
{
return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
- f64_is_zon2, NULL, f64_mul_fast_test, f64_mul_fast_op);
+ f64_is_zon2, f64_addsubmul_post);
}
/*
float32_div(float32 a, float32 b, float_status *s)
{
return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
- f32_div_pre, f32_div_post, NULL, NULL);
+ f32_div_pre, f32_div_post);
}
float64 QEMU_FLATTEN
float64_div(float64 a, float64 b, float_status *s)
{
return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
- f64_div_pre, f64_div_post, NULL, NULL);
+ f64_div_pre, f64_div_post);
}
/*
return float16a_round_pack_canonical(pr, s, fmt16);
}
-float64 float32_to_float64(float32 a, float_status *s)
+static float64 QEMU_SOFTFLOAT_ATTR
+soft_float32_to_float64(float32 a, float_status *s)
{
FloatParts p = float32_unpack_canonical(a, s);
FloatParts pr = float_to_float(p, &float64_params, s);
return float64_round_pack_canonical(pr, s);
}
+float64 float32_to_float64(float32 a, float_status *s)
+{
+ if (likely(float32_is_normal(a))) {
+ /* Widening conversion can never produce inexact results. */
+ union_float32 uf;
+ union_float64 ud;
+ uf.s = a;
+ ud.h = uf.h;
+ return ud.s;
+ } else if (float32_is_zero(a)) {
+ return float64_set_sign(float64_zero, float32_is_neg(a));
+ } else {
+ return soft_float32_to_float64(a, s);
+ }
+}
+
float16 float64_to_float16(float64 a, bool ieee, float_status *s)
{
const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
* Arithmetic.
*/
-static FloatParts round_to_int(FloatParts a, int rmode,
+static FloatParts round_to_int(FloatParts a, FloatRoundMode rmode,
int scale, float_status *s)
{
switch (a.cls) {
* is returned.
*/
-static int64_t round_to_int_and_pack(FloatParts in, int rmode, int scale,
- int64_t min, int64_t max,
+static int64_t round_to_int_and_pack(FloatParts in, FloatRoundMode rmode,
+ int scale, int64_t min, int64_t max,
float_status *s)
{
uint64_t r;
}
}
-int16_t float16_to_int16_scalbn(float16 a, int rmode, int scale,
+int16_t float16_to_int16_scalbn(float16 a, FloatRoundMode rmode, int scale,
float_status *s)
{
return round_to_int_and_pack(float16_unpack_canonical(a, s),
rmode, scale, INT16_MIN, INT16_MAX, s);
}
-int32_t float16_to_int32_scalbn(float16 a, int rmode, int scale,
+int32_t float16_to_int32_scalbn(float16 a, FloatRoundMode rmode, int scale,
float_status *s)
{
return round_to_int_and_pack(float16_unpack_canonical(a, s),
rmode, scale, INT32_MIN, INT32_MAX, s);
}
-int64_t float16_to_int64_scalbn(float16 a, int rmode, int scale,
+int64_t float16_to_int64_scalbn(float16 a, FloatRoundMode rmode, int scale,
float_status *s)
{
return round_to_int_and_pack(float16_unpack_canonical(a, s),
rmode, scale, INT64_MIN, INT64_MAX, s);
}
-int16_t float32_to_int16_scalbn(float32 a, int rmode, int scale,
+int16_t float32_to_int16_scalbn(float32 a, FloatRoundMode rmode, int scale,
float_status *s)
{
return round_to_int_and_pack(float32_unpack_canonical(a, s),
rmode, scale, INT16_MIN, INT16_MAX, s);
}
-int32_t float32_to_int32_scalbn(float32 a, int rmode, int scale,
+int32_t float32_to_int32_scalbn(float32 a, FloatRoundMode rmode, int scale,
float_status *s)
{
return round_to_int_and_pack(float32_unpack_canonical(a, s),
rmode, scale, INT32_MIN, INT32_MAX, s);
}
-int64_t float32_to_int64_scalbn(float32 a, int rmode, int scale,
+int64_t float32_to_int64_scalbn(float32 a, FloatRoundMode rmode, int scale,
float_status *s)
{
return round_to_int_and_pack(float32_unpack_canonical(a, s),
rmode, scale, INT64_MIN, INT64_MAX, s);
}
-int16_t float64_to_int16_scalbn(float64 a, int rmode, int scale,
+int16_t float64_to_int16_scalbn(float64 a, FloatRoundMode rmode, int scale,
float_status *s)
{
return round_to_int_and_pack(float64_unpack_canonical(a, s),
rmode, scale, INT16_MIN, INT16_MAX, s);
}
-int32_t float64_to_int32_scalbn(float64 a, int rmode, int scale,
+int32_t float64_to_int32_scalbn(float64 a, FloatRoundMode rmode, int scale,
float_status *s)
{
return round_to_int_and_pack(float64_unpack_canonical(a, s),
rmode, scale, INT32_MIN, INT32_MAX, s);
}
-int64_t float64_to_int64_scalbn(float64 a, int rmode, int scale,
+int64_t float64_to_int64_scalbn(float64 a, FloatRoundMode rmode, int scale,
float_status *s)
{
return round_to_int_and_pack(float64_unpack_canonical(a, s),
* flag.
*/
-static uint64_t round_to_uint_and_pack(FloatParts in, int rmode, int scale,
- uint64_t max, float_status *s)
+static uint64_t round_to_uint_and_pack(FloatParts in, FloatRoundMode rmode,
+ int scale, uint64_t max,
+ float_status *s)
{
int orig_flags = get_float_exception_flags(s);
FloatParts p = round_to_int(in, rmode, scale, s);
}
}
-uint16_t float16_to_uint16_scalbn(float16 a, int rmode, int scale,
+uint16_t float16_to_uint16_scalbn(float16 a, FloatRoundMode rmode, int scale,
float_status *s)
{
return round_to_uint_and_pack(float16_unpack_canonical(a, s),
rmode, scale, UINT16_MAX, s);
}
-uint32_t float16_to_uint32_scalbn(float16 a, int rmode, int scale,
+uint32_t float16_to_uint32_scalbn(float16 a, FloatRoundMode rmode, int scale,
float_status *s)
{
return round_to_uint_and_pack(float16_unpack_canonical(a, s),
rmode, scale, UINT32_MAX, s);
}
-uint64_t float16_to_uint64_scalbn(float16 a, int rmode, int scale,
+uint64_t float16_to_uint64_scalbn(float16 a, FloatRoundMode rmode, int scale,
float_status *s)
{
return round_to_uint_and_pack(float16_unpack_canonical(a, s),
rmode, scale, UINT64_MAX, s);
}
-uint16_t float32_to_uint16_scalbn(float32 a, int rmode, int scale,
+uint16_t float32_to_uint16_scalbn(float32 a, FloatRoundMode rmode, int scale,
float_status *s)
{
return round_to_uint_and_pack(float32_unpack_canonical(a, s),
rmode, scale, UINT16_MAX, s);
}
-uint32_t float32_to_uint32_scalbn(float32 a, int rmode, int scale,
+uint32_t float32_to_uint32_scalbn(float32 a, FloatRoundMode rmode, int scale,
float_status *s)
{
return round_to_uint_and_pack(float32_unpack_canonical(a, s),
rmode, scale, UINT32_MAX, s);
}
-uint64_t float32_to_uint64_scalbn(float32 a, int rmode, int scale,
+uint64_t float32_to_uint64_scalbn(float32 a, FloatRoundMode rmode, int scale,
float_status *s)
{
return round_to_uint_and_pack(float32_unpack_canonical(a, s),
rmode, scale, UINT64_MAX, s);
}
-uint16_t float64_to_uint16_scalbn(float64 a, int rmode, int scale,
+uint16_t float64_to_uint16_scalbn(float64 a, FloatRoundMode rmode, int scale,
float_status *s)
{
return round_to_uint_and_pack(float64_unpack_canonical(a, s),
rmode, scale, UINT16_MAX, s);
}
-uint32_t float64_to_uint32_scalbn(float64 a, int rmode, int scale,
+uint32_t float64_to_uint32_scalbn(float64 a, FloatRoundMode rmode, int scale,
float_status *s)
{
return round_to_uint_and_pack(float64_unpack_canonical(a, s),
rmode, scale, UINT32_MAX, s);
}
-uint64_t float64_to_uint64_scalbn(float64 a, int rmode, int scale,
+uint64_t float64_to_uint64_scalbn(float64 a, FloatRoundMode rmode, int scale,
float_status *s)
{
return round_to_uint_and_pack(float64_unpack_canonical(a, s),
#undef MINMAX
/* Floating point compare */
-static int compare_floats(FloatParts a, FloatParts b, bool is_quiet,
- float_status *s)
+static FloatRelation compare_floats(FloatParts a, FloatParts b, bool is_quiet,
+ float_status *s)
{
if (is_nan(a.cls) || is_nan(b.cls)) {
if (!is_quiet ||
#undef COMPARE
-int float16_compare(float16 a, float16 b, float_status *s)
+FloatRelation float16_compare(float16 a, float16 b, float_status *s)
{
return soft_f16_compare(a, b, false, s);
}
-int float16_compare_quiet(float16 a, float16 b, float_status *s)
+FloatRelation float16_compare_quiet(float16 a, float16 b, float_status *s)
{
return soft_f16_compare(a, b, true, s);
}
-static int QEMU_FLATTEN
+static FloatRelation QEMU_FLATTEN
f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
{
union_float32 ua, ub;
return soft_f32_compare(ua.s, ub.s, is_quiet, s);
}
-int float32_compare(float32 a, float32 b, float_status *s)
+FloatRelation float32_compare(float32 a, float32 b, float_status *s)
{
return f32_compare(a, b, false, s);
}
-int float32_compare_quiet(float32 a, float32 b, float_status *s)
+FloatRelation float32_compare_quiet(float32 a, float32 b, float_status *s)
{
return f32_compare(a, b, true, s);
}
-static int QEMU_FLATTEN
+static FloatRelation QEMU_FLATTEN
f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
{
union_float64 ua, ub;
return soft_f64_compare(ua.s, ub.s, is_quiet, s);
}
-int float64_compare(float64 a, float64 b, float_status *s)
+FloatRelation float64_compare(float64 a, float64 b, float_status *s)
{
return f64_compare(a, b, false, s);
}
-int float64_compare_quiet(float64 a, float64 b, float_status *s)
+FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
{
return f64_compare(a, b, true, s);
}
*/
r.low = -(p.frac & 1);
r.high = p.frac >> (DECOMPOSED_BINARY_POINT - 48);
- r.high |= LIT64(0x7FFF000000000000);
+ r.high |= UINT64_C(0x7FFF000000000000);
r.high |= (uint64_t)p.sign << 63;
return r;
return float64_pack_raw(p);
}
+
+/*----------------------------------------------------------------------------
+| If `a' is denormal and we are in flush-to-zero mode then set the
+| input-denormal exception and return zero. Otherwise just return the value.
+*----------------------------------------------------------------------------*/
+
+static bool parts_squash_denormal(FloatParts p, float_status *status)
+{
+ if (p.exp == 0 && p.frac != 0) {
+ float_raise(float_flag_input_denormal, status);
+ return true;
+ }
+
+ return false;
+}
+
+float16 float16_squash_input_denormal(float16 a, float_status *status)
+{
+ if (status->flush_inputs_to_zero) {
+ FloatParts p = float16_unpack_raw(a);
+ if (parts_squash_denormal(p, status)) {
+ return float16_set_sign(float16_zero, p.sign);
+ }
+ }
+ return a;
+}
+
+float32 float32_squash_input_denormal(float32 a, float_status *status)
+{
+ if (status->flush_inputs_to_zero) {
+ FloatParts p = float32_unpack_raw(a);
+ if (parts_squash_denormal(p, status)) {
+ return float32_set_sign(float32_zero, p.sign);
+ }
+ }
+ return a;
+}
+
+float64 float64_squash_input_denormal(float64 a, float_status *status)
+{
+ if (status->flush_inputs_to_zero) {
+ FloatParts p = float64_unpack_raw(a);
+ if (parts_squash_denormal(p, status)) {
+ return float64_set_sign(float64_zero, p.sign);
+ }
+ }
+ return a;
+}
+
/*----------------------------------------------------------------------------
| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
| and 7, and returns the properly rounded 32-bit integer corresponding to the
| positive or negative integer is returned.
*----------------------------------------------------------------------------*/
-static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status)
+static int32_t roundAndPackInt32(bool zSign, uint64_t absZ,
+ float_status *status)
{
int8_t roundingMode;
- flag roundNearestEven;
+ bool roundNearestEven;
int8_t roundIncrement, roundBits;
int32_t z;
if ( zSign ) z = - z;
if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
float_raise(float_flag_invalid, status);
- return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
+ return zSign ? INT32_MIN : INT32_MAX;
}
if (roundBits) {
status->float_exception_flags |= float_flag_inexact;
| returned.
*----------------------------------------------------------------------------*/
-static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1,
+static int64_t roundAndPackInt64(bool zSign, uint64_t absZ0, uint64_t absZ1,
float_status *status)
{
int8_t roundingMode;
- flag roundNearestEven, increment;
+ bool roundNearestEven, increment;
int64_t z;
roundingMode = status->float_rounding_mode;
if ( z && ( ( z < 0 ) ^ zSign ) ) {
overflow:
float_raise(float_flag_invalid, status);
- return
- zSign ? (int64_t) LIT64( 0x8000000000000000 )
- : LIT64( 0x7FFFFFFFFFFFFFFF );
+ return zSign ? INT64_MIN : INT64_MAX;
}
if (absZ1) {
status->float_exception_flags |= float_flag_inexact;
| exception is raised and the largest unsigned integer is returned.
*----------------------------------------------------------------------------*/
-static int64_t roundAndPackUint64(flag zSign, uint64_t absZ0,
+static int64_t roundAndPackUint64(bool zSign, uint64_t absZ0,
uint64_t absZ1, float_status *status)
{
int8_t roundingMode;
- flag roundNearestEven, increment;
+ bool roundNearestEven, increment;
roundingMode = status->float_rounding_mode;
roundNearestEven = (roundingMode == float_round_nearest_even);
++absZ0;
if (absZ0 == 0) {
float_raise(float_flag_invalid, status);
- return LIT64(0xFFFFFFFFFFFFFFFF);
+ return UINT64_MAX;
}
absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
}
return absZ0;
}
-/*----------------------------------------------------------------------------
-| If `a' is denormal and we are in flush-to-zero mode then set the
-| input-denormal exception and return zero. Otherwise just return the value.
-*----------------------------------------------------------------------------*/
-float32 float32_squash_input_denormal(float32 a, float_status *status)
-{
- if (status->flush_inputs_to_zero) {
- if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
- float_raise(float_flag_input_denormal, status);
- return make_float32(float32_val(a) & 0x80000000);
- }
- }
- return a;
-}
-
/*----------------------------------------------------------------------------
| Normalizes the subnormal single-precision floating-point value represented
| by the denormalized significand `aSig'. The normalized exponent and
| Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
-static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
+static float32 roundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
float_status *status)
{
int8_t roundingMode;
- flag roundNearestEven;
+ bool roundNearestEven;
int8_t roundIncrement, roundBits;
- flag isTiny;
+ bool isTiny;
roundingMode = status->float_rounding_mode;
roundNearestEven = ( roundingMode == float_round_nearest_even );
float_raise(float_flag_output_denormal, status);
return packFloat32(zSign, 0, 0);
}
- isTiny =
- (status->float_detect_tininess
- == float_tininess_before_rounding)
- || ( zExp < -1 )
- || ( zSig + roundIncrement < 0x80000000 );
+ isTiny = status->tininess_before_rounding
+ || (zExp < -1)
+ || (zSig + roundIncrement < 0x80000000);
shift32RightJamming( zSig, - zExp, &zSig );
zExp = 0;
roundBits = zSig & 0x7F;
*----------------------------------------------------------------------------*/
static float32
- normalizeRoundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
+ normalizeRoundAndPackFloat32(bool zSign, int zExp, uint32_t zSig,
float_status *status)
{
int8_t shiftCount;
}
-/*----------------------------------------------------------------------------
-| If `a' is denormal and we are in flush-to-zero mode then set the
-| input-denormal exception and return zero. Otherwise just return the value.
-*----------------------------------------------------------------------------*/
-float64 float64_squash_input_denormal(float64 a, float_status *status)
-{
- if (status->flush_inputs_to_zero) {
- if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
- float_raise(float_flag_input_denormal, status);
- return make_float64(float64_val(a) & (1ULL << 63));
- }
- }
- return a;
-}
-
/*----------------------------------------------------------------------------
| Normalizes the subnormal double-precision floating-point value represented
| by the denormalized significand `aSig'. The normalized exponent and
| significand.
*----------------------------------------------------------------------------*/
-static inline float64 packFloat64(flag zSign, int zExp, uint64_t zSig)
+static inline float64 packFloat64(bool zSign, int zExp, uint64_t zSig)
{
return make_float64(
| Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
-static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
+static float64 roundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
float_status *status)
{
int8_t roundingMode;
- flag roundNearestEven;
+ bool roundNearestEven;
int roundIncrement, roundBits;
- flag isTiny;
+ bool isTiny;
roundingMode = status->float_rounding_mode;
roundNearestEven = ( roundingMode == float_round_nearest_even );
float_raise(float_flag_output_denormal, status);
return packFloat64(zSign, 0, 0);
}
- isTiny =
- (status->float_detect_tininess
- == float_tininess_before_rounding)
- || ( zExp < -1 )
- || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
+ isTiny = status->tininess_before_rounding
+ || (zExp < -1)
+ || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
shift64RightJamming( zSig, - zExp, &zSig );
zExp = 0;
roundBits = zSig & 0x3FF;
*----------------------------------------------------------------------------*/
static float64
- normalizeRoundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
+ normalizeRoundAndPackFloat64(bool zSign, int zExp, uint64_t zSig,
float_status *status)
{
int8_t shiftCount;
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
-floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign,
+floatx80 roundAndPackFloatx80(int8_t roundingPrecision, bool zSign,
int32_t zExp, uint64_t zSig0, uint64_t zSig1,
float_status *status)
{
int8_t roundingMode;
- flag roundNearestEven, increment, isTiny;
+ bool roundNearestEven, increment, isTiny;
int64_t roundIncrement, roundMask, roundBits;
roundingMode = status->float_rounding_mode;
roundNearestEven = ( roundingMode == float_round_nearest_even );
if ( roundingPrecision == 80 ) goto precision80;
if ( roundingPrecision == 64 ) {
- roundIncrement = LIT64( 0x0000000000000400 );
- roundMask = LIT64( 0x00000000000007FF );
+ roundIncrement = UINT64_C(0x0000000000000400);
+ roundMask = UINT64_C(0x00000000000007FF);
}
else if ( roundingPrecision == 32 ) {
- roundIncrement = LIT64( 0x0000008000000000 );
- roundMask = LIT64( 0x000000FFFFFFFFFF );
+ roundIncrement = UINT64_C(0x0000008000000000);
+ roundMask = UINT64_C(0x000000FFFFFFFFFF);
}
else {
goto precision80;
float_raise(float_flag_output_denormal, status);
return packFloatx80(zSign, 0, 0);
}
- isTiny =
- (status->float_detect_tininess
- == float_tininess_before_rounding)
- || ( zExp < 0 )
- || ( zSig0 <= zSig0 + roundIncrement );
+ isTiny = status->tininess_before_rounding
+ || (zExp < 0 )
+ || (zSig0 <= zSig0 + roundIncrement);
shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
zExp = 0;
roundBits = zSig0 & roundMask;
zSig0 += roundIncrement;
if ( zSig0 < roundIncrement ) {
++zExp;
- zSig0 = LIT64( 0x8000000000000000 );
+ zSig0 = UINT64_C(0x8000000000000000);
}
roundIncrement = roundMask + 1;
if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
if ( ( 0x7FFE < zExp )
|| ( ( zExp == 0x7FFE )
- && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
+ && ( zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF) )
&& increment
)
) {
floatx80_infinity_low);
}
if ( zExp <= 0 ) {
- isTiny =
- (status->float_detect_tininess
- == float_tininess_before_rounding)
- || ( zExp < 0 )
- || ! increment
- || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
+ isTiny = status->tininess_before_rounding
+ || (zExp < 0)
+ || !increment
+ || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
zExp = 0;
if (isTiny && zSig1) {
++zSig0;
if ( zSig0 == 0 ) {
++zExp;
- zSig0 = LIT64( 0x8000000000000000 );
+ zSig0 = UINT64_C(0x8000000000000000);
}
else {
zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
*----------------------------------------------------------------------------*/
floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
- flag zSign, int32_t zExp,
+ bool zSign, int32_t zExp,
uint64_t zSig0, uint64_t zSig1,
float_status *status)
{
static inline uint64_t extractFloat128Frac0( float128 a )
{
- return a.high & LIT64( 0x0000FFFFFFFFFFFF );
+ return a.high & UINT64_C(0x0000FFFFFFFFFFFF);
}
| Returns the sign bit of the quadruple-precision floating-point value `a'.
*----------------------------------------------------------------------------*/
-static inline flag extractFloat128Sign( float128 a )
+static inline bool extractFloat128Sign(float128 a)
{
-
- return a.high>>63;
-
+ return a.high >> 63;
}
/*----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
static inline float128
- packFloat128( flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1 )
+packFloat128(bool zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
{
float128 z;
z.low = zSig1;
- z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
+ z.high = ((uint64_t)zSign << 63) + ((uint64_t)zExp << 48) + zSig0;
return z;
-
}
/*----------------------------------------------------------------------------
| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
-static float128 roundAndPackFloat128(flag zSign, int32_t zExp,
+static float128 roundAndPackFloat128(bool zSign, int32_t zExp,
uint64_t zSig0, uint64_t zSig1,
uint64_t zSig2, float_status *status)
{
int8_t roundingMode;
- flag roundNearestEven, increment, isTiny;
+ bool roundNearestEven, increment, isTiny;
roundingMode = status->float_rounding_mode;
roundNearestEven = ( roundingMode == float_round_nearest_even );
if ( ( 0x7FFD < zExp )
|| ( ( zExp == 0x7FFD )
&& eq128(
- LIT64( 0x0001FFFFFFFFFFFF ),
- LIT64( 0xFFFFFFFFFFFFFFFF ),
+ UINT64_C(0x0001FFFFFFFFFFFF),
+ UINT64_C(0xFFFFFFFFFFFFFFFF),
zSig0,
zSig1
)
packFloat128(
zSign,
0x7FFE,
- LIT64( 0x0000FFFFFFFFFFFF ),
- LIT64( 0xFFFFFFFFFFFFFFFF )
+ UINT64_C(0x0000FFFFFFFFFFFF),
+ UINT64_C(0xFFFFFFFFFFFFFFFF)
);
}
return packFloat128( zSign, 0x7FFF, 0, 0 );
float_raise(float_flag_output_denormal, status);
return packFloat128(zSign, 0, 0, 0);
}
- isTiny =
- (status->float_detect_tininess
- == float_tininess_before_rounding)
- || ( zExp < -1 )
- || ! increment
- || lt128(
- zSig0,
- zSig1,
- LIT64( 0x0001FFFFFFFFFFFF ),
- LIT64( 0xFFFFFFFFFFFFFFFF )
- );
+ isTiny = status->tininess_before_rounding
+ || (zExp < -1)
+ || !increment
+ || lt128(zSig0, zSig1,
+ UINT64_C(0x0001FFFFFFFFFFFF),
+ UINT64_C(0xFFFFFFFFFFFFFFFF));
shift128ExtraRightJamming(
zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
zExp = 0;
| point exponent.
*----------------------------------------------------------------------------*/
-static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp,
+static float128 normalizeRoundAndPackFloat128(bool zSign, int32_t zExp,
uint64_t zSig0, uint64_t zSig1,
float_status *status)
{
floatx80 int32_to_floatx80(int32_t a, float_status *status)
{
- flag zSign;
+ bool zSign;
uint32_t absA;
int8_t shiftCount;
uint64_t zSig;
float128 int32_to_float128(int32_t a, float_status *status)
{
- flag zSign;
+ bool zSign;
uint32_t absA;
int8_t shiftCount;
uint64_t zSig0;
floatx80 int64_to_floatx80(int64_t a, float_status *status)
{
- flag zSign;
+ bool zSign;
uint64_t absA;
int8_t shiftCount;
float128 int64_to_float128(int64_t a, float_status *status)
{
- flag zSign;
+ bool zSign;
uint64_t absA;
int8_t shiftCount;
int32_t zExp;
floatx80 float32_to_floatx80(float32 a, float_status *status)
{
- flag aSign;
+ bool aSign;
int aExp;
uint32_t aSig;
aSign = extractFloat32Sign( a );
if ( aExp == 0xFF ) {
if (aSig) {
- return commonNaNToFloatx80(float32ToCommonNaN(a, status), status);
+ floatx80 res = commonNaNToFloatx80(float32ToCommonNaN(a, status),
+ status);
+ return floatx80_silence_nan(res, status);
}
return packFloatx80(aSign,
floatx80_infinity_high,
float128 float32_to_float128(float32 a, float_status *status)
{
- flag aSign;
+ bool aSign;
int aExp;
uint32_t aSig;
float32 float32_rem(float32 a, float32 b, float_status *status)
{
- flag aSign, zSign;
+ bool aSign, zSign;
int aExp, bExp, expDiff;
uint32_t aSig, bSig;
uint32_t q;
float32 float32_exp2(float32 a, float_status *status)
{
- flag aSign;
+ bool aSign;
int aExp;
uint32_t aSig;
float64 r, x, xn;
*----------------------------------------------------------------------------*/
float32 float32_log2(float32 a, float_status *status)
{
- flag aSign, zSign;
+ bool aSign, zSign;
int aExp;
uint32_t aSig, zSig, i;
return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
}
-/*----------------------------------------------------------------------------
-| Returns 1 if the single-precision floating-point value `a' is equal to
-| the corresponding value `b', and 0 otherwise. The invalid exception is
-| raised if either operand is a NaN. Otherwise, the comparison is performed
-| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int float32_eq(float32 a, float32 b, float_status *status)
-{
- uint32_t av, bv;
- a = float32_squash_input_denormal(a, status);
- b = float32_squash_input_denormal(b, status);
-
- if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
- || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
- ) {
- float_raise(float_flag_invalid, status);
- return 0;
- }
- av = float32_val(a);
- bv = float32_val(b);
- return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the single-precision floating-point value `a' is less than
-| or equal to the corresponding value `b', and 0 otherwise. The invalid
-| exception is raised if either operand is a NaN. The comparison is performed
-| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int float32_le(float32 a, float32 b, float_status *status)
-{
- flag aSign, bSign;
- uint32_t av, bv;
- a = float32_squash_input_denormal(a, status);
- b = float32_squash_input_denormal(b, status);
-
- if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
- || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
- ) {
- float_raise(float_flag_invalid, status);
- return 0;
- }
- aSign = extractFloat32Sign( a );
- bSign = extractFloat32Sign( b );
- av = float32_val(a);
- bv = float32_val(b);
- if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
- return ( av == bv ) || ( aSign ^ ( av < bv ) );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the single-precision floating-point value `a' is less than
-| the corresponding value `b', and 0 otherwise. The invalid exception is
-| raised if either operand is a NaN. The comparison is performed according
-| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int float32_lt(float32 a, float32 b, float_status *status)
-{
- flag aSign, bSign;
- uint32_t av, bv;
- a = float32_squash_input_denormal(a, status);
- b = float32_squash_input_denormal(b, status);
-
- if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
- || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
- ) {
- float_raise(float_flag_invalid, status);
- return 0;
- }
- aSign = extractFloat32Sign( a );
- bSign = extractFloat32Sign( b );
- av = float32_val(a);
- bv = float32_val(b);
- if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
- return ( av != bv ) && ( aSign ^ ( av < bv ) );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the single-precision floating-point values `a' and `b' cannot
-| be compared, and 0 otherwise. The invalid exception is raised if either
-| operand is a NaN. The comparison is performed according to the IEC/IEEE
-| Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int float32_unordered(float32 a, float32 b, float_status *status)
-{
- a = float32_squash_input_denormal(a, status);
- b = float32_squash_input_denormal(b, status);
-
- if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
- || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
- ) {
- float_raise(float_flag_invalid, status);
- return 1;
- }
- return 0;
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the single-precision floating-point value `a' is equal to
-| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
-| exception. The comparison is performed according to the IEC/IEEE Standard
-| for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int float32_eq_quiet(float32 a, float32 b, float_status *status)
-{
- a = float32_squash_input_denormal(a, status);
- b = float32_squash_input_denormal(b, status);
-
- if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
- || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
- ) {
- if (float32_is_signaling_nan(a, status)
- || float32_is_signaling_nan(b, status)) {
- float_raise(float_flag_invalid, status);
- }
- return 0;
- }
- return ( float32_val(a) == float32_val(b) ) ||
- ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the single-precision floating-point value `a' is less than or
-| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
-| cause an exception. Otherwise, the comparison is performed according to the
-| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int float32_le_quiet(float32 a, float32 b, float_status *status)
-{
- flag aSign, bSign;
- uint32_t av, bv;
- a = float32_squash_input_denormal(a, status);
- b = float32_squash_input_denormal(b, status);
-
- if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
- || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
- ) {
- if (float32_is_signaling_nan(a, status)
- || float32_is_signaling_nan(b, status)) {
- float_raise(float_flag_invalid, status);
- }
- return 0;
- }
- aSign = extractFloat32Sign( a );
- bSign = extractFloat32Sign( b );
- av = float32_val(a);
- bv = float32_val(b);
- if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
- return ( av == bv ) || ( aSign ^ ( av < bv ) );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the single-precision floating-point value `a' is less than
-| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
-| exception. Otherwise, the comparison is performed according to the IEC/IEEE
-| Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int float32_lt_quiet(float32 a, float32 b, float_status *status)
-{
- flag aSign, bSign;
- uint32_t av, bv;
- a = float32_squash_input_denormal(a, status);
- b = float32_squash_input_denormal(b, status);
-
- if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
- || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
- ) {
- if (float32_is_signaling_nan(a, status)
- || float32_is_signaling_nan(b, status)) {
- float_raise(float_flag_invalid, status);
- }
- return 0;
- }
- aSign = extractFloat32Sign( a );
- bSign = extractFloat32Sign( b );
- av = float32_val(a);
- bv = float32_val(b);
- if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
- return ( av != bv ) && ( aSign ^ ( av < bv ) );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the single-precision floating-point values `a' and `b' cannot
-| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
-| comparison is performed according to the IEC/IEEE Standard for Binary
-| Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int float32_unordered_quiet(float32 a, float32 b, float_status *status)
-{
- a = float32_squash_input_denormal(a, status);
- b = float32_squash_input_denormal(b, status);
-
- if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
- || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
- ) {
- if (float32_is_signaling_nan(a, status)
- || float32_is_signaling_nan(b, status)) {
- float_raise(float_flag_invalid, status);
- }
- return 1;
- }
- return 0;
-}
-
-/*----------------------------------------------------------------------------
-| If `a' is denormal and we are in flush-to-zero mode then set the
-| input-denormal exception and return zero. Otherwise just return the value.
-*----------------------------------------------------------------------------*/
-float16 float16_squash_input_denormal(float16 a, float_status *status)
-{
- if (status->flush_inputs_to_zero) {
- if (extractFloat16Exp(a) == 0 && extractFloat16Frac(a) != 0) {
- float_raise(float_flag_input_denormal, status);
- return make_float16(float16_val(a) & 0x8000);
- }
- }
- return a;
-}
-
/*----------------------------------------------------------------------------
| Returns the result of converting the double-precision floating-point value
| `a' to the extended double-precision floating-point format. The conversion
floatx80 float64_to_floatx80(float64 a, float_status *status)
{
- flag aSign;
+ bool aSign;
int aExp;
uint64_t aSig;
aSign = extractFloat64Sign( a );
if ( aExp == 0x7FF ) {
if (aSig) {
- return commonNaNToFloatx80(float64ToCommonNaN(a, status), status);
+ floatx80 res = commonNaNToFloatx80(float64ToCommonNaN(a, status),
+ status);
+ return floatx80_silence_nan(res, status);
}
return packFloatx80(aSign,
floatx80_infinity_high,
}
return
packFloatx80(
- aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
+ aSign, aExp + 0x3C00, (aSig | UINT64_C(0x0010000000000000)) << 11);
}
float128 float64_to_float128(float64 a, float_status *status)
{
- flag aSign;
+ bool aSign;
int aExp;
uint64_t aSig, zSig0, zSig1;
float64 float64_rem(float64 a, float64 b, float_status *status)
{
- flag aSign, zSign;
+ bool aSign, zSign;
int aExp, bExp, expDiff;
uint64_t aSig, bSig;
uint64_t q, alternateASig;
normalizeFloat64Subnormal( aSig, &aExp, &aSig );
}
expDiff = aExp - bExp;
- aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
- bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
+ aSig = (aSig | UINT64_C(0x0010000000000000)) << 11;
+ bSig = (bSig | UINT64_C(0x0010000000000000)) << 11;
if ( expDiff < 0 ) {
if ( expDiff < -1 ) return a;
aSig >>= 1;
*----------------------------------------------------------------------------*/
float64 float64_log2(float64 a, float_status *status)
{
- flag aSign, zSign;
+ bool aSign, zSign;
int aExp;
uint64_t aSig, aSig0, aSig1, zSig, i;
a = float64_squash_input_denormal(a, status);
}
aExp -= 0x3FF;
- aSig |= LIT64( 0x0010000000000000 );
+ aSig |= UINT64_C(0x0010000000000000);
zSign = aExp < 0;
zSig = (uint64_t)aExp << 52;
for (i = 1LL << 51; i > 0; i >>= 1) {
mul64To128( aSig, aSig, &aSig0, &aSig1 );
aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
- if ( aSig & LIT64( 0x0020000000000000 ) ) {
+ if ( aSig & UINT64_C(0x0020000000000000) ) {
aSig >>= 1;
zSig |= i;
}
}
/*----------------------------------------------------------------------------
-| Returns 1 if the double-precision floating-point value `a' is equal to the
-| corresponding value `b', and 0 otherwise. The invalid exception is raised
-| if either operand is a NaN. Otherwise, the comparison is performed
-| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 32-bit two's complement integer format. The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic---which means in particular that the conversion
+| is rounded according to the current rounding mode. If `a' is a NaN, the
+| largest positive integer is returned. Otherwise, if the conversion
+| overflows, the largest integer with the same sign as `a' is returned.
*----------------------------------------------------------------------------*/
-int float64_eq(float64 a, float64 b, float_status *status)
+int32_t floatx80_to_int32(floatx80 a, float_status *status)
{
- uint64_t av, bv;
- a = float64_squash_input_denormal(a, status);
- b = float64_squash_input_denormal(b, status);
+ bool aSign;
+ int32_t aExp, shiftCount;
+ uint64_t aSig;
- if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
- || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
- ) {
- float_raise(float_flag_invalid, status);
- return 0;
- }
- av = float64_val(a);
- bv = float64_val(b);
- return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the double-precision floating-point value `a' is less than or
-| equal to the corresponding value `b', and 0 otherwise. The invalid
-| exception is raised if either operand is a NaN. The comparison is performed
-| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int float64_le(float64 a, float64 b, float_status *status)
-{
- flag aSign, bSign;
- uint64_t av, bv;
- a = float64_squash_input_denormal(a, status);
- b = float64_squash_input_denormal(b, status);
-
- if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
- || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
- ) {
- float_raise(float_flag_invalid, status);
- return 0;
- }
- aSign = extractFloat64Sign( a );
- bSign = extractFloat64Sign( b );
- av = float64_val(a);
- bv = float64_val(b);
- if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
- return ( av == bv ) || ( aSign ^ ( av < bv ) );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the double-precision floating-point value `a' is less than
-| the corresponding value `b', and 0 otherwise. The invalid exception is
-| raised if either operand is a NaN. The comparison is performed according
-| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int float64_lt(float64 a, float64 b, float_status *status)
-{
- flag aSign, bSign;
- uint64_t av, bv;
-
- a = float64_squash_input_denormal(a, status);
- b = float64_squash_input_denormal(b, status);
- if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
- || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
- ) {
- float_raise(float_flag_invalid, status);
- return 0;
- }
- aSign = extractFloat64Sign( a );
- bSign = extractFloat64Sign( b );
- av = float64_val(a);
- bv = float64_val(b);
- if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
- return ( av != bv ) && ( aSign ^ ( av < bv ) );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the double-precision floating-point values `a' and `b' cannot
-| be compared, and 0 otherwise. The invalid exception is raised if either
-| operand is a NaN. The comparison is performed according to the IEC/IEEE
-| Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int float64_unordered(float64 a, float64 b, float_status *status)
-{
- a = float64_squash_input_denormal(a, status);
- b = float64_squash_input_denormal(b, status);
-
- if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
- || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
- ) {
- float_raise(float_flag_invalid, status);
- return 1;
- }
- return 0;
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the double-precision floating-point value `a' is equal to the
-| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
-| exception.The comparison is performed according to the IEC/IEEE Standard
-| for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int float64_eq_quiet(float64 a, float64 b, float_status *status)
-{
- uint64_t av, bv;
- a = float64_squash_input_denormal(a, status);
- b = float64_squash_input_denormal(b, status);
-
- if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
- || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
- ) {
- if (float64_is_signaling_nan(a, status)
- || float64_is_signaling_nan(b, status)) {
- float_raise(float_flag_invalid, status);
- }
- return 0;
- }
- av = float64_val(a);
- bv = float64_val(b);
- return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the double-precision floating-point value `a' is less than or
-| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
-| cause an exception. Otherwise, the comparison is performed according to the
-| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int float64_le_quiet(float64 a, float64 b, float_status *status)
-{
- flag aSign, bSign;
- uint64_t av, bv;
- a = float64_squash_input_denormal(a, status);
- b = float64_squash_input_denormal(b, status);
-
- if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
- || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
- ) {
- if (float64_is_signaling_nan(a, status)
- || float64_is_signaling_nan(b, status)) {
- float_raise(float_flag_invalid, status);
- }
- return 0;
- }
- aSign = extractFloat64Sign( a );
- bSign = extractFloat64Sign( b );
- av = float64_val(a);
- bv = float64_val(b);
- if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
- return ( av == bv ) || ( aSign ^ ( av < bv ) );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the double-precision floating-point value `a' is less than
-| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
-| exception. Otherwise, the comparison is performed according to the IEC/IEEE
-| Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int float64_lt_quiet(float64 a, float64 b, float_status *status)
-{
- flag aSign, bSign;
- uint64_t av, bv;
- a = float64_squash_input_denormal(a, status);
- b = float64_squash_input_denormal(b, status);
-
- if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
- || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
- ) {
- if (float64_is_signaling_nan(a, status)
- || float64_is_signaling_nan(b, status)) {
- float_raise(float_flag_invalid, status);
- }
- return 0;
- }
- aSign = extractFloat64Sign( a );
- bSign = extractFloat64Sign( b );
- av = float64_val(a);
- bv = float64_val(b);
- if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
- return ( av != bv ) && ( aSign ^ ( av < bv ) );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the double-precision floating-point values `a' and `b' cannot
-| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
-| comparison is performed according to the IEC/IEEE Standard for Binary
-| Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int float64_unordered_quiet(float64 a, float64 b, float_status *status)
-{
- a = float64_squash_input_denormal(a, status);
- b = float64_squash_input_denormal(b, status);
-
- if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
- || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
- ) {
- if (float64_is_signaling_nan(a, status)
- || float64_is_signaling_nan(b, status)) {
- float_raise(float_flag_invalid, status);
- }
- return 1;
- }
- return 0;
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of converting the extended double-precision floating-
-| point value `a' to the 32-bit two's complement integer format. The
-| conversion is performed according to the IEC/IEEE Standard for Binary
-| Floating-Point Arithmetic---which means in particular that the conversion
-| is rounded according to the current rounding mode. If `a' is a NaN, the
-| largest positive integer is returned. Otherwise, if the conversion
-| overflows, the largest integer with the same sign as `a' is returned.
-*----------------------------------------------------------------------------*/
-
-int32_t floatx80_to_int32(floatx80 a, float_status *status)
-{
- flag aSign;
- int32_t aExp, shiftCount;
- uint64_t aSig;
-
- if (floatx80_invalid_encoding(a)) {
+ if (floatx80_invalid_encoding(a)) {
float_raise(float_flag_invalid, status);
return 1 << 31;
}
int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
{
- flag aSign;
+ bool aSign;
int32_t aExp, shiftCount;
uint64_t aSig, savedASig;
int32_t z;
int64_t floatx80_to_int64(floatx80 a, float_status *status)
{
- flag aSign;
+ bool aSign;
int32_t aExp, shiftCount;
uint64_t aSig, aSigExtra;
if ( shiftCount ) {
float_raise(float_flag_invalid, status);
if (!aSign || floatx80_is_any_nan(a)) {
- return LIT64( 0x7FFFFFFFFFFFFFFF );
+ return INT64_MAX;
}
- return (int64_t) LIT64( 0x8000000000000000 );
+ return INT64_MIN;
}
aSigExtra = 0;
}
int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
{
- flag aSign;
+ bool aSign;
int32_t aExp, shiftCount;
uint64_t aSig;
int64_t z;
aSign = extractFloatx80Sign( a );
shiftCount = aExp - 0x403E;
if ( 0 <= shiftCount ) {
- aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
+ aSig &= UINT64_C(0x7FFFFFFFFFFFFFFF);
if ( ( a.high != 0xC03E ) || aSig ) {
float_raise(float_flag_invalid, status);
if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
- return LIT64( 0x7FFFFFFFFFFFFFFF );
+ return INT64_MAX;
}
}
- return (int64_t) LIT64( 0x8000000000000000 );
+ return INT64_MIN;
}
else if ( aExp < 0x3FFF ) {
if (aExp | aSig) {
float32 floatx80_to_float32(floatx80 a, float_status *status)
{
- flag aSign;
+ bool aSign;
int32_t aExp;
uint64_t aSig;
aSign = extractFloatx80Sign( a );
if ( aExp == 0x7FFF ) {
if ( (uint64_t) ( aSig<<1 ) ) {
- return commonNaNToFloat32(floatx80ToCommonNaN(a, status), status);
+ float32 res = commonNaNToFloat32(floatx80ToCommonNaN(a, status),
+ status);
+ return float32_silence_nan(res, status);
}
return packFloat32( aSign, 0xFF, 0 );
}
float64 floatx80_to_float64(floatx80 a, float_status *status)
{
- flag aSign;
+ bool aSign;
int32_t aExp;
uint64_t aSig, zSig;
aSign = extractFloatx80Sign( a );
if ( aExp == 0x7FFF ) {
if ( (uint64_t) ( aSig<<1 ) ) {
- return commonNaNToFloat64(floatx80ToCommonNaN(a, status), status);
+ float64 res = commonNaNToFloat64(floatx80ToCommonNaN(a, status),
+ status);
+ return float64_silence_nan(res, status);
}
return packFloat64( aSign, 0x7FF, 0 );
}
float128 floatx80_to_float128(floatx80 a, float_status *status)
{
- flag aSign;
+ bool aSign;
int aExp;
uint64_t aSig, zSig0, zSig1;
aExp = extractFloatx80Exp( a );
aSign = extractFloatx80Sign( a );
if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
- return commonNaNToFloat128(floatx80ToCommonNaN(a, status), status);
+ float128 res = commonNaNToFloat128(floatx80ToCommonNaN(a, status),
+ status);
+ return float128_silence_nan(res, status);
}
shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
return packFloat128( aSign, aExp, zSig0, zSig1 );
floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
{
- flag aSign;
+ bool aSign;
int32_t aExp;
uint64_t lastBitMask, roundBitsMask;
floatx80 z;
}
if ( aExp < 0x3FFF ) {
if ( ( aExp == 0 )
- && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
+ && ( (uint64_t) ( extractFloatx80Frac( a ) ) == 0 ) ) {
return a;
}
status->float_exception_flags |= float_flag_inexact;
if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
) {
return
- packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
+ packFloatx80( aSign, 0x3FFF, UINT64_C(0x8000000000000000));
}
break;
case float_round_ties_away:
if (aExp == 0x3FFE) {
- return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000));
+ return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
}
break;
case float_round_down:
return
aSign ?
- packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
+ packFloatx80( 1, 0x3FFF, UINT64_C(0x8000000000000000))
: packFloatx80( 0, 0, 0 );
case float_round_up:
return
aSign ? packFloatx80( 1, 0, 0 )
- : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
+ : packFloatx80( 0, 0x3FFF, UINT64_C(0x8000000000000000));
+
+ case float_round_to_zero:
+ break;
+ default:
+ g_assert_not_reached();
}
return packFloatx80( aSign, 0, 0 );
}
z.low &= ~ roundBitsMask;
if ( z.low == 0 ) {
++z.high;
- z.low = LIT64( 0x8000000000000000 );
+ z.low = UINT64_C(0x8000000000000000);
}
if (z.low != a.low) {
status->float_exception_flags |= float_flag_inexact;
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
-static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
+static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
float_status *status)
{
int32_t aExp, bExp, zExp;
zSig1 = 0;
zSig0 = aSig + bSig;
if ( aExp == 0 ) {
+ if ((aSig | bSig) & UINT64_C(0x8000000000000000) && zSig0 < aSig) {
+ /* At least one of the values is a pseudo-denormal,
+ * and there is a carry out of the result. */
+ zExp = 1;
+ goto shiftRight1;
+ }
+ if (zSig0 == 0) {
+ return packFloatx80(zSign, 0, 0);
+ }
normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
goto roundAndPack;
}
if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
shiftRight1:
shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
- zSig0 |= LIT64( 0x8000000000000000 );
+ zSig0 |= UINT64_C(0x8000000000000000);
++zExp;
roundAndPack:
return roundAndPackFloatx80(status->floatx80_rounding_precision,
| Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
-static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
+static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, bool zSign,
float_status *status)
{
int32_t aExp, bExp, zExp;
floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
{
- flag aSign, bSign;
+ bool aSign, bSign;
if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
float_raise(float_flag_invalid, status);
floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
{
- flag aSign, bSign;
+ bool aSign, bSign;
if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
float_raise(float_flag_invalid, status);
floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
{
- flag aSign, bSign, zSign;
+ bool aSign, bSign, zSign;
int32_t aExp, bExp, zExp;
uint64_t aSig, bSig, zSig0, zSig1;
floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
{
- flag aSign, bSign, zSign;
+ bool aSign, bSign, zSign;
int32_t aExp, bExp, zExp;
uint64_t aSig, bSig, zSig0, zSig1;
uint64_t rem0, rem1, rem2, term0, term1, term2;
floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
{
- flag aSign, zSign;
+ bool aSign, zSign;
int32_t aExp, bExp, expDiff;
uint64_t aSig0, aSig1, bSig;
uint64_t q, term0, term1, alternateASig0, alternateASig1;
if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
}
- bSig |= LIT64( 0x8000000000000000 );
+ bSig |= UINT64_C(0x8000000000000000);
zSign = aSign;
expDiff = aExp - bExp;
aSig1 = 0;
floatx80 floatx80_sqrt(floatx80 a, float_status *status)
{
- flag aSign;
+ bool aSign;
int32_t aExp, zExp;
uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
}
zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
- if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
+ if ( ( zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF) ) <= 5 ) {
if ( zSig1 == 0 ) zSig1 = 1;
mul64To128( doubleZSig0, zSig1, &term1, &term2 );
sub128( rem1, 0, term1, term2, &rem1, &rem2 );
0, zExp, zSig0, zSig1, status);
}
-/*----------------------------------------------------------------------------
-| Returns 1 if the extended double-precision floating-point value `a' is equal
-| to the corresponding value `b', and 0 otherwise. The invalid exception is
-| raised if either operand is a NaN. Otherwise, the comparison is performed
-| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int floatx80_eq(floatx80 a, floatx80 b, float_status *status)
-{
-
- if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
- || (extractFloatx80Exp(a) == 0x7FFF
- && (uint64_t) (extractFloatx80Frac(a) << 1))
- || (extractFloatx80Exp(b) == 0x7FFF
- && (uint64_t) (extractFloatx80Frac(b) << 1))
- ) {
- float_raise(float_flag_invalid, status);
- return 0;
- }
- return
- ( a.low == b.low )
- && ( ( a.high == b.high )
- || ( ( a.low == 0 )
- && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
- );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the extended double-precision floating-point value `a' is
-| less than or equal to the corresponding value `b', and 0 otherwise. The
-| invalid exception is raised if either operand is a NaN. The comparison is
-| performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int floatx80_le(floatx80 a, floatx80 b, float_status *status)
-{
- flag aSign, bSign;
-
- if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
- || (extractFloatx80Exp(a) == 0x7FFF
- && (uint64_t) (extractFloatx80Frac(a) << 1))
- || (extractFloatx80Exp(b) == 0x7FFF
- && (uint64_t) (extractFloatx80Frac(b) << 1))
- ) {
- float_raise(float_flag_invalid, status);
- return 0;
- }
- aSign = extractFloatx80Sign( a );
- bSign = extractFloatx80Sign( b );
- if ( aSign != bSign ) {
- return
- aSign
- || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
- == 0 );
- }
- return
- aSign ? le128( b.high, b.low, a.high, a.low )
- : le128( a.high, a.low, b.high, b.low );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the extended double-precision floating-point value `a' is
-| less than the corresponding value `b', and 0 otherwise. The invalid
-| exception is raised if either operand is a NaN. The comparison is performed
-| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int floatx80_lt(floatx80 a, floatx80 b, float_status *status)
-{
- flag aSign, bSign;
-
- if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
- || (extractFloatx80Exp(a) == 0x7FFF
- && (uint64_t) (extractFloatx80Frac(a) << 1))
- || (extractFloatx80Exp(b) == 0x7FFF
- && (uint64_t) (extractFloatx80Frac(b) << 1))
- ) {
- float_raise(float_flag_invalid, status);
- return 0;
- }
- aSign = extractFloatx80Sign( a );
- bSign = extractFloatx80Sign( b );
- if ( aSign != bSign ) {
- return
- aSign
- && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
- != 0 );
- }
- return
- aSign ? lt128( b.high, b.low, a.high, a.low )
- : lt128( a.high, a.low, b.high, b.low );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the extended double-precision floating-point values `a' and `b'
-| cannot be compared, and 0 otherwise. The invalid exception is raised if
-| either operand is a NaN. The comparison is performed according to the
-| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-int floatx80_unordered(floatx80 a, floatx80 b, float_status *status)
-{
- if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
- || (extractFloatx80Exp(a) == 0x7FFF
- && (uint64_t) (extractFloatx80Frac(a) << 1))
- || (extractFloatx80Exp(b) == 0x7FFF
- && (uint64_t) (extractFloatx80Frac(b) << 1))
- ) {
- float_raise(float_flag_invalid, status);
- return 1;
- }
- return 0;
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the extended double-precision floating-point value `a' is
-| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
-| cause an exception. The comparison is performed according to the IEC/IEEE
-| Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *status)
-{
-
- if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
- float_raise(float_flag_invalid, status);
- return 0;
- }
- if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
- && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
- || ( ( extractFloatx80Exp( b ) == 0x7FFF )
- && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
- ) {
- if (floatx80_is_signaling_nan(a, status)
- || floatx80_is_signaling_nan(b, status)) {
- float_raise(float_flag_invalid, status);
- }
- return 0;
- }
- return
- ( a.low == b.low )
- && ( ( a.high == b.high )
- || ( ( a.low == 0 )
- && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
- );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the extended double-precision floating-point value `a' is less
-| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
-| do not cause an exception. Otherwise, the comparison is performed according
-| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int floatx80_le_quiet(floatx80 a, floatx80 b, float_status *status)
-{
- flag aSign, bSign;
-
- if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
- float_raise(float_flag_invalid, status);
- return 0;
- }
- if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
- && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
- || ( ( extractFloatx80Exp( b ) == 0x7FFF )
- && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
- ) {
- if (floatx80_is_signaling_nan(a, status)
- || floatx80_is_signaling_nan(b, status)) {
- float_raise(float_flag_invalid, status);
- }
- return 0;
- }
- aSign = extractFloatx80Sign( a );
- bSign = extractFloatx80Sign( b );
- if ( aSign != bSign ) {
- return
- aSign
- || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
- == 0 );
- }
- return
- aSign ? le128( b.high, b.low, a.high, a.low )
- : le128( a.high, a.low, b.high, b.low );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the extended double-precision floating-point value `a' is less
-| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
-| an exception. Otherwise, the comparison is performed according to the
-| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status)
-{
- flag aSign, bSign;
-
- if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
- float_raise(float_flag_invalid, status);
- return 0;
- }
- if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
- && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
- || ( ( extractFloatx80Exp( b ) == 0x7FFF )
- && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
- ) {
- if (floatx80_is_signaling_nan(a, status)
- || floatx80_is_signaling_nan(b, status)) {
- float_raise(float_flag_invalid, status);
- }
- return 0;
- }
- aSign = extractFloatx80Sign( a );
- bSign = extractFloatx80Sign( b );
- if ( aSign != bSign ) {
- return
- aSign
- && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
- != 0 );
- }
- return
- aSign ? lt128( b.high, b.low, a.high, a.low )
- : lt128( a.high, a.low, b.high, b.low );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the extended double-precision floating-point values `a' and `b'
-| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception.
-| The comparison is performed according to the IEC/IEEE Standard for Binary
-| Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status)
-{
- if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
- float_raise(float_flag_invalid, status);
- return 1;
- }
- if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
- && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
- || ( ( extractFloatx80Exp( b ) == 0x7FFF )
- && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
- ) {
- if (floatx80_is_signaling_nan(a, status)
- || floatx80_is_signaling_nan(b, status)) {
- float_raise(float_flag_invalid, status);
- }
- return 1;
- }
- return 0;
-}
-
/*----------------------------------------------------------------------------
| Returns the result of converting the quadruple-precision floating-point
| value `a' to the 32-bit two's complement integer format. The conversion
int32_t float128_to_int32(float128 a, float_status *status)
{
- flag aSign;
+ bool aSign;
int32_t aExp, shiftCount;
uint64_t aSig0, aSig1;
aExp = extractFloat128Exp( a );
aSign = extractFloat128Sign( a );
if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
- if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
+ if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
aSig0 |= ( aSig1 != 0 );
shiftCount = 0x4028 - aExp;
if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
{
- flag aSign;
+ bool aSign;
int32_t aExp, shiftCount;
uint64_t aSig0, aSig1, savedASig;
int32_t z;
}
return 0;
}
- aSig0 |= LIT64( 0x0001000000000000 );
+ aSig0 |= UINT64_C(0x0001000000000000);
shiftCount = 0x402F - aExp;
savedASig = aSig0;
aSig0 >>= shiftCount;
if ( ( z < 0 ) ^ aSign ) {
invalid:
float_raise(float_flag_invalid, status);
- return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
+ return aSign ? INT32_MIN : INT32_MAX;
}
if ( ( aSig0<<shiftCount ) != savedASig ) {
status->float_exception_flags |= float_flag_inexact;
int64_t float128_to_int64(float128 a, float_status *status)
{
- flag aSign;
+ bool aSign;
int32_t aExp, shiftCount;
uint64_t aSig0, aSig1;
aSig0 = extractFloat128Frac0( a );
aExp = extractFloat128Exp( a );
aSign = extractFloat128Sign( a );
- if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
+ if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
shiftCount = 0x402F - aExp;
if ( shiftCount <= 0 ) {
if ( 0x403E < aExp ) {
float_raise(float_flag_invalid, status);
if ( ! aSign
|| ( ( aExp == 0x7FFF )
- && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
+ && ( aSig1 || ( aSig0 != UINT64_C(0x0001000000000000) ) )
)
) {
- return LIT64( 0x7FFFFFFFFFFFFFFF );
+ return INT64_MAX;
}
- return (int64_t) LIT64( 0x8000000000000000 );
+ return INT64_MIN;
}
shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
}
int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
{
- flag aSign;
+ bool aSign;
int32_t aExp, shiftCount;
uint64_t aSig0, aSig1;
int64_t z;
aSig0 = extractFloat128Frac0( a );
aExp = extractFloat128Exp( a );
aSign = extractFloat128Sign( a );
- if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
+ if ( aExp ) aSig0 |= UINT64_C(0x0001000000000000);
shiftCount = aExp - 0x402F;
if ( 0 < shiftCount ) {
if ( 0x403E <= aExp ) {
- aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
- if ( ( a.high == LIT64( 0xC03E000000000000 ) )
- && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
+ aSig0 &= UINT64_C(0x0000FFFFFFFFFFFF);
+ if ( ( a.high == UINT64_C(0xC03E000000000000) )
+ && ( aSig1 < UINT64_C(0x0002000000000000) ) ) {
if (aSig1) {
status->float_exception_flags |= float_flag_inexact;
}
else {
float_raise(float_flag_invalid, status);
if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
- return LIT64( 0x7FFFFFFFFFFFFFFF );
+ return INT64_MAX;
}
}
- return (int64_t) LIT64( 0x8000000000000000 );
+ return INT64_MIN;
}
z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
if ( (uint64_t) ( aSig1<<shiftCount ) ) {
uint64_t float128_to_uint64(float128 a, float_status *status)
{
- flag aSign;
+ bool aSign;
int aExp;
int shiftCount;
uint64_t aSig0, aSig1;
if (aSign && (aExp > 0x3FFE)) {
float_raise(float_flag_invalid, status);
if (float128_is_any_nan(a)) {
- return LIT64(0xFFFFFFFFFFFFFFFF);
+ return UINT64_MAX;
} else {
return 0;
}
}
if (aExp) {
- aSig0 |= LIT64(0x0001000000000000);
+ aSig0 |= UINT64_C(0x0001000000000000);
}
shiftCount = 0x402F - aExp;
if (shiftCount <= 0) {
if (0x403E < aExp) {
float_raise(float_flag_invalid, status);
- return LIT64(0xFFFFFFFFFFFFFFFF);
+ return UINT64_MAX;
}
shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
} else {
float32 float128_to_float32(float128 a, float_status *status)
{
- flag aSign;
+ bool aSign;
int32_t aExp;
uint64_t aSig0, aSig1;
uint32_t zSig;
float64 float128_to_float64(float128 a, float_status *status)
{
- flag aSign;
+ bool aSign;
int32_t aExp;
uint64_t aSig0, aSig1;
shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
aSig0 |= ( aSig1 != 0 );
if ( aExp || aSig0 ) {
- aSig0 |= LIT64( 0x4000000000000000 );
+ aSig0 |= UINT64_C(0x4000000000000000);
aExp -= 0x3C01;
}
return roundAndPackFloat64(aSign, aExp, aSig0, status);
floatx80 float128_to_floatx80(float128 a, float_status *status)
{
- flag aSign;
+ bool aSign;
int32_t aExp;
uint64_t aSig0, aSig1;
aSign = extractFloat128Sign( a );
if ( aExp == 0x7FFF ) {
if ( aSig0 | aSig1 ) {
- return commonNaNToFloatx80(float128ToCommonNaN(a, status), status);
+ floatx80 res = commonNaNToFloatx80(float128ToCommonNaN(a, status),
+ status);
+ return floatx80_silence_nan(res, status);
}
return packFloatx80(aSign, floatx80_infinity_high,
floatx80_infinity_low);
normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
}
else {
- aSig0 |= LIT64( 0x0001000000000000 );
+ aSig0 |= UINT64_C(0x0001000000000000);
}
shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
float128 float128_round_to_int(float128 a, float_status *status)
{
- flag aSign;
+ bool aSign;
int32_t aExp;
uint64_t lastBitMask, roundBitsMask;
float128 z;
case float_round_to_odd:
return packFloat128(aSign, 0x3FFF, 0, 0);
+
+ case float_round_to_zero:
+ break;
}
return packFloat128( aSign, 0, 0, 0 );
}
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
-static float128 addFloat128Sigs(float128 a, float128 b, flag zSign,
+static float128 addFloat128Sigs(float128 a, float128 b, bool zSign,
float_status *status)
{
int32_t aExp, bExp, zExp;
--expDiff;
}
else {
- bSig0 |= LIT64( 0x0001000000000000 );
+ bSig0 |= UINT64_C(0x0001000000000000);
}
shift128ExtraRightJamming(
bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
++expDiff;
}
else {
- aSig0 |= LIT64( 0x0001000000000000 );
+ aSig0 |= UINT64_C(0x0001000000000000);
}
shift128ExtraRightJamming(
aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
return packFloat128( zSign, 0, zSig0, zSig1 );
}
zSig2 = 0;
- zSig0 |= LIT64( 0x0002000000000000 );
+ zSig0 |= UINT64_C(0x0002000000000000);
zExp = aExp;
goto shiftRight1;
}
- aSig0 |= LIT64( 0x0001000000000000 );
+ aSig0 |= UINT64_C(0x0001000000000000);
add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
--zExp;
- if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
+ if ( zSig0 < UINT64_C(0x0002000000000000) ) goto roundAndPack;
++zExp;
shiftRight1:
shift128ExtraRightJamming(
| Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
-static float128 subFloat128Sigs(float128 a, float128 b, flag zSign,
+static float128 subFloat128Sigs(float128 a, float128 b, bool zSign,
float_status *status)
{
int32_t aExp, bExp, zExp;
++expDiff;
}
else {
- aSig0 |= LIT64( 0x4000000000000000 );
+ aSig0 |= UINT64_C(0x4000000000000000);
}
shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
- bSig0 |= LIT64( 0x4000000000000000 );
+ bSig0 |= UINT64_C(0x4000000000000000);
bBigger:
sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
zExp = bExp;
--expDiff;
}
else {
- bSig0 |= LIT64( 0x4000000000000000 );
+ bSig0 |= UINT64_C(0x4000000000000000);
}
shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
- aSig0 |= LIT64( 0x4000000000000000 );
+ aSig0 |= UINT64_C(0x4000000000000000);
aBigger:
sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
zExp = aExp;
float128 float128_add(float128 a, float128 b, float_status *status)
{
- flag aSign, bSign;
+ bool aSign, bSign;
aSign = extractFloat128Sign( a );
bSign = extractFloat128Sign( b );
float128 float128_sub(float128 a, float128 b, float_status *status)
{
- flag aSign, bSign;
+ bool aSign, bSign;
aSign = extractFloat128Sign( a );
bSign = extractFloat128Sign( b );
float128 float128_mul(float128 a, float128 b, float_status *status)
{
- flag aSign, bSign, zSign;
+ bool aSign, bSign, zSign;
int32_t aExp, bExp, zExp;
uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
}
zExp = aExp + bExp - 0x4000;
- aSig0 |= LIT64( 0x0001000000000000 );
+ aSig0 |= UINT64_C(0x0001000000000000);
shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
zSig2 |= ( zSig3 != 0 );
- if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
+ if (UINT64_C( 0x0002000000000000) <= zSig0 ) {
shift128ExtraRightJamming(
zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
++zExp;
float128 float128_div(float128 a, float128 b, float_status *status)
{
- flag aSign, bSign, zSign;
+ bool aSign, bSign, zSign;
int32_t aExp, bExp, zExp;
uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
}
zExp = aExp - bExp + 0x3FFD;
shortShift128Left(
- aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
+ aSig0 | UINT64_C(0x0001000000000000), aSig1, 15, &aSig0, &aSig1 );
shortShift128Left(
- bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
+ bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
++zExp;
float128 float128_rem(float128 a, float128 b, float_status *status)
{
- flag aSign, zSign;
+ bool aSign, zSign;
int32_t aExp, bExp, expDiff;
uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
expDiff = aExp - bExp;
if ( expDiff < -1 ) return a;
shortShift128Left(
- aSig0 | LIT64( 0x0001000000000000 ),
+ aSig0 | UINT64_C(0x0001000000000000),
aSig1,
15 - ( expDiff < 0 ),
&aSig0,
&aSig1
);
shortShift128Left(
- bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
+ bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1 );
q = le128( bSig0, bSig1, aSig0, aSig1 );
if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
expDiff -= 64;
float128 float128_sqrt(float128 a, float_status *status)
{
- flag aSign;
+ bool aSign;
int32_t aExp, zExp;
uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
}
zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
- aSig0 |= LIT64( 0x0001000000000000 );
+ aSig0 |= UINT64_C(0x0001000000000000);
zSig0 = estimateSqrt32( aExp, aSig0>>17 );
shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
}
-/*----------------------------------------------------------------------------
-| Returns 1 if the quadruple-precision floating-point value `a' is equal to
-| the corresponding value `b', and 0 otherwise. The invalid exception is
-| raised if either operand is a NaN. Otherwise, the comparison is performed
-| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int float128_eq(float128 a, float128 b, float_status *status)
-{
-
- if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
- && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
- || ( ( extractFloat128Exp( b ) == 0x7FFF )
- && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
- ) {
- float_raise(float_flag_invalid, status);
- return 0;
- }
- return
- ( a.low == b.low )
- && ( ( a.high == b.high )
- || ( ( a.low == 0 )
- && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
- );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the quadruple-precision floating-point value `a' is less than
-| or equal to the corresponding value `b', and 0 otherwise. The invalid
-| exception is raised if either operand is a NaN. The comparison is performed
-| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int float128_le(float128 a, float128 b, float_status *status)
-{
- flag aSign, bSign;
-
- if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
- && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
- || ( ( extractFloat128Exp( b ) == 0x7FFF )
- && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
- ) {
- float_raise(float_flag_invalid, status);
- return 0;
- }
- aSign = extractFloat128Sign( a );
- bSign = extractFloat128Sign( b );
- if ( aSign != bSign ) {
- return
- aSign
- || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
- == 0 );
- }
- return
- aSign ? le128( b.high, b.low, a.high, a.low )
- : le128( a.high, a.low, b.high, b.low );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the quadruple-precision floating-point value `a' is less than
-| the corresponding value `b', and 0 otherwise. The invalid exception is
-| raised if either operand is a NaN. The comparison is performed according
-| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int float128_lt(float128 a, float128 b, float_status *status)
+static inline FloatRelation
+floatx80_compare_internal(floatx80 a, floatx80 b, bool is_quiet,
+ float_status *status)
{
- flag aSign, bSign;
-
- if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
- && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
- || ( ( extractFloat128Exp( b ) == 0x7FFF )
- && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
- ) {
- float_raise(float_flag_invalid, status);
- return 0;
- }
- aSign = extractFloat128Sign( a );
- bSign = extractFloat128Sign( b );
- if ( aSign != bSign ) {
- return
- aSign
- && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
- != 0 );
- }
- return
- aSign ? lt128( b.high, b.low, a.high, a.low )
- : lt128( a.high, a.low, b.high, b.low );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
-| be compared, and 0 otherwise. The invalid exception is raised if either
-| operand is a NaN. The comparison is performed according to the IEC/IEEE
-| Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int float128_unordered(float128 a, float128 b, float_status *status)
-{
- if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
- && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
- || ( ( extractFloat128Exp( b ) == 0x7FFF )
- && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
- ) {
- float_raise(float_flag_invalid, status);
- return 1;
- }
- return 0;
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the quadruple-precision floating-point value `a' is equal to
-| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
-| exception. The comparison is performed according to the IEC/IEEE Standard
-| for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int float128_eq_quiet(float128 a, float128 b, float_status *status)
-{
-
- if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
- && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
- || ( ( extractFloat128Exp( b ) == 0x7FFF )
- && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
- ) {
- if (float128_is_signaling_nan(a, status)
- || float128_is_signaling_nan(b, status)) {
- float_raise(float_flag_invalid, status);
- }
- return 0;
- }
- return
- ( a.low == b.low )
- && ( ( a.high == b.high )
- || ( ( a.low == 0 )
- && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
- );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the quadruple-precision floating-point value `a' is less than
-| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
-| cause an exception. Otherwise, the comparison is performed according to the
-| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int float128_le_quiet(float128 a, float128 b, float_status *status)
-{
- flag aSign, bSign;
-
- if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
- && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
- || ( ( extractFloat128Exp( b ) == 0x7FFF )
- && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
- ) {
- if (float128_is_signaling_nan(a, status)
- || float128_is_signaling_nan(b, status)) {
- float_raise(float_flag_invalid, status);
- }
- return 0;
- }
- aSign = extractFloat128Sign( a );
- bSign = extractFloat128Sign( b );
- if ( aSign != bSign ) {
- return
- aSign
- || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
- == 0 );
- }
- return
- aSign ? le128( b.high, b.low, a.high, a.low )
- : le128( a.high, a.low, b.high, b.low );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the quadruple-precision floating-point value `a' is less than
-| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
-| exception. Otherwise, the comparison is performed according to the IEC/IEEE
-| Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int float128_lt_quiet(float128 a, float128 b, float_status *status)
-{
- flag aSign, bSign;
-
- if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
- && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
- || ( ( extractFloat128Exp( b ) == 0x7FFF )
- && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
- ) {
- if (float128_is_signaling_nan(a, status)
- || float128_is_signaling_nan(b, status)) {
- float_raise(float_flag_invalid, status);
- }
- return 0;
- }
- aSign = extractFloat128Sign( a );
- bSign = extractFloat128Sign( b );
- if ( aSign != bSign ) {
- return
- aSign
- && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
- != 0 );
- }
- return
- aSign ? lt128( b.high, b.low, a.high, a.low )
- : lt128( a.high, a.low, b.high, b.low );
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
-| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
-| comparison is performed according to the IEC/IEEE Standard for Binary
-| Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-int float128_unordered_quiet(float128 a, float128 b, float_status *status)
-{
- if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
- && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
- || ( ( extractFloat128Exp( b ) == 0x7FFF )
- && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
- ) {
- if (float128_is_signaling_nan(a, status)
- || float128_is_signaling_nan(b, status)) {
- float_raise(float_flag_invalid, status);
- }
- return 1;
- }
- return 0;
-}
-
-static inline int floatx80_compare_internal(floatx80 a, floatx80 b,
- int is_quiet, float_status *status)
-{
- flag aSign, bSign;
+ bool aSign, bSign;
if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
float_raise(float_flag_invalid, status);
return 1 - (2 * aSign);
}
} else {
+ /* Normalize pseudo-denormals before comparison. */
+ if ((a.high & 0x7fff) == 0 && a.low & UINT64_C(0x8000000000000000)) {
+ ++a.high;
+ }
+ if ((b.high & 0x7fff) == 0 && b.low & UINT64_C(0x8000000000000000)) {
+ ++b.high;
+ }
if (a.low == b.low && a.high == b.high) {
return float_relation_equal;
} else {
}
}
-int floatx80_compare(floatx80 a, floatx80 b, float_status *status)
+FloatRelation floatx80_compare(floatx80 a, floatx80 b, float_status *status)
{
return floatx80_compare_internal(a, b, 0, status);
}
-int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status)
+FloatRelation floatx80_compare_quiet(floatx80 a, floatx80 b,
+ float_status *status)
{
return floatx80_compare_internal(a, b, 1, status);
}
-static inline int float128_compare_internal(float128 a, float128 b,
- int is_quiet, float_status *status)
+static inline FloatRelation
+float128_compare_internal(float128 a, float128 b, bool is_quiet,
+ float_status *status)
{
- flag aSign, bSign;
+ bool aSign, bSign;
if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
}
}
-int float128_compare(float128 a, float128 b, float_status *status)
+FloatRelation float128_compare(float128 a, float128 b, float_status *status)
{
return float128_compare_internal(a, b, 0, status);
}
-int float128_compare_quiet(float128 a, float128 b, float_status *status)
+FloatRelation float128_compare_quiet(float128 a, float128 b,
+ float_status *status)
{
return float128_compare_internal(a, b, 1, status);
}
floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
{
- flag aSign;
+ bool aSign;
int32_t aExp;
uint64_t aSig;
float128 float128_scalbn(float128 a, int n, float_status *status)
{
- flag aSign;
+ bool aSign;
int32_t aExp;
uint64_t aSig0, aSig1;
return a;
}
if (aExp != 0) {
- aSig0 |= LIT64( 0x0001000000000000 );
+ aSig0 |= UINT64_C(0x0001000000000000);
} else if (aSig0 == 0 && aSig1 == 0) {
return a;
} else {