]> Git Repo - qemu.git/blame - fpu/softfloat.c
target/arm: Fix aa64 FCADD and FCMLA decode
[qemu.git] / fpu / softfloat.c
CommitLineData
8d725fac
AF
1/*
2 * QEMU float support
3 *
16017c48
PM
4 * The code in this source file is derived from release 2a of the SoftFloat
5 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6 * some later contributions) are provided under that license, as detailed below.
7 * It has subsequently been modified by contributors to the QEMU Project,
8 * so some portions are provided under:
9 * the SoftFloat-2a license
10 * the BSD license
11 * GPL-v2-or-later
12 *
13 * Any future contributions to this file after December 1st 2014 will be
14 * taken to be licensed under the Softfloat-2a license unless specifically
15 * indicated otherwise.
8d725fac 16 */
158142c2 17
a7d1ac78
PM
18/*
19===============================================================================
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
158142c2
FB
22
23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
a7d1ac78 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
158142c2
FB
31arithmetic/SoftFloat.html'.
32
a7d1ac78
PM
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
158142c2
FB
38
39Derivative works are acceptable, even for commercial purposes, so long as
a7d1ac78
PM
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
158142c2 43
a7d1ac78
PM
44===============================================================================
45*/
158142c2 46
16017c48
PM
47/* BSD licensing:
48 * Copyright (c) 2006, Fabrice Bellard
49 * All rights reserved.
50 *
51 * Redistribution and use in source and binary forms, with or without
52 * modification, are permitted provided that the following conditions are met:
53 *
54 * 1. Redistributions of source code must retain the above copyright notice,
55 * this list of conditions and the following disclaimer.
56 *
57 * 2. Redistributions in binary form must reproduce the above copyright notice,
58 * this list of conditions and the following disclaimer in the documentation
59 * and/or other materials provided with the distribution.
60 *
61 * 3. Neither the name of the copyright holder nor the names of its contributors
62 * may be used to endorse or promote products derived from this software without
63 * specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75 * THE POSSIBILITY OF SUCH DAMAGE.
76 */
77
78/* Portions of this work are licensed under the terms of the GNU GPL,
79 * version 2 or later. See the COPYING file in the top-level directory.
80 */
81
2ac8bd03
PM
82/* softfloat (and in particular the code in softfloat-specialize.h) is
83 * target-dependent and needs the TARGET_* macros.
84 */
d38ea87a 85#include "qemu/osdep.h"
6fff2167 86#include "qemu/bitops.h"
6b4c305c 87#include "fpu/softfloat.h"
158142c2 88
dc355b76 89/* We only need stdlib for abort() */
dc355b76 90
158142c2
FB
91/*----------------------------------------------------------------------------
92| Primitive arithmetic functions, including multi-word arithmetic, and
93| division and square root approximations. (Can be specialized to target if
94| desired.)
95*----------------------------------------------------------------------------*/
88857aca 96#include "fpu/softfloat-macros.h"
158142c2 97
bb4d4bb3
PM
98/*----------------------------------------------------------------------------
99| Returns the fraction bits of the half-precision floating-point value `a'.
100*----------------------------------------------------------------------------*/
101
a49db98d 102static inline uint32_t extractFloat16Frac(float16 a)
bb4d4bb3
PM
103{
104 return float16_val(a) & 0x3ff;
105}
106
107/*----------------------------------------------------------------------------
108| Returns the exponent bits of the half-precision floating-point value `a'.
109*----------------------------------------------------------------------------*/
110
0c48262d 111static inline int extractFloat16Exp(float16 a)
bb4d4bb3
PM
112{
113 return (float16_val(a) >> 10) & 0x1f;
114}
115
d97544c9
AB
116/*----------------------------------------------------------------------------
117| Returns the fraction bits of the single-precision floating-point value `a'.
118*----------------------------------------------------------------------------*/
119
120static inline uint32_t extractFloat32Frac(float32 a)
121{
122 return float32_val(a) & 0x007FFFFF;
123}
124
125/*----------------------------------------------------------------------------
126| Returns the exponent bits of the single-precision floating-point value `a'.
127*----------------------------------------------------------------------------*/
128
129static inline int extractFloat32Exp(float32 a)
130{
131 return (float32_val(a) >> 23) & 0xFF;
132}
133
134/*----------------------------------------------------------------------------
135| Returns the sign bit of the single-precision floating-point value `a'.
136*----------------------------------------------------------------------------*/
137
138static inline flag extractFloat32Sign(float32 a)
139{
140 return float32_val(a) >> 31;
141}
142
143/*----------------------------------------------------------------------------
144| Returns the fraction bits of the double-precision floating-point value `a'.
145*----------------------------------------------------------------------------*/
146
147static inline uint64_t extractFloat64Frac(float64 a)
148{
149 return float64_val(a) & LIT64(0x000FFFFFFFFFFFFF);
150}
151
152/*----------------------------------------------------------------------------
153| Returns the exponent bits of the double-precision floating-point value `a'.
154*----------------------------------------------------------------------------*/
155
156static inline int extractFloat64Exp(float64 a)
157{
158 return (float64_val(a) >> 52) & 0x7FF;
159}
160
161/*----------------------------------------------------------------------------
162| Returns the sign bit of the double-precision floating-point value `a'.
163*----------------------------------------------------------------------------*/
164
165static inline flag extractFloat64Sign(float64 a)
166{
167 return float64_val(a) >> 63;
168}
169
a90119b5
AB
170/*
171 * Classify a floating point number. Everything above float_class_qnan
172 * is a NaN so cls >= float_class_qnan is any NaN.
173 */
174
175typedef enum __attribute__ ((__packed__)) {
176 float_class_unclassified,
177 float_class_zero,
178 float_class_normal,
179 float_class_inf,
180 float_class_qnan, /* all NaNs from here */
181 float_class_snan,
a90119b5
AB
182} FloatClass;
183
247d1f21
RH
184/* Simple helpers for checking if, or what kind of, NaN we have */
185static inline __attribute__((unused)) bool is_nan(FloatClass c)
186{
187 return unlikely(c >= float_class_qnan);
188}
189
190static inline __attribute__((unused)) bool is_snan(FloatClass c)
191{
192 return c == float_class_snan;
193}
194
195static inline __attribute__((unused)) bool is_qnan(FloatClass c)
196{
197 return c == float_class_qnan;
198}
199
a90119b5
AB
200/*
201 * Structure holding all of the decomposed parts of a float. The
202 * exponent is unbiased and the fraction is normalized. All
203 * calculations are done with a 64 bit fraction and then rounded as
204 * appropriate for the final format.
205 *
206 * Thanks to the packed FloatClass a decent compiler should be able to
207 * fit the whole structure into registers and avoid using the stack
208 * for parameter passing.
209 */
210
211typedef struct {
212 uint64_t frac;
213 int32_t exp;
214 FloatClass cls;
215 bool sign;
216} FloatParts;
217
218#define DECOMPOSED_BINARY_POINT (64 - 2)
219#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT)
220#define DECOMPOSED_OVERFLOW_BIT (DECOMPOSED_IMPLICIT_BIT << 1)
221
222/* Structure holding all of the relevant parameters for a format.
223 * exp_size: the size of the exponent field
224 * exp_bias: the offset applied to the exponent field
225 * exp_max: the maximum normalised exponent
226 * frac_size: the size of the fraction field
227 * frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
228 * The following are computed based the size of fraction
229 * frac_lsb: least significant bit of fraction
ca3a3d5a 230 * frac_lsbm1: the bit below the least significant bit (for rounding)
a90119b5 231 * round_mask/roundeven_mask: masks used for rounding
ca3a3d5a
AB
232 * The following optional modifiers are available:
233 * arm_althp: handle ARM Alternative Half Precision
a90119b5
AB
234 */
235typedef struct {
236 int exp_size;
237 int exp_bias;
238 int exp_max;
239 int frac_size;
240 int frac_shift;
241 uint64_t frac_lsb;
242 uint64_t frac_lsbm1;
243 uint64_t round_mask;
244 uint64_t roundeven_mask;
ca3a3d5a 245 bool arm_althp;
a90119b5
AB
246} FloatFmt;
247
248/* Expand fields based on the size of exponent and fraction */
249#define FLOAT_PARAMS(E, F) \
250 .exp_size = E, \
251 .exp_bias = ((1 << E) - 1) >> 1, \
252 .exp_max = (1 << E) - 1, \
253 .frac_size = F, \
254 .frac_shift = DECOMPOSED_BINARY_POINT - F, \
255 .frac_lsb = 1ull << (DECOMPOSED_BINARY_POINT - F), \
256 .frac_lsbm1 = 1ull << ((DECOMPOSED_BINARY_POINT - F) - 1), \
257 .round_mask = (1ull << (DECOMPOSED_BINARY_POINT - F)) - 1, \
258 .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - F)) - 1
259
260static const FloatFmt float16_params = {
261 FLOAT_PARAMS(5, 10)
262};
263
6fed16b2
AB
264static const FloatFmt float16_params_ahp = {
265 FLOAT_PARAMS(5, 10),
266 .arm_althp = true
267};
268
a90119b5
AB
269static const FloatFmt float32_params = {
270 FLOAT_PARAMS(8, 23)
271};
272
273static const FloatFmt float64_params = {
274 FLOAT_PARAMS(11, 52)
275};
276
6fff2167
AB
277/* Unpack a float to parts, but do not canonicalize. */
278static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
279{
280 const int sign_pos = fmt.frac_size + fmt.exp_size;
281
282 return (FloatParts) {
283 .cls = float_class_unclassified,
284 .sign = extract64(raw, sign_pos, 1),
285 .exp = extract64(raw, fmt.frac_size, fmt.exp_size),
286 .frac = extract64(raw, 0, fmt.frac_size),
287 };
288}
289
290static inline FloatParts float16_unpack_raw(float16 f)
291{
292 return unpack_raw(float16_params, f);
293}
294
295static inline FloatParts float32_unpack_raw(float32 f)
296{
297 return unpack_raw(float32_params, f);
298}
299
300static inline FloatParts float64_unpack_raw(float64 f)
301{
302 return unpack_raw(float64_params, f);
303}
304
305/* Pack a float from parts, but do not canonicalize. */
306static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p)
307{
308 const int sign_pos = fmt.frac_size + fmt.exp_size;
309 uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp);
310 return deposit64(ret, sign_pos, 1, p.sign);
311}
312
313static inline float16 float16_pack_raw(FloatParts p)
314{
315 return make_float16(pack_raw(float16_params, p));
316}
317
318static inline float32 float32_pack_raw(FloatParts p)
319{
320 return make_float32(pack_raw(float32_params, p));
321}
322
323static inline float64 float64_pack_raw(FloatParts p)
324{
325 return make_float64(pack_raw(float64_params, p));
326}
327
0664335a
RH
328/*----------------------------------------------------------------------------
329| Functions and definitions to determine: (1) whether tininess for underflow
330| is detected before or after rounding by default, (2) what (if anything)
331| happens when exceptions are raised, (3) how signaling NaNs are distinguished
332| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
333| are propagated from function inputs to output. These details are target-
334| specific.
335*----------------------------------------------------------------------------*/
336#include "softfloat-specialize.h"
337
6fff2167
AB
338/* Canonicalize EXP and FRAC, setting CLS. */
339static FloatParts canonicalize(FloatParts part, const FloatFmt *parm,
340 float_status *status)
341{
ca3a3d5a 342 if (part.exp == parm->exp_max && !parm->arm_althp) {
6fff2167
AB
343 if (part.frac == 0) {
344 part.cls = float_class_inf;
345 } else {
94933df0 346 part.frac <<= parm->frac_shift;
298b468e
RH
347 part.cls = (parts_is_snan_frac(part.frac, status)
348 ? float_class_snan : float_class_qnan);
6fff2167
AB
349 }
350 } else if (part.exp == 0) {
351 if (likely(part.frac == 0)) {
352 part.cls = float_class_zero;
353 } else if (status->flush_inputs_to_zero) {
354 float_raise(float_flag_input_denormal, status);
355 part.cls = float_class_zero;
356 part.frac = 0;
357 } else {
358 int shift = clz64(part.frac) - 1;
359 part.cls = float_class_normal;
360 part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
361 part.frac <<= shift;
362 }
363 } else {
364 part.cls = float_class_normal;
365 part.exp -= parm->exp_bias;
366 part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
367 }
368 return part;
369}
370
371/* Round and uncanonicalize a floating-point number by parts. There
372 * are FRAC_SHIFT bits that may require rounding at the bottom of the
373 * fraction; these bits will be removed. The exponent will be biased
374 * by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
375 */
376
377static FloatParts round_canonical(FloatParts p, float_status *s,
378 const FloatFmt *parm)
379{
380 const uint64_t frac_lsbm1 = parm->frac_lsbm1;
381 const uint64_t round_mask = parm->round_mask;
382 const uint64_t roundeven_mask = parm->roundeven_mask;
383 const int exp_max = parm->exp_max;
384 const int frac_shift = parm->frac_shift;
385 uint64_t frac, inc;
386 int exp, flags = 0;
387 bool overflow_norm;
388
389 frac = p.frac;
390 exp = p.exp;
391
392 switch (p.cls) {
393 case float_class_normal:
394 switch (s->float_rounding_mode) {
395 case float_round_nearest_even:
396 overflow_norm = false;
397 inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
398 break;
399 case float_round_ties_away:
400 overflow_norm = false;
401 inc = frac_lsbm1;
402 break;
403 case float_round_to_zero:
404 overflow_norm = true;
405 inc = 0;
406 break;
407 case float_round_up:
408 inc = p.sign ? 0 : round_mask;
409 overflow_norm = p.sign;
410 break;
411 case float_round_down:
412 inc = p.sign ? round_mask : 0;
413 overflow_norm = !p.sign;
414 break;
415 default:
416 g_assert_not_reached();
417 }
418
419 exp += parm->exp_bias;
420 if (likely(exp > 0)) {
421 if (frac & round_mask) {
422 flags |= float_flag_inexact;
423 frac += inc;
424 if (frac & DECOMPOSED_OVERFLOW_BIT) {
425 frac >>= 1;
426 exp++;
427 }
428 }
429 frac >>= frac_shift;
430
ca3a3d5a
AB
431 if (parm->arm_althp) {
432 /* ARM Alt HP eschews Inf and NaN for a wider exponent. */
433 if (unlikely(exp > exp_max)) {
434 /* Overflow. Return the maximum normal. */
435 flags = float_flag_invalid;
436 exp = exp_max;
437 frac = -1;
438 }
439 } else if (unlikely(exp >= exp_max)) {
6fff2167
AB
440 flags |= float_flag_overflow | float_flag_inexact;
441 if (overflow_norm) {
442 exp = exp_max - 1;
443 frac = -1;
444 } else {
445 p.cls = float_class_inf;
446 goto do_inf;
447 }
448 }
449 } else if (s->flush_to_zero) {
450 flags |= float_flag_output_denormal;
451 p.cls = float_class_zero;
452 goto do_zero;
453 } else {
454 bool is_tiny = (s->float_detect_tininess
455 == float_tininess_before_rounding)
456 || (exp < 0)
457 || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);
458
459 shift64RightJamming(frac, 1 - exp, &frac);
460 if (frac & round_mask) {
461 /* Need to recompute round-to-even. */
462 if (s->float_rounding_mode == float_round_nearest_even) {
463 inc = ((frac & roundeven_mask) != frac_lsbm1
464 ? frac_lsbm1 : 0);
465 }
466 flags |= float_flag_inexact;
467 frac += inc;
468 }
469
470 exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
471 frac >>= frac_shift;
472
473 if (is_tiny && (flags & float_flag_inexact)) {
474 flags |= float_flag_underflow;
475 }
476 if (exp == 0 && frac == 0) {
477 p.cls = float_class_zero;
478 }
479 }
480 break;
481
482 case float_class_zero:
483 do_zero:
484 exp = 0;
485 frac = 0;
486 break;
487
488 case float_class_inf:
489 do_inf:
ca3a3d5a 490 assert(!parm->arm_althp);
6fff2167
AB
491 exp = exp_max;
492 frac = 0;
493 break;
494
495 case float_class_qnan:
496 case float_class_snan:
ca3a3d5a 497 assert(!parm->arm_althp);
6fff2167 498 exp = exp_max;
94933df0 499 frac >>= parm->frac_shift;
6fff2167
AB
500 break;
501
502 default:
503 g_assert_not_reached();
504 }
505
506 float_raise(flags, s);
507 p.exp = exp;
508 p.frac = frac;
509 return p;
510}
511
6fed16b2
AB
512/* Explicit FloatFmt version */
513static FloatParts float16a_unpack_canonical(float16 f, float_status *s,
514 const FloatFmt *params)
515{
516 return canonicalize(float16_unpack_raw(f), params, s);
517}
518
6fff2167
AB
519static FloatParts float16_unpack_canonical(float16 f, float_status *s)
520{
6fed16b2
AB
521 return float16a_unpack_canonical(f, s, &float16_params);
522}
523
524static float16 float16a_round_pack_canonical(FloatParts p, float_status *s,
525 const FloatFmt *params)
526{
527 return float16_pack_raw(round_canonical(p, s, params));
6fff2167
AB
528}
529
530static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
531{
6fed16b2 532 return float16a_round_pack_canonical(p, s, &float16_params);
6fff2167
AB
533}
534
535static FloatParts float32_unpack_canonical(float32 f, float_status *s)
536{
537 return canonicalize(float32_unpack_raw(f), &float32_params, s);
538}
539
540static float32 float32_round_pack_canonical(FloatParts p, float_status *s)
541{
0bcfbcbe 542 return float32_pack_raw(round_canonical(p, s, &float32_params));
6fff2167
AB
543}
544
545static FloatParts float64_unpack_canonical(float64 f, float_status *s)
546{
547 return canonicalize(float64_unpack_raw(f), &float64_params, s);
548}
549
550static float64 float64_round_pack_canonical(FloatParts p, float_status *s)
551{
0bcfbcbe 552 return float64_pack_raw(round_canonical(p, s, &float64_params));
6fff2167
AB
553}
554
dbe4d53a
AB
555static FloatParts return_nan(FloatParts a, float_status *s)
556{
557 switch (a.cls) {
558 case float_class_snan:
559 s->float_exception_flags |= float_flag_invalid;
0bcfbcbe 560 a = parts_silence_nan(a, s);
dbe4d53a
AB
561 /* fall through */
562 case float_class_qnan:
563 if (s->default_nan_mode) {
f7e598e2 564 return parts_default_nan(s);
dbe4d53a
AB
565 }
566 break;
567
568 default:
569 g_assert_not_reached();
570 }
571 return a;
572}
573
6fff2167
AB
574static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
575{
576 if (is_snan(a.cls) || is_snan(b.cls)) {
577 s->float_exception_flags |= float_flag_invalid;
578 }
579
580 if (s->default_nan_mode) {
f7e598e2 581 return parts_default_nan(s);
6fff2167 582 } else {
4f251cfd 583 if (pickNaN(a.cls, b.cls,
6fff2167
AB
584 a.frac > b.frac ||
585 (a.frac == b.frac && a.sign < b.sign))) {
586 a = b;
587 }
0bcfbcbe
RH
588 if (is_snan(a.cls)) {
589 return parts_silence_nan(a, s);
590 }
6fff2167
AB
591 }
592 return a;
593}
594
d446830a
AB
595static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
596 bool inf_zero, float_status *s)
597{
1839189b
PM
598 int which;
599
d446830a
AB
600 if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
601 s->float_exception_flags |= float_flag_invalid;
602 }
603
3bd2dec1 604 which = pickNaNMulAdd(a.cls, b.cls, c.cls, inf_zero, s);
1839189b 605
d446830a 606 if (s->default_nan_mode) {
1839189b
PM
607 /* Note that this check is after pickNaNMulAdd so that function
608 * has an opportunity to set the Invalid flag.
609 */
f7e598e2 610 which = 3;
1839189b 611 }
d446830a 612
1839189b
PM
613 switch (which) {
614 case 0:
615 break;
616 case 1:
617 a = b;
618 break;
619 case 2:
620 a = c;
621 break;
622 case 3:
f7e598e2 623 return parts_default_nan(s);
1839189b
PM
624 default:
625 g_assert_not_reached();
d446830a 626 }
1839189b 627
0bcfbcbe
RH
628 if (is_snan(a.cls)) {
629 return parts_silence_nan(a, s);
630 }
d446830a
AB
631 return a;
632}
633
6fff2167
AB
634/*
635 * Returns the result of adding or subtracting the values of the
636 * floating-point values `a' and `b'. The operation is performed
637 * according to the IEC/IEEE Standard for Binary Floating-Point
638 * Arithmetic.
639 */
640
641static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
642 float_status *s)
643{
644 bool a_sign = a.sign;
645 bool b_sign = b.sign ^ subtract;
646
647 if (a_sign != b_sign) {
648 /* Subtraction */
649
650 if (a.cls == float_class_normal && b.cls == float_class_normal) {
651 if (a.exp > b.exp || (a.exp == b.exp && a.frac >= b.frac)) {
652 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
653 a.frac = a.frac - b.frac;
654 } else {
655 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
656 a.frac = b.frac - a.frac;
657 a.exp = b.exp;
658 a_sign ^= 1;
659 }
660
661 if (a.frac == 0) {
662 a.cls = float_class_zero;
663 a.sign = s->float_rounding_mode == float_round_down;
664 } else {
665 int shift = clz64(a.frac) - 1;
666 a.frac = a.frac << shift;
667 a.exp = a.exp - shift;
668 a.sign = a_sign;
669 }
670 return a;
671 }
672 if (is_nan(a.cls) || is_nan(b.cls)) {
673 return pick_nan(a, b, s);
674 }
675 if (a.cls == float_class_inf) {
676 if (b.cls == float_class_inf) {
677 float_raise(float_flag_invalid, s);
f7e598e2 678 return parts_default_nan(s);
6fff2167
AB
679 }
680 return a;
681 }
682 if (a.cls == float_class_zero && b.cls == float_class_zero) {
683 a.sign = s->float_rounding_mode == float_round_down;
684 return a;
685 }
686 if (a.cls == float_class_zero || b.cls == float_class_inf) {
687 b.sign = a_sign ^ 1;
688 return b;
689 }
690 if (b.cls == float_class_zero) {
691 return a;
692 }
693 } else {
694 /* Addition */
695 if (a.cls == float_class_normal && b.cls == float_class_normal) {
696 if (a.exp > b.exp) {
697 shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
698 } else if (a.exp < b.exp) {
699 shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
700 a.exp = b.exp;
701 }
702 a.frac += b.frac;
703 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
704 a.frac >>= 1;
705 a.exp += 1;
706 }
707 return a;
708 }
709 if (is_nan(a.cls) || is_nan(b.cls)) {
710 return pick_nan(a, b, s);
711 }
712 if (a.cls == float_class_inf || b.cls == float_class_zero) {
713 return a;
714 }
715 if (b.cls == float_class_inf || a.cls == float_class_zero) {
716 b.sign = b_sign;
717 return b;
718 }
719 }
720 g_assert_not_reached();
721}
722
723/*
724 * Returns the result of adding or subtracting the floating-point
725 * values `a' and `b'. The operation is performed according to the
726 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
727 */
728
729float16 __attribute__((flatten)) float16_add(float16 a, float16 b,
730 float_status *status)
731{
732 FloatParts pa = float16_unpack_canonical(a, status);
733 FloatParts pb = float16_unpack_canonical(b, status);
734 FloatParts pr = addsub_floats(pa, pb, false, status);
735
736 return float16_round_pack_canonical(pr, status);
737}
738
739float32 __attribute__((flatten)) float32_add(float32 a, float32 b,
740 float_status *status)
741{
742 FloatParts pa = float32_unpack_canonical(a, status);
743 FloatParts pb = float32_unpack_canonical(b, status);
744 FloatParts pr = addsub_floats(pa, pb, false, status);
745
746 return float32_round_pack_canonical(pr, status);
747}
748
749float64 __attribute__((flatten)) float64_add(float64 a, float64 b,
750 float_status *status)
751{
752 FloatParts pa = float64_unpack_canonical(a, status);
753 FloatParts pb = float64_unpack_canonical(b, status);
754 FloatParts pr = addsub_floats(pa, pb, false, status);
755
756 return float64_round_pack_canonical(pr, status);
757}
758
759float16 __attribute__((flatten)) float16_sub(float16 a, float16 b,
760 float_status *status)
761{
762 FloatParts pa = float16_unpack_canonical(a, status);
763 FloatParts pb = float16_unpack_canonical(b, status);
764 FloatParts pr = addsub_floats(pa, pb, true, status);
765
766 return float16_round_pack_canonical(pr, status);
767}
768
769float32 __attribute__((flatten)) float32_sub(float32 a, float32 b,
770 float_status *status)
771{
772 FloatParts pa = float32_unpack_canonical(a, status);
773 FloatParts pb = float32_unpack_canonical(b, status);
774 FloatParts pr = addsub_floats(pa, pb, true, status);
775
776 return float32_round_pack_canonical(pr, status);
777}
778
779float64 __attribute__((flatten)) float64_sub(float64 a, float64 b,
780 float_status *status)
781{
782 FloatParts pa = float64_unpack_canonical(a, status);
783 FloatParts pb = float64_unpack_canonical(b, status);
784 FloatParts pr = addsub_floats(pa, pb, true, status);
785
786 return float64_round_pack_canonical(pr, status);
787}
788
74d707e2
AB
789/*
790 * Returns the result of multiplying the floating-point values `a' and
791 * `b'. The operation is performed according to the IEC/IEEE Standard
792 * for Binary Floating-Point Arithmetic.
793 */
794
795static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s)
796{
797 bool sign = a.sign ^ b.sign;
798
799 if (a.cls == float_class_normal && b.cls == float_class_normal) {
800 uint64_t hi, lo;
801 int exp = a.exp + b.exp;
802
803 mul64To128(a.frac, b.frac, &hi, &lo);
804 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
805 if (lo & DECOMPOSED_OVERFLOW_BIT) {
806 shift64RightJamming(lo, 1, &lo);
807 exp += 1;
808 }
809
810 /* Re-use a */
811 a.exp = exp;
812 a.sign = sign;
813 a.frac = lo;
814 return a;
815 }
816 /* handle all the NaN cases */
817 if (is_nan(a.cls) || is_nan(b.cls)) {
818 return pick_nan(a, b, s);
819 }
820 /* Inf * Zero == NaN */
821 if ((a.cls == float_class_inf && b.cls == float_class_zero) ||
822 (a.cls == float_class_zero && b.cls == float_class_inf)) {
823 s->float_exception_flags |= float_flag_invalid;
f7e598e2 824 return parts_default_nan(s);
74d707e2
AB
825 }
826 /* Multiply by 0 or Inf */
827 if (a.cls == float_class_inf || a.cls == float_class_zero) {
828 a.sign = sign;
829 return a;
830 }
831 if (b.cls == float_class_inf || b.cls == float_class_zero) {
832 b.sign = sign;
833 return b;
834 }
835 g_assert_not_reached();
836}
837
838float16 __attribute__((flatten)) float16_mul(float16 a, float16 b,
839 float_status *status)
840{
841 FloatParts pa = float16_unpack_canonical(a, status);
842 FloatParts pb = float16_unpack_canonical(b, status);
843 FloatParts pr = mul_floats(pa, pb, status);
844
845 return float16_round_pack_canonical(pr, status);
846}
847
848float32 __attribute__((flatten)) float32_mul(float32 a, float32 b,
849 float_status *status)
850{
851 FloatParts pa = float32_unpack_canonical(a, status);
852 FloatParts pb = float32_unpack_canonical(b, status);
853 FloatParts pr = mul_floats(pa, pb, status);
854
855 return float32_round_pack_canonical(pr, status);
856}
857
858float64 __attribute__((flatten)) float64_mul(float64 a, float64 b,
859 float_status *status)
860{
861 FloatParts pa = float64_unpack_canonical(a, status);
862 FloatParts pb = float64_unpack_canonical(b, status);
863 FloatParts pr = mul_floats(pa, pb, status);
864
865 return float64_round_pack_canonical(pr, status);
866}
867
d446830a
AB
868/*
869 * Returns the result of multiplying the floating-point values `a' and
870 * `b' then adding 'c', with no intermediate rounding step after the
871 * multiplication. The operation is performed according to the
872 * IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
873 * The flags argument allows the caller to select negation of the
874 * addend, the intermediate product, or the final result. (The
875 * difference between this and having the caller do a separate
876 * negation is that negating externally will flip the sign bit on
877 * NaNs.)
878 */
879
880static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
881 int flags, float_status *s)
882{
883 bool inf_zero = ((1 << a.cls) | (1 << b.cls)) ==
884 ((1 << float_class_inf) | (1 << float_class_zero));
885 bool p_sign;
886 bool sign_flip = flags & float_muladd_negate_result;
887 FloatClass p_class;
888 uint64_t hi, lo;
889 int p_exp;
890
891 /* It is implementation-defined whether the cases of (0,inf,qnan)
892 * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
893 * they return if they do), so we have to hand this information
894 * off to the target-specific pick-a-NaN routine.
895 */
896 if (is_nan(a.cls) || is_nan(b.cls) || is_nan(c.cls)) {
897 return pick_nan_muladd(a, b, c, inf_zero, s);
898 }
899
900 if (inf_zero) {
901 s->float_exception_flags |= float_flag_invalid;
f7e598e2 902 return parts_default_nan(s);
d446830a
AB
903 }
904
905 if (flags & float_muladd_negate_c) {
906 c.sign ^= 1;
907 }
908
909 p_sign = a.sign ^ b.sign;
910
911 if (flags & float_muladd_negate_product) {
912 p_sign ^= 1;
913 }
914
915 if (a.cls == float_class_inf || b.cls == float_class_inf) {
916 p_class = float_class_inf;
917 } else if (a.cls == float_class_zero || b.cls == float_class_zero) {
918 p_class = float_class_zero;
919 } else {
920 p_class = float_class_normal;
921 }
922
923 if (c.cls == float_class_inf) {
924 if (p_class == float_class_inf && p_sign != c.sign) {
925 s->float_exception_flags |= float_flag_invalid;
f7e598e2 926 return parts_default_nan(s);
d446830a
AB
927 } else {
928 a.cls = float_class_inf;
929 a.sign = c.sign ^ sign_flip;
f7e598e2 930 return a;
d446830a 931 }
d446830a
AB
932 }
933
934 if (p_class == float_class_inf) {
935 a.cls = float_class_inf;
936 a.sign = p_sign ^ sign_flip;
937 return a;
938 }
939
940 if (p_class == float_class_zero) {
941 if (c.cls == float_class_zero) {
942 if (p_sign != c.sign) {
943 p_sign = s->float_rounding_mode == float_round_down;
944 }
945 c.sign = p_sign;
946 } else if (flags & float_muladd_halve_result) {
947 c.exp -= 1;
948 }
949 c.sign ^= sign_flip;
950 return c;
951 }
952
953 /* a & b should be normals now... */
954 assert(a.cls == float_class_normal &&
955 b.cls == float_class_normal);
956
957 p_exp = a.exp + b.exp;
958
959 /* Multiply of 2 62-bit numbers produces a (2*62) == 124-bit
960 * result.
961 */
962 mul64To128(a.frac, b.frac, &hi, &lo);
963 /* binary point now at bit 124 */
964
965 /* check for overflow */
966 if (hi & (1ULL << (DECOMPOSED_BINARY_POINT * 2 + 1 - 64))) {
967 shift128RightJamming(hi, lo, 1, &hi, &lo);
968 p_exp += 1;
969 }
970
971 /* + add/sub */
972 if (c.cls == float_class_zero) {
973 /* move binary point back to 62 */
974 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
975 } else {
976 int exp_diff = p_exp - c.exp;
977 if (p_sign == c.sign) {
978 /* Addition */
979 if (exp_diff <= 0) {
980 shift128RightJamming(hi, lo,
981 DECOMPOSED_BINARY_POINT - exp_diff,
982 &hi, &lo);
983 lo += c.frac;
984 p_exp = c.exp;
985 } else {
986 uint64_t c_hi, c_lo;
987 /* shift c to the same binary point as the product (124) */
988 c_hi = c.frac >> 2;
989 c_lo = 0;
990 shift128RightJamming(c_hi, c_lo,
991 exp_diff,
992 &c_hi, &c_lo);
993 add128(hi, lo, c_hi, c_lo, &hi, &lo);
994 /* move binary point back to 62 */
995 shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
996 }
997
998 if (lo & DECOMPOSED_OVERFLOW_BIT) {
999 shift64RightJamming(lo, 1, &lo);
1000 p_exp += 1;
1001 }
1002
1003 } else {
1004 /* Subtraction */
1005 uint64_t c_hi, c_lo;
1006 /* make C binary point match product at bit 124 */
1007 c_hi = c.frac >> 2;
1008 c_lo = 0;
1009
1010 if (exp_diff <= 0) {
1011 shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
1012 if (exp_diff == 0
1013 &&
1014 (hi > c_hi || (hi == c_hi && lo >= c_lo))) {
1015 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1016 } else {
1017 sub128(c_hi, c_lo, hi, lo, &hi, &lo);
1018 p_sign ^= 1;
1019 p_exp = c.exp;
1020 }
1021 } else {
1022 shift128RightJamming(c_hi, c_lo,
1023 exp_diff,
1024 &c_hi, &c_lo);
1025 sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1026 }
1027
1028 if (hi == 0 && lo == 0) {
1029 a.cls = float_class_zero;
1030 a.sign = s->float_rounding_mode == float_round_down;
1031 a.sign ^= sign_flip;
1032 return a;
1033 } else {
1034 int shift;
1035 if (hi != 0) {
1036 shift = clz64(hi);
1037 } else {
1038 shift = clz64(lo) + 64;
1039 }
1040 /* Normalizing to a binary point of 124 is the
1041 correct adjust for the exponent. However since we're
1042 shifting, we might as well put the binary point back
1043 at 62 where we really want it. Therefore shift as
1044 if we're leaving 1 bit at the top of the word, but
1045 adjust the exponent as if we're leaving 3 bits. */
1046 shift -= 1;
1047 if (shift >= 64) {
1048 lo = lo << (shift - 64);
1049 } else {
1050 hi = (hi << shift) | (lo >> (64 - shift));
1051 lo = hi | ((lo << shift) != 0);
1052 }
1053 p_exp -= shift - 2;
1054 }
1055 }
1056 }
1057
1058 if (flags & float_muladd_halve_result) {
1059 p_exp -= 1;
1060 }
1061
1062 /* finally prepare our result */
1063 a.cls = float_class_normal;
1064 a.sign = p_sign ^ sign_flip;
1065 a.exp = p_exp;
1066 a.frac = lo;
1067
1068 return a;
1069}
1070
1071float16 __attribute__((flatten)) float16_muladd(float16 a, float16 b, float16 c,
1072 int flags, float_status *status)
1073{
1074 FloatParts pa = float16_unpack_canonical(a, status);
1075 FloatParts pb = float16_unpack_canonical(b, status);
1076 FloatParts pc = float16_unpack_canonical(c, status);
1077 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1078
1079 return float16_round_pack_canonical(pr, status);
1080}
1081
1082float32 __attribute__((flatten)) float32_muladd(float32 a, float32 b, float32 c,
1083 int flags, float_status *status)
1084{
1085 FloatParts pa = float32_unpack_canonical(a, status);
1086 FloatParts pb = float32_unpack_canonical(b, status);
1087 FloatParts pc = float32_unpack_canonical(c, status);
1088 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1089
1090 return float32_round_pack_canonical(pr, status);
1091}
1092
1093float64 __attribute__((flatten)) float64_muladd(float64 a, float64 b, float64 c,
1094 int flags, float_status *status)
1095{
1096 FloatParts pa = float64_unpack_canonical(a, status);
1097 FloatParts pb = float64_unpack_canonical(b, status);
1098 FloatParts pc = float64_unpack_canonical(c, status);
1099 FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1100
1101 return float64_round_pack_canonical(pr, status);
1102}
1103
cf07323d
AB
1104/*
1105 * Returns the result of dividing the floating-point value `a' by the
1106 * corresponding value `b'. The operation is performed according to
1107 * the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1108 */
1109
1110static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
1111{
1112 bool sign = a.sign ^ b.sign;
1113
1114 if (a.cls == float_class_normal && b.cls == float_class_normal) {
1115 uint64_t temp_lo, temp_hi;
1116 int exp = a.exp - b.exp;
1117 if (a.frac < b.frac) {
1118 exp -= 1;
1119 shortShift128Left(0, a.frac, DECOMPOSED_BINARY_POINT + 1,
1120 &temp_hi, &temp_lo);
1121 } else {
1122 shortShift128Left(0, a.frac, DECOMPOSED_BINARY_POINT,
1123 &temp_hi, &temp_lo);
1124 }
1125 /* LSB of quot is set if inexact which roundandpack will use
1126 * to set flags. Yet again we re-use a for the result */
1127 a.frac = div128To64(temp_lo, temp_hi, b.frac);
1128 a.sign = sign;
1129 a.exp = exp;
1130 return a;
1131 }
1132 /* handle all the NaN cases */
1133 if (is_nan(a.cls) || is_nan(b.cls)) {
1134 return pick_nan(a, b, s);
1135 }
1136 /* 0/0 or Inf/Inf */
1137 if (a.cls == b.cls
1138 &&
1139 (a.cls == float_class_inf || a.cls == float_class_zero)) {
1140 s->float_exception_flags |= float_flag_invalid;
f7e598e2 1141 return parts_default_nan(s);
cf07323d 1142 }
9cb4e398
AB
1143 /* Inf / x or 0 / x */
1144 if (a.cls == float_class_inf || a.cls == float_class_zero) {
1145 a.sign = sign;
1146 return a;
1147 }
cf07323d
AB
1148 /* Div 0 => Inf */
1149 if (b.cls == float_class_zero) {
1150 s->float_exception_flags |= float_flag_divbyzero;
1151 a.cls = float_class_inf;
1152 a.sign = sign;
1153 return a;
1154 }
cf07323d
AB
1155 /* Div by Inf */
1156 if (b.cls == float_class_inf) {
1157 a.cls = float_class_zero;
1158 a.sign = sign;
1159 return a;
1160 }
1161 g_assert_not_reached();
1162}
1163
1164float16 float16_div(float16 a, float16 b, float_status *status)
1165{
1166 FloatParts pa = float16_unpack_canonical(a, status);
1167 FloatParts pb = float16_unpack_canonical(b, status);
1168 FloatParts pr = div_floats(pa, pb, status);
1169
1170 return float16_round_pack_canonical(pr, status);
1171}
1172
1173float32 float32_div(float32 a, float32 b, float_status *status)
1174{
1175 FloatParts pa = float32_unpack_canonical(a, status);
1176 FloatParts pb = float32_unpack_canonical(b, status);
1177 FloatParts pr = div_floats(pa, pb, status);
1178
1179 return float32_round_pack_canonical(pr, status);
1180}
1181
1182float64 float64_div(float64 a, float64 b, float_status *status)
1183{
1184 FloatParts pa = float64_unpack_canonical(a, status);
1185 FloatParts pb = float64_unpack_canonical(b, status);
1186 FloatParts pr = div_floats(pa, pb, status);
1187
1188 return float64_round_pack_canonical(pr, status);
1189}
1190
6fed16b2
AB
1191/*
1192 * Float to Float conversions
1193 *
1194 * Returns the result of converting one float format to another. The
1195 * conversion is performed according to the IEC/IEEE Standard for
1196 * Binary Floating-Point Arithmetic.
1197 *
1198 * The float_to_float helper only needs to take care of raising
1199 * invalid exceptions and handling the conversion on NaNs.
1200 */
1201
1202static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf,
1203 float_status *s)
1204{
1205 if (dstf->arm_althp) {
1206 switch (a.cls) {
1207 case float_class_qnan:
1208 case float_class_snan:
1209 /* There is no NaN in the destination format. Raise Invalid
1210 * and return a zero with the sign of the input NaN.
1211 */
1212 s->float_exception_flags |= float_flag_invalid;
1213 a.cls = float_class_zero;
1214 a.frac = 0;
1215 a.exp = 0;
1216 break;
1217
1218 case float_class_inf:
1219 /* There is no Inf in the destination format. Raise Invalid
1220 * and return the maximum normal with the correct sign.
1221 */
1222 s->float_exception_flags |= float_flag_invalid;
1223 a.cls = float_class_normal;
1224 a.exp = dstf->exp_max;
1225 a.frac = ((1ull << dstf->frac_size) - 1) << dstf->frac_shift;
1226 break;
1227
1228 default:
1229 break;
1230 }
1231 } else if (is_nan(a.cls)) {
1232 if (is_snan(a.cls)) {
1233 s->float_exception_flags |= float_flag_invalid;
1234 a = parts_silence_nan(a, s);
1235 }
1236 if (s->default_nan_mode) {
1237 return parts_default_nan(s);
1238 }
1239 }
1240 return a;
1241}
1242
1243float32 float16_to_float32(float16 a, bool ieee, float_status *s)
1244{
1245 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1246 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1247 FloatParts pr = float_to_float(p, &float32_params, s);
1248 return float32_round_pack_canonical(pr, s);
1249}
1250
1251float64 float16_to_float64(float16 a, bool ieee, float_status *s)
1252{
1253 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1254 FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1255 FloatParts pr = float_to_float(p, &float64_params, s);
1256 return float64_round_pack_canonical(pr, s);
1257}
1258
1259float16 float32_to_float16(float32 a, bool ieee, float_status *s)
1260{
1261 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1262 FloatParts p = float32_unpack_canonical(a, s);
1263 FloatParts pr = float_to_float(p, fmt16, s);
1264 return float16a_round_pack_canonical(pr, s, fmt16);
1265}
1266
1267float64 float32_to_float64(float32 a, float_status *s)
1268{
1269 FloatParts p = float32_unpack_canonical(a, s);
1270 FloatParts pr = float_to_float(p, &float64_params, s);
1271 return float64_round_pack_canonical(pr, s);
1272}
1273
1274float16 float64_to_float16(float64 a, bool ieee, float_status *s)
1275{
1276 const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1277 FloatParts p = float64_unpack_canonical(a, s);
1278 FloatParts pr = float_to_float(p, fmt16, s);
1279 return float16a_round_pack_canonical(pr, s, fmt16);
1280}
1281
1282float32 float64_to_float32(float64 a, float_status *s)
1283{
1284 FloatParts p = float64_unpack_canonical(a, s);
1285 FloatParts pr = float_to_float(p, &float32_params, s);
1286 return float32_round_pack_canonical(pr, s);
1287}
1288
dbe4d53a
AB
1289/*
1290 * Rounds the floating-point value `a' to an integer, and returns the
1291 * result as a floating-point value. The operation is performed
1292 * according to the IEC/IEEE Standard for Binary Floating-Point
1293 * Arithmetic.
1294 */
1295
1296static FloatParts round_to_int(FloatParts a, int rounding_mode, float_status *s)
1297{
1298 if (is_nan(a.cls)) {
1299 return return_nan(a, s);
1300 }
1301
1302 switch (a.cls) {
1303 case float_class_zero:
1304 case float_class_inf:
1305 case float_class_qnan:
1306 /* already "integral" */
1307 break;
1308 case float_class_normal:
1309 if (a.exp >= DECOMPOSED_BINARY_POINT) {
1310 /* already integral */
1311 break;
1312 }
1313 if (a.exp < 0) {
1314 bool one;
1315 /* all fractional */
1316 s->float_exception_flags |= float_flag_inexact;
1317 switch (rounding_mode) {
1318 case float_round_nearest_even:
1319 one = a.exp == -1 && a.frac > DECOMPOSED_IMPLICIT_BIT;
1320 break;
1321 case float_round_ties_away:
1322 one = a.exp == -1 && a.frac >= DECOMPOSED_IMPLICIT_BIT;
1323 break;
1324 case float_round_to_zero:
1325 one = false;
1326 break;
1327 case float_round_up:
1328 one = !a.sign;
1329 break;
1330 case float_round_down:
1331 one = a.sign;
1332 break;
1333 default:
1334 g_assert_not_reached();
1335 }
1336
1337 if (one) {
1338 a.frac = DECOMPOSED_IMPLICIT_BIT;
1339 a.exp = 0;
1340 } else {
1341 a.cls = float_class_zero;
1342 }
1343 } else {
1344 uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
1345 uint64_t frac_lsbm1 = frac_lsb >> 1;
1346 uint64_t rnd_even_mask = (frac_lsb - 1) | frac_lsb;
1347 uint64_t rnd_mask = rnd_even_mask >> 1;
1348 uint64_t inc;
1349
1350 switch (rounding_mode) {
1351 case float_round_nearest_even:
1352 inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
1353 break;
1354 case float_round_ties_away:
1355 inc = frac_lsbm1;
1356 break;
1357 case float_round_to_zero:
1358 inc = 0;
1359 break;
1360 case float_round_up:
1361 inc = a.sign ? 0 : rnd_mask;
1362 break;
1363 case float_round_down:
1364 inc = a.sign ? rnd_mask : 0;
1365 break;
1366 default:
1367 g_assert_not_reached();
1368 }
1369
1370 if (a.frac & rnd_mask) {
1371 s->float_exception_flags |= float_flag_inexact;
1372 a.frac += inc;
1373 a.frac &= ~rnd_mask;
1374 if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
1375 a.frac >>= 1;
1376 a.exp++;
1377 }
1378 }
1379 }
1380 break;
1381 default:
1382 g_assert_not_reached();
1383 }
1384 return a;
1385}
1386
1387float16 float16_round_to_int(float16 a, float_status *s)
1388{
1389 FloatParts pa = float16_unpack_canonical(a, s);
1390 FloatParts pr = round_to_int(pa, s->float_rounding_mode, s);
1391 return float16_round_pack_canonical(pr, s);
1392}
1393
1394float32 float32_round_to_int(float32 a, float_status *s)
1395{
1396 FloatParts pa = float32_unpack_canonical(a, s);
1397 FloatParts pr = round_to_int(pa, s->float_rounding_mode, s);
1398 return float32_round_pack_canonical(pr, s);
1399}
1400
1401float64 float64_round_to_int(float64 a, float_status *s)
1402{
1403 FloatParts pa = float64_unpack_canonical(a, s);
1404 FloatParts pr = round_to_int(pa, s->float_rounding_mode, s);
1405 return float64_round_pack_canonical(pr, s);
1406}
1407
1408float64 float64_trunc_to_int(float64 a, float_status *s)
1409{
1410 FloatParts pa = float64_unpack_canonical(a, s);
1411 FloatParts pr = round_to_int(pa, float_round_to_zero, s);
1412 return float64_round_pack_canonical(pr, s);
1413}
1414
ab52f973
AB
1415/*
1416 * Returns the result of converting the floating-point value `a' to
1417 * the two's complement integer format. The conversion is performed
1418 * according to the IEC/IEEE Standard for Binary Floating-Point
1419 * Arithmetic---which means in particular that the conversion is
1420 * rounded according to the current rounding mode. If `a' is a NaN,
1421 * the largest positive integer is returned. Otherwise, if the
1422 * conversion overflows, the largest integer with the same sign as `a'
1423 * is returned.
1424*/
1425
1426static int64_t round_to_int_and_pack(FloatParts in, int rmode,
1427 int64_t min, int64_t max,
1428 float_status *s)
1429{
1430 uint64_t r;
1431 int orig_flags = get_float_exception_flags(s);
1432 FloatParts p = round_to_int(in, rmode, s);
1433
1434 switch (p.cls) {
1435 case float_class_snan:
1436 case float_class_qnan:
801bc563 1437 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
1438 return max;
1439 case float_class_inf:
801bc563 1440 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
1441 return p.sign ? min : max;
1442 case float_class_zero:
1443 return 0;
1444 case float_class_normal:
1445 if (p.exp < DECOMPOSED_BINARY_POINT) {
1446 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
1447 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
1448 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
1449 } else {
1450 r = UINT64_MAX;
1451 }
1452 if (p.sign) {
33358375 1453 if (r <= -(uint64_t) min) {
ab52f973
AB
1454 return -r;
1455 } else {
1456 s->float_exception_flags = orig_flags | float_flag_invalid;
1457 return min;
1458 }
1459 } else {
33358375 1460 if (r <= max) {
ab52f973
AB
1461 return r;
1462 } else {
1463 s->float_exception_flags = orig_flags | float_flag_invalid;
1464 return max;
1465 }
1466 }
1467 default:
1468 g_assert_not_reached();
1469 }
1470}
1471
1472#define FLOAT_TO_INT(fsz, isz) \
1473int ## isz ## _t float ## fsz ## _to_int ## isz(float ## fsz a, \
1474 float_status *s) \
1475{ \
1476 FloatParts p = float ## fsz ## _unpack_canonical(a, s); \
1477 return round_to_int_and_pack(p, s->float_rounding_mode, \
1478 INT ## isz ## _MIN, INT ## isz ## _MAX,\
1479 s); \
1480} \
1481 \
1482int ## isz ## _t float ## fsz ## _to_int ## isz ## _round_to_zero \
1483 (float ## fsz a, float_status *s) \
1484{ \
1485 FloatParts p = float ## fsz ## _unpack_canonical(a, s); \
1486 return round_to_int_and_pack(p, float_round_to_zero, \
1487 INT ## isz ## _MIN, INT ## isz ## _MAX,\
1488 s); \
1489}
1490
1491FLOAT_TO_INT(16, 16)
1492FLOAT_TO_INT(16, 32)
1493FLOAT_TO_INT(16, 64)
1494
1495FLOAT_TO_INT(32, 16)
1496FLOAT_TO_INT(32, 32)
1497FLOAT_TO_INT(32, 64)
1498
1499FLOAT_TO_INT(64, 16)
1500FLOAT_TO_INT(64, 32)
1501FLOAT_TO_INT(64, 64)
1502
1503#undef FLOAT_TO_INT
1504
1505/*
1506 * Returns the result of converting the floating-point value `a' to
1507 * the unsigned integer format. The conversion is performed according
1508 * to the IEC/IEEE Standard for Binary Floating-Point
1509 * Arithmetic---which means in particular that the conversion is
1510 * rounded according to the current rounding mode. If `a' is a NaN,
1511 * the largest unsigned integer is returned. Otherwise, if the
1512 * conversion overflows, the largest unsigned integer is returned. If
1513 * the 'a' is negative, the result is rounded and zero is returned;
1514 * values that do not round to zero will raise the inexact exception
1515 * flag.
1516 */
1517
1518static uint64_t round_to_uint_and_pack(FloatParts in, int rmode, uint64_t max,
1519 float_status *s)
1520{
1521 int orig_flags = get_float_exception_flags(s);
1522 FloatParts p = round_to_int(in, rmode, s);
1523
1524 switch (p.cls) {
1525 case float_class_snan:
1526 case float_class_qnan:
1527 s->float_exception_flags = orig_flags | float_flag_invalid;
1528 return max;
1529 case float_class_inf:
801bc563 1530 s->float_exception_flags = orig_flags | float_flag_invalid;
ab52f973
AB
1531 return p.sign ? 0 : max;
1532 case float_class_zero:
1533 return 0;
1534 case float_class_normal:
1535 {
1536 uint64_t r;
1537 if (p.sign) {
1538 s->float_exception_flags = orig_flags | float_flag_invalid;
1539 return 0;
1540 }
1541
1542 if (p.exp < DECOMPOSED_BINARY_POINT) {
1543 r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
1544 } else if (p.exp - DECOMPOSED_BINARY_POINT < 2) {
1545 r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
1546 } else {
1547 s->float_exception_flags = orig_flags | float_flag_invalid;
1548 return max;
1549 }
1550
1551 /* For uint64 this will never trip, but if p.exp is too large
1552 * to shift a decomposed fraction we shall have exited via the
1553 * 3rd leg above.
1554 */
1555 if (r > max) {
1556 s->float_exception_flags = orig_flags | float_flag_invalid;
1557 return max;
1558 } else {
1559 return r;
1560 }
1561 }
1562 default:
1563 g_assert_not_reached();
1564 }
1565}
1566
1567#define FLOAT_TO_UINT(fsz, isz) \
1568uint ## isz ## _t float ## fsz ## _to_uint ## isz(float ## fsz a, \
1569 float_status *s) \
1570{ \
1571 FloatParts p = float ## fsz ## _unpack_canonical(a, s); \
1572 return round_to_uint_and_pack(p, s->float_rounding_mode, \
1573 UINT ## isz ## _MAX, s); \
1574} \
1575 \
1576uint ## isz ## _t float ## fsz ## _to_uint ## isz ## _round_to_zero \
1577 (float ## fsz a, float_status *s) \
1578{ \
1579 FloatParts p = float ## fsz ## _unpack_canonical(a, s); \
bd49e602
RH
1580 return round_to_uint_and_pack(p, float_round_to_zero, \
1581 UINT ## isz ## _MAX, s); \
ab52f973
AB
1582}
1583
1584FLOAT_TO_UINT(16, 16)
1585FLOAT_TO_UINT(16, 32)
1586FLOAT_TO_UINT(16, 64)
1587
1588FLOAT_TO_UINT(32, 16)
1589FLOAT_TO_UINT(32, 32)
1590FLOAT_TO_UINT(32, 64)
1591
1592FLOAT_TO_UINT(64, 16)
1593FLOAT_TO_UINT(64, 32)
1594FLOAT_TO_UINT(64, 64)
1595
1596#undef FLOAT_TO_UINT
1597
c02e1fb8
AB
1598/*
1599 * Integer to float conversions
1600 *
1601 * Returns the result of converting the two's complement integer `a'
1602 * to the floating-point format. The conversion is performed according
1603 * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1604 */
1605
1606static FloatParts int_to_float(int64_t a, float_status *status)
1607{
a5a5f5e2 1608 FloatParts r = {};
c02e1fb8
AB
1609 if (a == 0) {
1610 r.cls = float_class_zero;
1611 r.sign = false;
1612 } else if (a == (1ULL << 63)) {
1613 r.cls = float_class_normal;
1614 r.sign = true;
1615 r.frac = DECOMPOSED_IMPLICIT_BIT;
1616 r.exp = 63;
1617 } else {
1618 uint64_t f;
1619 if (a < 0) {
1620 f = -a;
1621 r.sign = true;
1622 } else {
1623 f = a;
1624 r.sign = false;
1625 }
1626 int shift = clz64(f) - 1;
1627 r.cls = float_class_normal;
1628 r.exp = (DECOMPOSED_BINARY_POINT - shift);
1629 r.frac = f << shift;
1630 }
1631
1632 return r;
1633}
1634
1635float16 int64_to_float16(int64_t a, float_status *status)
1636{
1637 FloatParts pa = int_to_float(a, status);
1638 return float16_round_pack_canonical(pa, status);
1639}
1640
1641float16 int32_to_float16(int32_t a, float_status *status)
1642{
1643 return int64_to_float16(a, status);
1644}
1645
1646float16 int16_to_float16(int16_t a, float_status *status)
1647{
1648 return int64_to_float16(a, status);
1649}
1650
1651float32 int64_to_float32(int64_t a, float_status *status)
1652{
1653 FloatParts pa = int_to_float(a, status);
1654 return float32_round_pack_canonical(pa, status);
1655}
1656
1657float32 int32_to_float32(int32_t a, float_status *status)
1658{
1659 return int64_to_float32(a, status);
1660}
1661
1662float32 int16_to_float32(int16_t a, float_status *status)
1663{
1664 return int64_to_float32(a, status);
1665}
1666
1667float64 int64_to_float64(int64_t a, float_status *status)
1668{
1669 FloatParts pa = int_to_float(a, status);
1670 return float64_round_pack_canonical(pa, status);
1671}
1672
1673float64 int32_to_float64(int32_t a, float_status *status)
1674{
1675 return int64_to_float64(a, status);
1676}
1677
1678float64 int16_to_float64(int16_t a, float_status *status)
1679{
1680 return int64_to_float64(a, status);
1681}
1682
1683
1684/*
1685 * Unsigned Integer to float conversions
1686 *
1687 * Returns the result of converting the unsigned integer `a' to the
1688 * floating-point format. The conversion is performed according to the
1689 * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1690 */
1691
1692static FloatParts uint_to_float(uint64_t a, float_status *status)
1693{
1694 FloatParts r = { .sign = false};
1695
1696 if (a == 0) {
1697 r.cls = float_class_zero;
1698 } else {
1699 int spare_bits = clz64(a) - 1;
1700 r.cls = float_class_normal;
1701 r.exp = DECOMPOSED_BINARY_POINT - spare_bits;
1702 if (spare_bits < 0) {
1703 shift64RightJamming(a, -spare_bits, &a);
1704 r.frac = a;
1705 } else {
1706 r.frac = a << spare_bits;
1707 }
1708 }
1709
1710 return r;
1711}
1712
1713float16 uint64_to_float16(uint64_t a, float_status *status)
1714{
1715 FloatParts pa = uint_to_float(a, status);
1716 return float16_round_pack_canonical(pa, status);
1717}
1718
1719float16 uint32_to_float16(uint32_t a, float_status *status)
1720{
1721 return uint64_to_float16(a, status);
1722}
1723
1724float16 uint16_to_float16(uint16_t a, float_status *status)
1725{
1726 return uint64_to_float16(a, status);
1727}
1728
1729float32 uint64_to_float32(uint64_t a, float_status *status)
1730{
1731 FloatParts pa = uint_to_float(a, status);
1732 return float32_round_pack_canonical(pa, status);
1733}
1734
1735float32 uint32_to_float32(uint32_t a, float_status *status)
1736{
1737 return uint64_to_float32(a, status);
1738}
1739
1740float32 uint16_to_float32(uint16_t a, float_status *status)
1741{
1742 return uint64_to_float32(a, status);
1743}
1744
1745float64 uint64_to_float64(uint64_t a, float_status *status)
1746{
1747 FloatParts pa = uint_to_float(a, status);
1748 return float64_round_pack_canonical(pa, status);
1749}
1750
1751float64 uint32_to_float64(uint32_t a, float_status *status)
1752{
1753 return uint64_to_float64(a, status);
1754}
1755
1756float64 uint16_to_float64(uint16_t a, float_status *status)
1757{
1758 return uint64_to_float64(a, status);
1759}
1760
89360067
AB
1761/* Float Min/Max */
1762/* min() and max() functions. These can't be implemented as
1763 * 'compare and pick one input' because that would mishandle
1764 * NaNs and +0 vs -0.
1765 *
1766 * minnum() and maxnum() functions. These are similar to the min()
1767 * and max() functions but if one of the arguments is a QNaN and
1768 * the other is numerical then the numerical argument is returned.
1769 * SNaNs will get quietened before being returned.
1770 * minnum() and maxnum correspond to the IEEE 754-2008 minNum()
1771 * and maxNum() operations. min() and max() are the typical min/max
1772 * semantics provided by many CPUs which predate that specification.
1773 *
1774 * minnummag() and maxnummag() functions correspond to minNumMag()
1775 * and minNumMag() from the IEEE-754 2008.
1776 */
1777static FloatParts minmax_floats(FloatParts a, FloatParts b, bool ismin,
1778 bool ieee, bool ismag, float_status *s)
1779{
1780 if (unlikely(is_nan(a.cls) || is_nan(b.cls))) {
1781 if (ieee) {
1782 /* Takes two floating-point values `a' and `b', one of
1783 * which is a NaN, and returns the appropriate NaN
1784 * result. If either `a' or `b' is a signaling NaN,
1785 * the invalid exception is raised.
1786 */
1787 if (is_snan(a.cls) || is_snan(b.cls)) {
1788 return pick_nan(a, b, s);
1789 } else if (is_nan(a.cls) && !is_nan(b.cls)) {
1790 return b;
1791 } else if (is_nan(b.cls) && !is_nan(a.cls)) {
1792 return a;
1793 }
1794 }
1795 return pick_nan(a, b, s);
1796 } else {
1797 int a_exp, b_exp;
89360067
AB
1798
1799 switch (a.cls) {
1800 case float_class_normal:
1801 a_exp = a.exp;
1802 break;
1803 case float_class_inf:
1804 a_exp = INT_MAX;
1805 break;
1806 case float_class_zero:
1807 a_exp = INT_MIN;
1808 break;
1809 default:
1810 g_assert_not_reached();
1811 break;
1812 }
1813 switch (b.cls) {
1814 case float_class_normal:
1815 b_exp = b.exp;
1816 break;
1817 case float_class_inf:
1818 b_exp = INT_MAX;
1819 break;
1820 case float_class_zero:
1821 b_exp = INT_MIN;
1822 break;
1823 default:
1824 g_assert_not_reached();
1825 break;
1826 }
1827
6245327a
EC
1828 if (ismag && (a_exp != b_exp || a.frac != b.frac)) {
1829 bool a_less = a_exp < b_exp;
1830 if (a_exp == b_exp) {
1831 a_less = a.frac < b.frac;
1832 }
1833 return a_less ^ ismin ? b : a;
89360067
AB
1834 }
1835
6245327a 1836 if (a.sign == b.sign) {
89360067
AB
1837 bool a_less = a_exp < b_exp;
1838 if (a_exp == b_exp) {
1839 a_less = a.frac < b.frac;
1840 }
6245327a 1841 return a.sign ^ a_less ^ ismin ? b : a;
89360067 1842 } else {
6245327a 1843 return a.sign ^ ismin ? b : a;
89360067
AB
1844 }
1845 }
1846}
1847
1848#define MINMAX(sz, name, ismin, isiee, ismag) \
1849float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
1850 float_status *s) \
1851{ \
1852 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
1853 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
1854 FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
1855 \
1856 return float ## sz ## _round_pack_canonical(pr, s); \
1857}
1858
1859MINMAX(16, min, true, false, false)
1860MINMAX(16, minnum, true, true, false)
1861MINMAX(16, minnummag, true, true, true)
1862MINMAX(16, max, false, false, false)
1863MINMAX(16, maxnum, false, true, false)
1864MINMAX(16, maxnummag, false, true, true)
1865
1866MINMAX(32, min, true, false, false)
1867MINMAX(32, minnum, true, true, false)
1868MINMAX(32, minnummag, true, true, true)
1869MINMAX(32, max, false, false, false)
1870MINMAX(32, maxnum, false, true, false)
1871MINMAX(32, maxnummag, false, true, true)
1872
1873MINMAX(64, min, true, false, false)
1874MINMAX(64, minnum, true, true, false)
1875MINMAX(64, minnummag, true, true, true)
1876MINMAX(64, max, false, false, false)
1877MINMAX(64, maxnum, false, true, false)
1878MINMAX(64, maxnummag, false, true, true)
1879
1880#undef MINMAX
1881
0c4c9092
AB
1882/* Floating point compare */
1883static int compare_floats(FloatParts a, FloatParts b, bool is_quiet,
1884 float_status *s)
1885{
1886 if (is_nan(a.cls) || is_nan(b.cls)) {
1887 if (!is_quiet ||
1888 a.cls == float_class_snan ||
1889 b.cls == float_class_snan) {
1890 s->float_exception_flags |= float_flag_invalid;
1891 }
1892 return float_relation_unordered;
1893 }
1894
1895 if (a.cls == float_class_zero) {
1896 if (b.cls == float_class_zero) {
1897 return float_relation_equal;
1898 }
1899 return b.sign ? float_relation_greater : float_relation_less;
1900 } else if (b.cls == float_class_zero) {
1901 return a.sign ? float_relation_less : float_relation_greater;
1902 }
1903
1904 /* The only really important thing about infinity is its sign. If
1905 * both are infinities the sign marks the smallest of the two.
1906 */
1907 if (a.cls == float_class_inf) {
1908 if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
1909 return float_relation_equal;
1910 }
1911 return a.sign ? float_relation_less : float_relation_greater;
1912 } else if (b.cls == float_class_inf) {
1913 return b.sign ? float_relation_greater : float_relation_less;
1914 }
1915
1916 if (a.sign != b.sign) {
1917 return a.sign ? float_relation_less : float_relation_greater;
1918 }
1919
1920 if (a.exp == b.exp) {
1921 if (a.frac == b.frac) {
1922 return float_relation_equal;
1923 }
1924 if (a.sign) {
1925 return a.frac > b.frac ?
1926 float_relation_less : float_relation_greater;
1927 } else {
1928 return a.frac > b.frac ?
1929 float_relation_greater : float_relation_less;
1930 }
1931 } else {
1932 if (a.sign) {
1933 return a.exp > b.exp ? float_relation_less : float_relation_greater;
1934 } else {
1935 return a.exp > b.exp ? float_relation_greater : float_relation_less;
1936 }
1937 }
1938}
1939
1940#define COMPARE(sz) \
1941int float ## sz ## _compare(float ## sz a, float ## sz b, \
1942 float_status *s) \
1943{ \
1944 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
1945 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
1946 return compare_floats(pa, pb, false, s); \
1947} \
1948int float ## sz ## _compare_quiet(float ## sz a, float ## sz b, \
1949 float_status *s) \
1950{ \
1951 FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
1952 FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
1953 return compare_floats(pa, pb, true, s); \
1954}
1955
1956COMPARE(16)
1957COMPARE(32)
1958COMPARE(64)
1959
1960#undef COMPARE
1961
0bfc9f19
AB
1962/* Multiply A by 2 raised to the power N. */
1963static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
1964{
1965 if (unlikely(is_nan(a.cls))) {
1966 return return_nan(a, s);
1967 }
1968 if (a.cls == float_class_normal) {
ce8d4082
RH
1969 /* The largest float type (even though not supported by FloatParts)
1970 * is float128, which has a 15 bit exponent. Bounding N to 16 bits
1971 * still allows rounding to infinity, without allowing overflow
1972 * within the int32_t that backs FloatParts.exp.
1973 */
1974 n = MIN(MAX(n, -0x10000), 0x10000);
0bfc9f19
AB
1975 a.exp += n;
1976 }
1977 return a;
1978}
1979
1980float16 float16_scalbn(float16 a, int n, float_status *status)
1981{
1982 FloatParts pa = float16_unpack_canonical(a, status);
1983 FloatParts pr = scalbn_decomposed(pa, n, status);
1984 return float16_round_pack_canonical(pr, status);
1985}
1986
1987float32 float32_scalbn(float32 a, int n, float_status *status)
1988{
1989 FloatParts pa = float32_unpack_canonical(a, status);
1990 FloatParts pr = scalbn_decomposed(pa, n, status);
1991 return float32_round_pack_canonical(pr, status);
1992}
1993
1994float64 float64_scalbn(float64 a, int n, float_status *status)
1995{
1996 FloatParts pa = float64_unpack_canonical(a, status);
1997 FloatParts pr = scalbn_decomposed(pa, n, status);
1998 return float64_round_pack_canonical(pr, status);
1999}
2000
c13bb2da
AB
2001/*
2002 * Square Root
2003 *
2004 * The old softfloat code did an approximation step before zeroing in
2005 * on the final result. However for simpleness we just compute the
2006 * square root by iterating down from the implicit bit to enough extra
2007 * bits to ensure we get a correctly rounded result.
2008 *
2009 * This does mean however the calculation is slower than before,
2010 * especially for 64 bit floats.
2011 */
2012
2013static FloatParts sqrt_float(FloatParts a, float_status *s, const FloatFmt *p)
2014{
2015 uint64_t a_frac, r_frac, s_frac;
2016 int bit, last_bit;
2017
2018 if (is_nan(a.cls)) {
2019 return return_nan(a, s);
2020 }
2021 if (a.cls == float_class_zero) {
2022 return a; /* sqrt(+-0) = +-0 */
2023 }
2024 if (a.sign) {
2025 s->float_exception_flags |= float_flag_invalid;
f7e598e2 2026 return parts_default_nan(s);
c13bb2da
AB
2027 }
2028 if (a.cls == float_class_inf) {
2029 return a; /* sqrt(+inf) = +inf */
2030 }
2031
2032 assert(a.cls == float_class_normal);
2033
2034 /* We need two overflow bits at the top. Adding room for that is a
2035 * right shift. If the exponent is odd, we can discard the low bit
2036 * by multiplying the fraction by 2; that's a left shift. Combine
2037 * those and we shift right if the exponent is even.
2038 */
2039 a_frac = a.frac;
2040 if (!(a.exp & 1)) {
2041 a_frac >>= 1;
2042 }
2043 a.exp >>= 1;
2044
2045 /* Bit-by-bit computation of sqrt. */
2046 r_frac = 0;
2047 s_frac = 0;
2048
2049 /* Iterate from implicit bit down to the 3 extra bits to compute a
2050 * properly rounded result. Remember we've inserted one more bit
2051 * at the top, so these positions are one less.
2052 */
2053 bit = DECOMPOSED_BINARY_POINT - 1;
2054 last_bit = MAX(p->frac_shift - 4, 0);
2055 do {
2056 uint64_t q = 1ULL << bit;
2057 uint64_t t_frac = s_frac + q;
2058 if (t_frac <= a_frac) {
2059 s_frac = t_frac + q;
2060 a_frac -= t_frac;
2061 r_frac += q;
2062 }
2063 a_frac <<= 1;
2064 } while (--bit >= last_bit);
2065
2066 /* Undo the right shift done above. If there is any remaining
2067 * fraction, the result is inexact. Set the sticky bit.
2068 */
2069 a.frac = (r_frac << 1) + (a_frac != 0);
2070
2071 return a;
2072}
2073
2074float16 __attribute__((flatten)) float16_sqrt(float16 a, float_status *status)
2075{
2076 FloatParts pa = float16_unpack_canonical(a, status);
2077 FloatParts pr = sqrt_float(pa, status, &float16_params);
2078 return float16_round_pack_canonical(pr, status);
2079}
2080
2081float32 __attribute__((flatten)) float32_sqrt(float32 a, float_status *status)
2082{
2083 FloatParts pa = float32_unpack_canonical(a, status);
2084 FloatParts pr = sqrt_float(pa, status, &float32_params);
2085 return float32_round_pack_canonical(pr, status);
2086}
2087
2088float64 __attribute__((flatten)) float64_sqrt(float64 a, float_status *status)
2089{
2090 FloatParts pa = float64_unpack_canonical(a, status);
2091 FloatParts pr = sqrt_float(pa, status, &float64_params);
2092 return float64_round_pack_canonical(pr, status);
2093}
2094
0218a16e
RH
2095/*----------------------------------------------------------------------------
2096| The pattern for a default generated NaN.
2097*----------------------------------------------------------------------------*/
2098
2099float16 float16_default_nan(float_status *status)
2100{
2101 FloatParts p = parts_default_nan(status);
2102 p.frac >>= float16_params.frac_shift;
2103 return float16_pack_raw(p);
2104}
2105
2106float32 float32_default_nan(float_status *status)
2107{
2108 FloatParts p = parts_default_nan(status);
2109 p.frac >>= float32_params.frac_shift;
2110 return float32_pack_raw(p);
2111}
2112
2113float64 float64_default_nan(float_status *status)
2114{
2115 FloatParts p = parts_default_nan(status);
2116 p.frac >>= float64_params.frac_shift;
2117 return float64_pack_raw(p);
2118}
2119
2120float128 float128_default_nan(float_status *status)
2121{
2122 FloatParts p = parts_default_nan(status);
2123 float128 r;
2124
2125 /* Extrapolate from the choices made by parts_default_nan to fill
2126 * in the quad-floating format. If the low bit is set, assume we
2127 * want to set all non-snan bits.
2128 */
2129 r.low = -(p.frac & 1);
2130 r.high = p.frac >> (DECOMPOSED_BINARY_POINT - 48);
2131 r.high |= LIT64(0x7FFF000000000000);
2132 r.high |= (uint64_t)p.sign << 63;
2133
2134 return r;
2135}
c13bb2da 2136
158142c2 2137/*----------------------------------------------------------------------------
377ed926
RH
2138| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
2139*----------------------------------------------------------------------------*/
2140
2141float16 float16_silence_nan(float16 a, float_status *status)
2142{
2143 FloatParts p = float16_unpack_raw(a);
2144 p.frac <<= float16_params.frac_shift;
2145 p = parts_silence_nan(p, status);
2146 p.frac >>= float16_params.frac_shift;
2147 return float16_pack_raw(p);
2148}
2149
2150float32 float32_silence_nan(float32 a, float_status *status)
2151{
2152 FloatParts p = float32_unpack_raw(a);
2153 p.frac <<= float32_params.frac_shift;
2154 p = parts_silence_nan(p, status);
2155 p.frac >>= float32_params.frac_shift;
2156 return float32_pack_raw(p);
2157}
2158
2159float64 float64_silence_nan(float64 a, float_status *status)
2160{
2161 FloatParts p = float64_unpack_raw(a);
2162 p.frac <<= float64_params.frac_shift;
2163 p = parts_silence_nan(p, status);
2164 p.frac >>= float64_params.frac_shift;
2165 return float64_pack_raw(p);
2166}
2167
2168/*----------------------------------------------------------------------------
158142c2
FB
2169| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
2170| and 7, and returns the properly rounded 32-bit integer corresponding to the
2171| input. If `zSign' is 1, the input is negated before being converted to an
2172| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
2173| is simply rounded to an integer, with the inexact exception raised if the
2174| input cannot be represented exactly as an integer. However, if the fixed-
2175| point input is too large, the invalid exception is raised and the largest
2176| positive or negative integer is returned.
2177*----------------------------------------------------------------------------*/
2178
f4014512 2179static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status)
158142c2 2180{
8f506c70 2181 int8_t roundingMode;
158142c2 2182 flag roundNearestEven;
8f506c70 2183 int8_t roundIncrement, roundBits;
760e1416 2184 int32_t z;
158142c2 2185
a2f2d288 2186 roundingMode = status->float_rounding_mode;
158142c2 2187 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2188 switch (roundingMode) {
2189 case float_round_nearest_even:
f9288a76 2190 case float_round_ties_away:
dc355b76
PM
2191 roundIncrement = 0x40;
2192 break;
2193 case float_round_to_zero:
2194 roundIncrement = 0;
2195 break;
2196 case float_round_up:
2197 roundIncrement = zSign ? 0 : 0x7f;
2198 break;
2199 case float_round_down:
2200 roundIncrement = zSign ? 0x7f : 0;
2201 break;
2202 default:
2203 abort();
158142c2
FB
2204 }
2205 roundBits = absZ & 0x7F;
2206 absZ = ( absZ + roundIncrement )>>7;
2207 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
2208 z = absZ;
2209 if ( zSign ) z = - z;
2210 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
ff32e16e 2211 float_raise(float_flag_invalid, status);
bb98fe42 2212 return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2 2213 }
a2f2d288
PM
2214 if (roundBits) {
2215 status->float_exception_flags |= float_flag_inexact;
2216 }
158142c2
FB
2217 return z;
2218
2219}
2220
2221/*----------------------------------------------------------------------------
2222| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
2223| `absZ1', with binary point between bits 63 and 64 (between the input words),
2224| and returns the properly rounded 64-bit integer corresponding to the input.
2225| If `zSign' is 1, the input is negated before being converted to an integer.
2226| Ordinarily, the fixed-point input is simply rounded to an integer, with
2227| the inexact exception raised if the input cannot be represented exactly as
2228| an integer. However, if the fixed-point input is too large, the invalid
2229| exception is raised and the largest positive or negative integer is
2230| returned.
2231*----------------------------------------------------------------------------*/
2232
f42c2224 2233static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1,
e5a41ffa 2234 float_status *status)
158142c2 2235{
8f506c70 2236 int8_t roundingMode;
158142c2 2237 flag roundNearestEven, increment;
760e1416 2238 int64_t z;
158142c2 2239
a2f2d288 2240 roundingMode = status->float_rounding_mode;
158142c2 2241 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2242 switch (roundingMode) {
2243 case float_round_nearest_even:
f9288a76 2244 case float_round_ties_away:
dc355b76
PM
2245 increment = ((int64_t) absZ1 < 0);
2246 break;
2247 case float_round_to_zero:
2248 increment = 0;
2249 break;
2250 case float_round_up:
2251 increment = !zSign && absZ1;
2252 break;
2253 case float_round_down:
2254 increment = zSign && absZ1;
2255 break;
2256 default:
2257 abort();
158142c2
FB
2258 }
2259 if ( increment ) {
2260 ++absZ0;
2261 if ( absZ0 == 0 ) goto overflow;
bb98fe42 2262 absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
2263 }
2264 z = absZ0;
2265 if ( zSign ) z = - z;
2266 if ( z && ( ( z < 0 ) ^ zSign ) ) {
2267 overflow:
ff32e16e 2268 float_raise(float_flag_invalid, status);
158142c2 2269 return
bb98fe42 2270 zSign ? (int64_t) LIT64( 0x8000000000000000 )
158142c2
FB
2271 : LIT64( 0x7FFFFFFFFFFFFFFF );
2272 }
a2f2d288
PM
2273 if (absZ1) {
2274 status->float_exception_flags |= float_flag_inexact;
2275 }
158142c2
FB
2276 return z;
2277
2278}
2279
fb3ea83a
TM
2280/*----------------------------------------------------------------------------
2281| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
2282| `absZ1', with binary point between bits 63 and 64 (between the input words),
2283| and returns the properly rounded 64-bit unsigned integer corresponding to the
2284| input. Ordinarily, the fixed-point input is simply rounded to an integer,
2285| with the inexact exception raised if the input cannot be represented exactly
2286| as an integer. However, if the fixed-point input is too large, the invalid
2287| exception is raised and the largest unsigned integer is returned.
2288*----------------------------------------------------------------------------*/
2289
f42c2224 2290static int64_t roundAndPackUint64(flag zSign, uint64_t absZ0,
e5a41ffa 2291 uint64_t absZ1, float_status *status)
fb3ea83a 2292{
8f506c70 2293 int8_t roundingMode;
fb3ea83a
TM
2294 flag roundNearestEven, increment;
2295
a2f2d288 2296 roundingMode = status->float_rounding_mode;
fb3ea83a 2297 roundNearestEven = (roundingMode == float_round_nearest_even);
dc355b76
PM
2298 switch (roundingMode) {
2299 case float_round_nearest_even:
f9288a76 2300 case float_round_ties_away:
dc355b76
PM
2301 increment = ((int64_t)absZ1 < 0);
2302 break;
2303 case float_round_to_zero:
2304 increment = 0;
2305 break;
2306 case float_round_up:
2307 increment = !zSign && absZ1;
2308 break;
2309 case float_round_down:
2310 increment = zSign && absZ1;
2311 break;
2312 default:
2313 abort();
fb3ea83a
TM
2314 }
2315 if (increment) {
2316 ++absZ0;
2317 if (absZ0 == 0) {
ff32e16e 2318 float_raise(float_flag_invalid, status);
fb3ea83a
TM
2319 return LIT64(0xFFFFFFFFFFFFFFFF);
2320 }
2321 absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven);
2322 }
2323
2324 if (zSign && absZ0) {
ff32e16e 2325 float_raise(float_flag_invalid, status);
fb3ea83a
TM
2326 return 0;
2327 }
2328
2329 if (absZ1) {
a2f2d288 2330 status->float_exception_flags |= float_flag_inexact;
fb3ea83a
TM
2331 }
2332 return absZ0;
2333}
2334
37d18660
PM
2335/*----------------------------------------------------------------------------
2336| If `a' is denormal and we are in flush-to-zero mode then set the
2337| input-denormal exception and return zero. Otherwise just return the value.
2338*----------------------------------------------------------------------------*/
e5a41ffa 2339float32 float32_squash_input_denormal(float32 a, float_status *status)
37d18660 2340{
a2f2d288 2341 if (status->flush_inputs_to_zero) {
37d18660 2342 if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
ff32e16e 2343 float_raise(float_flag_input_denormal, status);
37d18660
PM
2344 return make_float32(float32_val(a) & 0x80000000);
2345 }
2346 }
2347 return a;
2348}
2349
158142c2
FB
2350/*----------------------------------------------------------------------------
2351| Normalizes the subnormal single-precision floating-point value represented
2352| by the denormalized significand `aSig'. The normalized exponent and
2353| significand are stored at the locations pointed to by `zExpPtr' and
2354| `zSigPtr', respectively.
2355*----------------------------------------------------------------------------*/
2356
2357static void
0c48262d 2358 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
158142c2 2359{
8f506c70 2360 int8_t shiftCount;
158142c2
FB
2361
2362 shiftCount = countLeadingZeros32( aSig ) - 8;
2363 *zSigPtr = aSig<<shiftCount;
2364 *zExpPtr = 1 - shiftCount;
2365
2366}
2367
158142c2
FB
2368/*----------------------------------------------------------------------------
2369| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2370| and significand `zSig', and returns the proper single-precision floating-
2371| point value corresponding to the abstract input. Ordinarily, the abstract
2372| value is simply rounded and packed into the single-precision format, with
2373| the inexact exception raised if the abstract input cannot be represented
2374| exactly. However, if the abstract value is too large, the overflow and
2375| inexact exceptions are raised and an infinity or maximal finite value is
2376| returned. If the abstract value is too small, the input value is rounded to
2377| a subnormal number, and the underflow and inexact exceptions are raised if
2378| the abstract input cannot be represented exactly as a subnormal single-
2379| precision floating-point number.
2380| The input significand `zSig' has its binary point between bits 30
2381| and 29, which is 7 bits to the left of the usual location. This shifted
2382| significand must be normalized or smaller. If `zSig' is not normalized,
2383| `zExp' must be 0; in that case, the result returned is a subnormal number,
2384| and it must not require rounding. In the usual case that `zSig' is
2385| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
2386| The handling of underflow and overflow follows the IEC/IEEE Standard for
2387| Binary Floating-Point Arithmetic.
2388*----------------------------------------------------------------------------*/
2389
0c48262d 2390static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
e5a41ffa 2391 float_status *status)
158142c2 2392{
8f506c70 2393 int8_t roundingMode;
158142c2 2394 flag roundNearestEven;
8f506c70 2395 int8_t roundIncrement, roundBits;
158142c2
FB
2396 flag isTiny;
2397
a2f2d288 2398 roundingMode = status->float_rounding_mode;
158142c2 2399 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2400 switch (roundingMode) {
2401 case float_round_nearest_even:
f9288a76 2402 case float_round_ties_away:
dc355b76
PM
2403 roundIncrement = 0x40;
2404 break;
2405 case float_round_to_zero:
2406 roundIncrement = 0;
2407 break;
2408 case float_round_up:
2409 roundIncrement = zSign ? 0 : 0x7f;
2410 break;
2411 case float_round_down:
2412 roundIncrement = zSign ? 0x7f : 0;
2413 break;
2414 default:
2415 abort();
2416 break;
158142c2
FB
2417 }
2418 roundBits = zSig & 0x7F;
bb98fe42 2419 if ( 0xFD <= (uint16_t) zExp ) {
158142c2
FB
2420 if ( ( 0xFD < zExp )
2421 || ( ( zExp == 0xFD )
bb98fe42 2422 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 2423 ) {
ff32e16e 2424 float_raise(float_flag_overflow | float_flag_inexact, status);
f090c9d4 2425 return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
158142c2
FB
2426 }
2427 if ( zExp < 0 ) {
a2f2d288 2428 if (status->flush_to_zero) {
ff32e16e 2429 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
2430 return packFloat32(zSign, 0, 0);
2431 }
158142c2 2432 isTiny =
a2f2d288
PM
2433 (status->float_detect_tininess
2434 == float_tininess_before_rounding)
158142c2
FB
2435 || ( zExp < -1 )
2436 || ( zSig + roundIncrement < 0x80000000 );
2437 shift32RightJamming( zSig, - zExp, &zSig );
2438 zExp = 0;
2439 roundBits = zSig & 0x7F;
ff32e16e
PM
2440 if (isTiny && roundBits) {
2441 float_raise(float_flag_underflow, status);
2442 }
158142c2
FB
2443 }
2444 }
a2f2d288
PM
2445 if (roundBits) {
2446 status->float_exception_flags |= float_flag_inexact;
2447 }
158142c2
FB
2448 zSig = ( zSig + roundIncrement )>>7;
2449 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
2450 if ( zSig == 0 ) zExp = 0;
2451 return packFloat32( zSign, zExp, zSig );
2452
2453}
2454
2455/*----------------------------------------------------------------------------
2456| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2457| and significand `zSig', and returns the proper single-precision floating-
2458| point value corresponding to the abstract input. This routine is just like
2459| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
2460| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
2461| floating-point exponent.
2462*----------------------------------------------------------------------------*/
2463
2464static float32
0c48262d 2465 normalizeRoundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
e5a41ffa 2466 float_status *status)
158142c2 2467{
8f506c70 2468 int8_t shiftCount;
158142c2
FB
2469
2470 shiftCount = countLeadingZeros32( zSig ) - 1;
ff32e16e
PM
2471 return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
2472 status);
158142c2
FB
2473
2474}
2475
37d18660
PM
2476/*----------------------------------------------------------------------------
2477| If `a' is denormal and we are in flush-to-zero mode then set the
2478| input-denormal exception and return zero. Otherwise just return the value.
2479*----------------------------------------------------------------------------*/
e5a41ffa 2480float64 float64_squash_input_denormal(float64 a, float_status *status)
37d18660 2481{
a2f2d288 2482 if (status->flush_inputs_to_zero) {
37d18660 2483 if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
ff32e16e 2484 float_raise(float_flag_input_denormal, status);
37d18660
PM
2485 return make_float64(float64_val(a) & (1ULL << 63));
2486 }
2487 }
2488 return a;
2489}
2490
158142c2
FB
2491/*----------------------------------------------------------------------------
2492| Normalizes the subnormal double-precision floating-point value represented
2493| by the denormalized significand `aSig'. The normalized exponent and
2494| significand are stored at the locations pointed to by `zExpPtr' and
2495| `zSigPtr', respectively.
2496*----------------------------------------------------------------------------*/
2497
2498static void
0c48262d 2499 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
158142c2 2500{
8f506c70 2501 int8_t shiftCount;
158142c2
FB
2502
2503 shiftCount = countLeadingZeros64( aSig ) - 11;
2504 *zSigPtr = aSig<<shiftCount;
2505 *zExpPtr = 1 - shiftCount;
2506
2507}
2508
2509/*----------------------------------------------------------------------------
2510| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
2511| double-precision floating-point value, returning the result. After being
2512| shifted into the proper positions, the three fields are simply added
2513| together to form the result. This means that any integer portion of `zSig'
2514| will be added into the exponent. Since a properly normalized significand
2515| will have an integer portion equal to 1, the `zExp' input should be 1 less
2516| than the desired result exponent whenever `zSig' is a complete, normalized
2517| significand.
2518*----------------------------------------------------------------------------*/
2519
0c48262d 2520static inline float64 packFloat64(flag zSign, int zExp, uint64_t zSig)
158142c2
FB
2521{
2522
f090c9d4 2523 return make_float64(
bb98fe42 2524 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
158142c2
FB
2525
2526}
2527
2528/*----------------------------------------------------------------------------
2529| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2530| and significand `zSig', and returns the proper double-precision floating-
2531| point value corresponding to the abstract input. Ordinarily, the abstract
2532| value is simply rounded and packed into the double-precision format, with
2533| the inexact exception raised if the abstract input cannot be represented
2534| exactly. However, if the abstract value is too large, the overflow and
2535| inexact exceptions are raised and an infinity or maximal finite value is
a7d1ac78
PM
2536| returned. If the abstract value is too small, the input value is rounded to
2537| a subnormal number, and the underflow and inexact exceptions are raised if
2538| the abstract input cannot be represented exactly as a subnormal double-
158142c2
FB
2539| precision floating-point number.
2540| The input significand `zSig' has its binary point between bits 62
2541| and 61, which is 10 bits to the left of the usual location. This shifted
2542| significand must be normalized or smaller. If `zSig' is not normalized,
2543| `zExp' must be 0; in that case, the result returned is a subnormal number,
2544| and it must not require rounding. In the usual case that `zSig' is
2545| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
2546| The handling of underflow and overflow follows the IEC/IEEE Standard for
2547| Binary Floating-Point Arithmetic.
2548*----------------------------------------------------------------------------*/
2549
0c48262d 2550static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
e5a41ffa 2551 float_status *status)
158142c2 2552{
8f506c70 2553 int8_t roundingMode;
158142c2 2554 flag roundNearestEven;
0c48262d 2555 int roundIncrement, roundBits;
158142c2
FB
2556 flag isTiny;
2557
a2f2d288 2558 roundingMode = status->float_rounding_mode;
158142c2 2559 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
2560 switch (roundingMode) {
2561 case float_round_nearest_even:
f9288a76 2562 case float_round_ties_away:
dc355b76
PM
2563 roundIncrement = 0x200;
2564 break;
2565 case float_round_to_zero:
2566 roundIncrement = 0;
2567 break;
2568 case float_round_up:
2569 roundIncrement = zSign ? 0 : 0x3ff;
2570 break;
2571 case float_round_down:
2572 roundIncrement = zSign ? 0x3ff : 0;
2573 break;
9ee6f678
BR
2574 case float_round_to_odd:
2575 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
2576 break;
dc355b76
PM
2577 default:
2578 abort();
158142c2
FB
2579 }
2580 roundBits = zSig & 0x3FF;
bb98fe42 2581 if ( 0x7FD <= (uint16_t) zExp ) {
158142c2
FB
2582 if ( ( 0x7FD < zExp )
2583 || ( ( zExp == 0x7FD )
bb98fe42 2584 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
158142c2 2585 ) {
9ee6f678
BR
2586 bool overflow_to_inf = roundingMode != float_round_to_odd &&
2587 roundIncrement != 0;
ff32e16e 2588 float_raise(float_flag_overflow | float_flag_inexact, status);
9ee6f678 2589 return packFloat64(zSign, 0x7FF, -(!overflow_to_inf));
158142c2
FB
2590 }
2591 if ( zExp < 0 ) {
a2f2d288 2592 if (status->flush_to_zero) {
ff32e16e 2593 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
2594 return packFloat64(zSign, 0, 0);
2595 }
158142c2 2596 isTiny =
a2f2d288
PM
2597 (status->float_detect_tininess
2598 == float_tininess_before_rounding)
158142c2
FB
2599 || ( zExp < -1 )
2600 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
2601 shift64RightJamming( zSig, - zExp, &zSig );
2602 zExp = 0;
2603 roundBits = zSig & 0x3FF;
ff32e16e
PM
2604 if (isTiny && roundBits) {
2605 float_raise(float_flag_underflow, status);
2606 }
9ee6f678
BR
2607 if (roundingMode == float_round_to_odd) {
2608 /*
2609 * For round-to-odd case, the roundIncrement depends on
2610 * zSig which just changed.
2611 */
2612 roundIncrement = (zSig & 0x400) ? 0 : 0x3ff;
2613 }
158142c2
FB
2614 }
2615 }
a2f2d288
PM
2616 if (roundBits) {
2617 status->float_exception_flags |= float_flag_inexact;
2618 }
158142c2
FB
2619 zSig = ( zSig + roundIncrement )>>10;
2620 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
2621 if ( zSig == 0 ) zExp = 0;
2622 return packFloat64( zSign, zExp, zSig );
2623
2624}
2625
2626/*----------------------------------------------------------------------------
2627| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2628| and significand `zSig', and returns the proper double-precision floating-
2629| point value corresponding to the abstract input. This routine is just like
2630| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
2631| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
2632| floating-point exponent.
2633*----------------------------------------------------------------------------*/
2634
2635static float64
0c48262d 2636 normalizeRoundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
e5a41ffa 2637 float_status *status)
158142c2 2638{
8f506c70 2639 int8_t shiftCount;
158142c2
FB
2640
2641 shiftCount = countLeadingZeros64( zSig ) - 1;
ff32e16e
PM
2642 return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
2643 status);
158142c2
FB
2644
2645}
2646
158142c2
FB
2647/*----------------------------------------------------------------------------
2648| Normalizes the subnormal extended double-precision floating-point value
2649| represented by the denormalized significand `aSig'. The normalized exponent
2650| and significand are stored at the locations pointed to by `zExpPtr' and
2651| `zSigPtr', respectively.
2652*----------------------------------------------------------------------------*/
2653
88857aca
LV
2654void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
2655 uint64_t *zSigPtr)
158142c2 2656{
8f506c70 2657 int8_t shiftCount;
158142c2
FB
2658
2659 shiftCount = countLeadingZeros64( aSig );
2660 *zSigPtr = aSig<<shiftCount;
2661 *zExpPtr = 1 - shiftCount;
158142c2
FB
2662}
2663
2664/*----------------------------------------------------------------------------
2665| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
2666| and extended significand formed by the concatenation of `zSig0' and `zSig1',
2667| and returns the proper extended double-precision floating-point value
2668| corresponding to the abstract input. Ordinarily, the abstract value is
2669| rounded and packed into the extended double-precision format, with the
2670| inexact exception raised if the abstract input cannot be represented
2671| exactly. However, if the abstract value is too large, the overflow and
2672| inexact exceptions are raised and an infinity or maximal finite value is
2673| returned. If the abstract value is too small, the input value is rounded to
2674| a subnormal number, and the underflow and inexact exceptions are raised if
2675| the abstract input cannot be represented exactly as a subnormal extended
2676| double-precision floating-point number.
2677| If `roundingPrecision' is 32 or 64, the result is rounded to the same
2678| number of bits as single or double precision, respectively. Otherwise, the
2679| result is rounded to the full precision of the extended double-precision
2680| format.
2681| The input significand must be normalized or smaller. If the input
2682| significand is not normalized, `zExp' must be 0; in that case, the result
2683| returned is a subnormal number, and it must not require rounding. The
2684| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
2685| Floating-Point Arithmetic.
2686*----------------------------------------------------------------------------*/
2687
88857aca
LV
2688floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign,
2689 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
2690 float_status *status)
158142c2 2691{
8f506c70 2692 int8_t roundingMode;
158142c2 2693 flag roundNearestEven, increment, isTiny;
f42c2224 2694 int64_t roundIncrement, roundMask, roundBits;
158142c2 2695
a2f2d288 2696 roundingMode = status->float_rounding_mode;
158142c2
FB
2697 roundNearestEven = ( roundingMode == float_round_nearest_even );
2698 if ( roundingPrecision == 80 ) goto precision80;
2699 if ( roundingPrecision == 64 ) {
2700 roundIncrement = LIT64( 0x0000000000000400 );
2701 roundMask = LIT64( 0x00000000000007FF );
2702 }
2703 else if ( roundingPrecision == 32 ) {
2704 roundIncrement = LIT64( 0x0000008000000000 );
2705 roundMask = LIT64( 0x000000FFFFFFFFFF );
2706 }
2707 else {
2708 goto precision80;
2709 }
2710 zSig0 |= ( zSig1 != 0 );
dc355b76
PM
2711 switch (roundingMode) {
2712 case float_round_nearest_even:
f9288a76 2713 case float_round_ties_away:
dc355b76
PM
2714 break;
2715 case float_round_to_zero:
2716 roundIncrement = 0;
2717 break;
2718 case float_round_up:
2719 roundIncrement = zSign ? 0 : roundMask;
2720 break;
2721 case float_round_down:
2722 roundIncrement = zSign ? roundMask : 0;
2723 break;
2724 default:
2725 abort();
158142c2
FB
2726 }
2727 roundBits = zSig0 & roundMask;
bb98fe42 2728 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
2729 if ( ( 0x7FFE < zExp )
2730 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
2731 ) {
2732 goto overflow;
2733 }
2734 if ( zExp <= 0 ) {
a2f2d288 2735 if (status->flush_to_zero) {
ff32e16e 2736 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
2737 return packFloatx80(zSign, 0, 0);
2738 }
158142c2 2739 isTiny =
a2f2d288
PM
2740 (status->float_detect_tininess
2741 == float_tininess_before_rounding)
158142c2
FB
2742 || ( zExp < 0 )
2743 || ( zSig0 <= zSig0 + roundIncrement );
2744 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
2745 zExp = 0;
2746 roundBits = zSig0 & roundMask;
ff32e16e
PM
2747 if (isTiny && roundBits) {
2748 float_raise(float_flag_underflow, status);
2749 }
a2f2d288
PM
2750 if (roundBits) {
2751 status->float_exception_flags |= float_flag_inexact;
2752 }
158142c2 2753 zSig0 += roundIncrement;
bb98fe42 2754 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
2755 roundIncrement = roundMask + 1;
2756 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
2757 roundMask |= roundIncrement;
2758 }
2759 zSig0 &= ~ roundMask;
2760 return packFloatx80( zSign, zExp, zSig0 );
2761 }
2762 }
a2f2d288
PM
2763 if (roundBits) {
2764 status->float_exception_flags |= float_flag_inexact;
2765 }
158142c2
FB
2766 zSig0 += roundIncrement;
2767 if ( zSig0 < roundIncrement ) {
2768 ++zExp;
2769 zSig0 = LIT64( 0x8000000000000000 );
2770 }
2771 roundIncrement = roundMask + 1;
2772 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
2773 roundMask |= roundIncrement;
2774 }
2775 zSig0 &= ~ roundMask;
2776 if ( zSig0 == 0 ) zExp = 0;
2777 return packFloatx80( zSign, zExp, zSig0 );
2778 precision80:
dc355b76
PM
2779 switch (roundingMode) {
2780 case float_round_nearest_even:
f9288a76 2781 case float_round_ties_away:
dc355b76
PM
2782 increment = ((int64_t)zSig1 < 0);
2783 break;
2784 case float_round_to_zero:
2785 increment = 0;
2786 break;
2787 case float_round_up:
2788 increment = !zSign && zSig1;
2789 break;
2790 case float_round_down:
2791 increment = zSign && zSig1;
2792 break;
2793 default:
2794 abort();
158142c2 2795 }
bb98fe42 2796 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
158142c2
FB
2797 if ( ( 0x7FFE < zExp )
2798 || ( ( zExp == 0x7FFE )
2799 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
2800 && increment
2801 )
2802 ) {
2803 roundMask = 0;
2804 overflow:
ff32e16e 2805 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
2806 if ( ( roundingMode == float_round_to_zero )
2807 || ( zSign && ( roundingMode == float_round_up ) )
2808 || ( ! zSign && ( roundingMode == float_round_down ) )
2809 ) {
2810 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
2811 }
0f605c88
LV
2812 return packFloatx80(zSign,
2813 floatx80_infinity_high,
2814 floatx80_infinity_low);
158142c2
FB
2815 }
2816 if ( zExp <= 0 ) {
2817 isTiny =
a2f2d288
PM
2818 (status->float_detect_tininess
2819 == float_tininess_before_rounding)
158142c2
FB
2820 || ( zExp < 0 )
2821 || ! increment
2822 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
2823 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
2824 zExp = 0;
ff32e16e
PM
2825 if (isTiny && zSig1) {
2826 float_raise(float_flag_underflow, status);
2827 }
a2f2d288
PM
2828 if (zSig1) {
2829 status->float_exception_flags |= float_flag_inexact;
2830 }
dc355b76
PM
2831 switch (roundingMode) {
2832 case float_round_nearest_even:
f9288a76 2833 case float_round_ties_away:
dc355b76
PM
2834 increment = ((int64_t)zSig1 < 0);
2835 break;
2836 case float_round_to_zero:
2837 increment = 0;
2838 break;
2839 case float_round_up:
2840 increment = !zSign && zSig1;
2841 break;
2842 case float_round_down:
2843 increment = zSign && zSig1;
2844 break;
2845 default:
2846 abort();
158142c2
FB
2847 }
2848 if ( increment ) {
2849 ++zSig0;
2850 zSig0 &=
bb98fe42
AF
2851 ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
2852 if ( (int64_t) zSig0 < 0 ) zExp = 1;
158142c2
FB
2853 }
2854 return packFloatx80( zSign, zExp, zSig0 );
2855 }
2856 }
a2f2d288
PM
2857 if (zSig1) {
2858 status->float_exception_flags |= float_flag_inexact;
2859 }
158142c2
FB
2860 if ( increment ) {
2861 ++zSig0;
2862 if ( zSig0 == 0 ) {
2863 ++zExp;
2864 zSig0 = LIT64( 0x8000000000000000 );
2865 }
2866 else {
bb98fe42 2867 zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
158142c2
FB
2868 }
2869 }
2870 else {
2871 if ( zSig0 == 0 ) zExp = 0;
2872 }
2873 return packFloatx80( zSign, zExp, zSig0 );
2874
2875}
2876
2877/*----------------------------------------------------------------------------
2878| Takes an abstract floating-point value having sign `zSign', exponent
2879| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
2880| and returns the proper extended double-precision floating-point value
2881| corresponding to the abstract input. This routine is just like
2882| `roundAndPackFloatx80' except that the input significand does not have to be
2883| normalized.
2884*----------------------------------------------------------------------------*/
2885
88857aca
LV
2886floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
2887 flag zSign, int32_t zExp,
2888 uint64_t zSig0, uint64_t zSig1,
2889 float_status *status)
158142c2 2890{
8f506c70 2891 int8_t shiftCount;
158142c2
FB
2892
2893 if ( zSig0 == 0 ) {
2894 zSig0 = zSig1;
2895 zSig1 = 0;
2896 zExp -= 64;
2897 }
2898 shiftCount = countLeadingZeros64( zSig0 );
2899 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
2900 zExp -= shiftCount;
ff32e16e
PM
2901 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
2902 zSig0, zSig1, status);
158142c2
FB
2903
2904}
2905
158142c2
FB
2906/*----------------------------------------------------------------------------
2907| Returns the least-significant 64 fraction bits of the quadruple-precision
2908| floating-point value `a'.
2909*----------------------------------------------------------------------------*/
2910
a49db98d 2911static inline uint64_t extractFloat128Frac1( float128 a )
158142c2
FB
2912{
2913
2914 return a.low;
2915
2916}
2917
2918/*----------------------------------------------------------------------------
2919| Returns the most-significant 48 fraction bits of the quadruple-precision
2920| floating-point value `a'.
2921*----------------------------------------------------------------------------*/
2922
a49db98d 2923static inline uint64_t extractFloat128Frac0( float128 a )
158142c2
FB
2924{
2925
2926 return a.high & LIT64( 0x0000FFFFFFFFFFFF );
2927
2928}
2929
2930/*----------------------------------------------------------------------------
2931| Returns the exponent bits of the quadruple-precision floating-point value
2932| `a'.
2933*----------------------------------------------------------------------------*/
2934
f4014512 2935static inline int32_t extractFloat128Exp( float128 a )
158142c2
FB
2936{
2937
2938 return ( a.high>>48 ) & 0x7FFF;
2939
2940}
2941
2942/*----------------------------------------------------------------------------
2943| Returns the sign bit of the quadruple-precision floating-point value `a'.
2944*----------------------------------------------------------------------------*/
2945
a49db98d 2946static inline flag extractFloat128Sign( float128 a )
158142c2
FB
2947{
2948
2949 return a.high>>63;
2950
2951}
2952
2953/*----------------------------------------------------------------------------
2954| Normalizes the subnormal quadruple-precision floating-point value
2955| represented by the denormalized significand formed by the concatenation of
2956| `aSig0' and `aSig1'. The normalized exponent is stored at the location
2957| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
2958| significand are stored at the location pointed to by `zSig0Ptr', and the
2959| least significant 64 bits of the normalized significand are stored at the
2960| location pointed to by `zSig1Ptr'.
2961*----------------------------------------------------------------------------*/
2962
2963static void
2964 normalizeFloat128Subnormal(
bb98fe42
AF
2965 uint64_t aSig0,
2966 uint64_t aSig1,
f4014512 2967 int32_t *zExpPtr,
bb98fe42
AF
2968 uint64_t *zSig0Ptr,
2969 uint64_t *zSig1Ptr
158142c2
FB
2970 )
2971{
8f506c70 2972 int8_t shiftCount;
158142c2
FB
2973
2974 if ( aSig0 == 0 ) {
2975 shiftCount = countLeadingZeros64( aSig1 ) - 15;
2976 if ( shiftCount < 0 ) {
2977 *zSig0Ptr = aSig1>>( - shiftCount );
2978 *zSig1Ptr = aSig1<<( shiftCount & 63 );
2979 }
2980 else {
2981 *zSig0Ptr = aSig1<<shiftCount;
2982 *zSig1Ptr = 0;
2983 }
2984 *zExpPtr = - shiftCount - 63;
2985 }
2986 else {
2987 shiftCount = countLeadingZeros64( aSig0 ) - 15;
2988 shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
2989 *zExpPtr = 1 - shiftCount;
2990 }
2991
2992}
2993
2994/*----------------------------------------------------------------------------
2995| Packs the sign `zSign', the exponent `zExp', and the significand formed
2996| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
2997| floating-point value, returning the result. After being shifted into the
2998| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
2999| added together to form the most significant 32 bits of the result. This
3000| means that any integer portion of `zSig0' will be added into the exponent.
3001| Since a properly normalized significand will have an integer portion equal
3002| to 1, the `zExp' input should be 1 less than the desired result exponent
3003| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
3004| significand.
3005*----------------------------------------------------------------------------*/
3006
a49db98d 3007static inline float128
f4014512 3008 packFloat128( flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1 )
158142c2
FB
3009{
3010 float128 z;
3011
3012 z.low = zSig1;
bb98fe42 3013 z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
158142c2
FB
3014 return z;
3015
3016}
3017
3018/*----------------------------------------------------------------------------
3019| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3020| and extended significand formed by the concatenation of `zSig0', `zSig1',
3021| and `zSig2', and returns the proper quadruple-precision floating-point value
3022| corresponding to the abstract input. Ordinarily, the abstract value is
3023| simply rounded and packed into the quadruple-precision format, with the
3024| inexact exception raised if the abstract input cannot be represented
3025| exactly. However, if the abstract value is too large, the overflow and
3026| inexact exceptions are raised and an infinity or maximal finite value is
3027| returned. If the abstract value is too small, the input value is rounded to
3028| a subnormal number, and the underflow and inexact exceptions are raised if
3029| the abstract input cannot be represented exactly as a subnormal quadruple-
3030| precision floating-point number.
3031| The input significand must be normalized or smaller. If the input
3032| significand is not normalized, `zExp' must be 0; in that case, the result
3033| returned is a subnormal number, and it must not require rounding. In the
3034| usual case that the input significand is normalized, `zExp' must be 1 less
3035| than the ``true'' floating-point exponent. The handling of underflow and
3036| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3037*----------------------------------------------------------------------------*/
3038
f4014512 3039static float128 roundAndPackFloat128(flag zSign, int32_t zExp,
e5a41ffa
PM
3040 uint64_t zSig0, uint64_t zSig1,
3041 uint64_t zSig2, float_status *status)
158142c2 3042{
8f506c70 3043 int8_t roundingMode;
158142c2
FB
3044 flag roundNearestEven, increment, isTiny;
3045
a2f2d288 3046 roundingMode = status->float_rounding_mode;
158142c2 3047 roundNearestEven = ( roundingMode == float_round_nearest_even );
dc355b76
PM
3048 switch (roundingMode) {
3049 case float_round_nearest_even:
f9288a76 3050 case float_round_ties_away:
dc355b76
PM
3051 increment = ((int64_t)zSig2 < 0);
3052 break;
3053 case float_round_to_zero:
3054 increment = 0;
3055 break;
3056 case float_round_up:
3057 increment = !zSign && zSig2;
3058 break;
3059 case float_round_down:
3060 increment = zSign && zSig2;
3061 break;
9ee6f678
BR
3062 case float_round_to_odd:
3063 increment = !(zSig1 & 0x1) && zSig2;
3064 break;
dc355b76
PM
3065 default:
3066 abort();
158142c2 3067 }
bb98fe42 3068 if ( 0x7FFD <= (uint32_t) zExp ) {
158142c2
FB
3069 if ( ( 0x7FFD < zExp )
3070 || ( ( zExp == 0x7FFD )
3071 && eq128(
3072 LIT64( 0x0001FFFFFFFFFFFF ),
3073 LIT64( 0xFFFFFFFFFFFFFFFF ),
3074 zSig0,
3075 zSig1
3076 )
3077 && increment
3078 )
3079 ) {
ff32e16e 3080 float_raise(float_flag_overflow | float_flag_inexact, status);
158142c2
FB
3081 if ( ( roundingMode == float_round_to_zero )
3082 || ( zSign && ( roundingMode == float_round_up ) )
3083 || ( ! zSign && ( roundingMode == float_round_down ) )
9ee6f678 3084 || (roundingMode == float_round_to_odd)
158142c2
FB
3085 ) {
3086 return
3087 packFloat128(
3088 zSign,
3089 0x7FFE,
3090 LIT64( 0x0000FFFFFFFFFFFF ),
3091 LIT64( 0xFFFFFFFFFFFFFFFF )
3092 );
3093 }
3094 return packFloat128( zSign, 0x7FFF, 0, 0 );
3095 }
3096 if ( zExp < 0 ) {
a2f2d288 3097 if (status->flush_to_zero) {
ff32e16e 3098 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
3099 return packFloat128(zSign, 0, 0, 0);
3100 }
158142c2 3101 isTiny =
a2f2d288
PM
3102 (status->float_detect_tininess
3103 == float_tininess_before_rounding)
158142c2
FB
3104 || ( zExp < -1 )
3105 || ! increment
3106 || lt128(
3107 zSig0,
3108 zSig1,
3109 LIT64( 0x0001FFFFFFFFFFFF ),
3110 LIT64( 0xFFFFFFFFFFFFFFFF )
3111 );
3112 shift128ExtraRightJamming(
3113 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
3114 zExp = 0;
ff32e16e
PM
3115 if (isTiny && zSig2) {
3116 float_raise(float_flag_underflow, status);
3117 }
dc355b76
PM
3118 switch (roundingMode) {
3119 case float_round_nearest_even:
f9288a76 3120 case float_round_ties_away:
dc355b76
PM
3121 increment = ((int64_t)zSig2 < 0);
3122 break;
3123 case float_round_to_zero:
3124 increment = 0;
3125 break;
3126 case float_round_up:
3127 increment = !zSign && zSig2;
3128 break;
3129 case float_round_down:
3130 increment = zSign && zSig2;
3131 break;
9ee6f678
BR
3132 case float_round_to_odd:
3133 increment = !(zSig1 & 0x1) && zSig2;
3134 break;
dc355b76
PM
3135 default:
3136 abort();
158142c2
FB
3137 }
3138 }
3139 }
a2f2d288
PM
3140 if (zSig2) {
3141 status->float_exception_flags |= float_flag_inexact;
3142 }
158142c2
FB
3143 if ( increment ) {
3144 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
3145 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
3146 }
3147 else {
3148 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
3149 }
3150 return packFloat128( zSign, zExp, zSig0, zSig1 );
3151
3152}
3153
3154/*----------------------------------------------------------------------------
3155| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3156| and significand formed by the concatenation of `zSig0' and `zSig1', and
3157| returns the proper quadruple-precision floating-point value corresponding
3158| to the abstract input. This routine is just like `roundAndPackFloat128'
3159| except that the input significand has fewer bits and does not have to be
3160| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
3161| point exponent.
3162*----------------------------------------------------------------------------*/
3163
f4014512 3164static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp,
e5a41ffa
PM
3165 uint64_t zSig0, uint64_t zSig1,
3166 float_status *status)
158142c2 3167{
8f506c70 3168 int8_t shiftCount;
bb98fe42 3169 uint64_t zSig2;
158142c2
FB
3170
3171 if ( zSig0 == 0 ) {
3172 zSig0 = zSig1;
3173 zSig1 = 0;
3174 zExp -= 64;
3175 }
3176 shiftCount = countLeadingZeros64( zSig0 ) - 15;
3177 if ( 0 <= shiftCount ) {
3178 zSig2 = 0;
3179 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
3180 }
3181 else {
3182 shift128ExtraRightJamming(
3183 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
3184 }
3185 zExp -= shiftCount;
ff32e16e 3186 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
3187
3188}
3189
158142c2 3190
158142c2
FB
3191/*----------------------------------------------------------------------------
3192| Returns the result of converting the 32-bit two's complement integer `a'
3193| to the extended double-precision floating-point format. The conversion
3194| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3195| Arithmetic.
3196*----------------------------------------------------------------------------*/
3197
e5a41ffa 3198floatx80 int32_to_floatx80(int32_t a, float_status *status)
158142c2
FB
3199{
3200 flag zSign;
3a87d009 3201 uint32_t absA;
8f506c70 3202 int8_t shiftCount;
bb98fe42 3203 uint64_t zSig;
158142c2
FB
3204
3205 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
3206 zSign = ( a < 0 );
3207 absA = zSign ? - a : a;
3208 shiftCount = countLeadingZeros32( absA ) + 32;
3209 zSig = absA;
3210 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
3211
3212}
3213
158142c2
FB
3214/*----------------------------------------------------------------------------
3215| Returns the result of converting the 32-bit two's complement integer `a' to
3216| the quadruple-precision floating-point format. The conversion is performed
3217| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3218*----------------------------------------------------------------------------*/
3219
e5a41ffa 3220float128 int32_to_float128(int32_t a, float_status *status)
158142c2
FB
3221{
3222 flag zSign;
3a87d009 3223 uint32_t absA;
8f506c70 3224 int8_t shiftCount;
bb98fe42 3225 uint64_t zSig0;
158142c2
FB
3226
3227 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
3228 zSign = ( a < 0 );
3229 absA = zSign ? - a : a;
3230 shiftCount = countLeadingZeros32( absA ) + 17;
3231 zSig0 = absA;
3232 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
3233
3234}
3235
158142c2
FB
3236/*----------------------------------------------------------------------------
3237| Returns the result of converting the 64-bit two's complement integer `a'
3238| to the extended double-precision floating-point format. The conversion
3239| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3240| Arithmetic.
3241*----------------------------------------------------------------------------*/
3242
e5a41ffa 3243floatx80 int64_to_floatx80(int64_t a, float_status *status)
158142c2
FB
3244{
3245 flag zSign;
182f42fd 3246 uint64_t absA;
8f506c70 3247 int8_t shiftCount;
158142c2
FB
3248
3249 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
3250 zSign = ( a < 0 );
3251 absA = zSign ? - a : a;
3252 shiftCount = countLeadingZeros64( absA );
3253 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
3254
3255}
3256
158142c2
FB
3257/*----------------------------------------------------------------------------
3258| Returns the result of converting the 64-bit two's complement integer `a' to
3259| the quadruple-precision floating-point format. The conversion is performed
3260| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3261*----------------------------------------------------------------------------*/
3262
e5a41ffa 3263float128 int64_to_float128(int64_t a, float_status *status)
158142c2
FB
3264{
3265 flag zSign;
182f42fd 3266 uint64_t absA;
8f506c70 3267 int8_t shiftCount;
f4014512 3268 int32_t zExp;
bb98fe42 3269 uint64_t zSig0, zSig1;
158142c2
FB
3270
3271 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
3272 zSign = ( a < 0 );
3273 absA = zSign ? - a : a;
3274 shiftCount = countLeadingZeros64( absA ) + 49;
3275 zExp = 0x406E - shiftCount;
3276 if ( 64 <= shiftCount ) {
3277 zSig1 = 0;
3278 zSig0 = absA;
3279 shiftCount -= 64;
3280 }
3281 else {
3282 zSig1 = absA;
3283 zSig0 = 0;
3284 }
3285 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
3286 return packFloat128( zSign, zExp, zSig0, zSig1 );
3287
3288}
3289
6bb8e0f1
PM
3290/*----------------------------------------------------------------------------
3291| Returns the result of converting the 64-bit unsigned integer `a'
3292| to the quadruple-precision floating-point format. The conversion is performed
3293| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3294*----------------------------------------------------------------------------*/
3295
e5a41ffa 3296float128 uint64_to_float128(uint64_t a, float_status *status)
1e397ead
RH
3297{
3298 if (a == 0) {
3299 return float128_zero;
3300 }
6603d506 3301 return normalizeRoundAndPackFloat128(0, 0x406E, 0, a, status);
1e397ead
RH
3302}
3303
158142c2
FB
3304/*----------------------------------------------------------------------------
3305| Returns the result of converting the single-precision floating-point value
3306| `a' to the extended double-precision floating-point format. The conversion
3307| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3308| Arithmetic.
3309*----------------------------------------------------------------------------*/
3310
e5a41ffa 3311floatx80 float32_to_floatx80(float32 a, float_status *status)
158142c2
FB
3312{
3313 flag aSign;
0c48262d 3314 int aExp;
bb98fe42 3315 uint32_t aSig;
158142c2 3316
ff32e16e 3317 a = float32_squash_input_denormal(a, status);
158142c2
FB
3318 aSig = extractFloat32Frac( a );
3319 aExp = extractFloat32Exp( a );
3320 aSign = extractFloat32Sign( a );
3321 if ( aExp == 0xFF ) {
ff32e16e
PM
3322 if (aSig) {
3323 return commonNaNToFloatx80(float32ToCommonNaN(a, status), status);
3324 }
0f605c88
LV
3325 return packFloatx80(aSign,
3326 floatx80_infinity_high,
3327 floatx80_infinity_low);
158142c2
FB
3328 }
3329 if ( aExp == 0 ) {
3330 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3331 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3332 }
3333 aSig |= 0x00800000;
bb98fe42 3334 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
158142c2
FB
3335
3336}
3337
158142c2
FB
3338/*----------------------------------------------------------------------------
3339| Returns the result of converting the single-precision floating-point value
3340| `a' to the double-precision floating-point format. The conversion is
3341| performed according to the IEC/IEEE Standard for Binary Floating-Point
3342| Arithmetic.
3343*----------------------------------------------------------------------------*/
3344
e5a41ffa 3345float128 float32_to_float128(float32 a, float_status *status)
158142c2
FB
3346{
3347 flag aSign;
0c48262d 3348 int aExp;
bb98fe42 3349 uint32_t aSig;
158142c2 3350
ff32e16e 3351 a = float32_squash_input_denormal(a, status);
158142c2
FB
3352 aSig = extractFloat32Frac( a );
3353 aExp = extractFloat32Exp( a );
3354 aSign = extractFloat32Sign( a );
3355 if ( aExp == 0xFF ) {
ff32e16e
PM
3356 if (aSig) {
3357 return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
3358 }
158142c2
FB
3359 return packFloat128( aSign, 0x7FFF, 0, 0 );
3360 }
3361 if ( aExp == 0 ) {
3362 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
3363 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3364 --aExp;
3365 }
bb98fe42 3366 return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
158142c2
FB
3367
3368}
3369
158142c2
FB
3370/*----------------------------------------------------------------------------
3371| Returns the remainder of the single-precision floating-point value `a'
3372| with respect to the corresponding value `b'. The operation is performed
3373| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3374*----------------------------------------------------------------------------*/
3375
e5a41ffa 3376float32 float32_rem(float32 a, float32 b, float_status *status)
158142c2 3377{
ed086f3d 3378 flag aSign, zSign;
0c48262d 3379 int aExp, bExp, expDiff;
bb98fe42
AF
3380 uint32_t aSig, bSig;
3381 uint32_t q;
3382 uint64_t aSig64, bSig64, q64;
3383 uint32_t alternateASig;
3384 int32_t sigMean;
ff32e16e
PM
3385 a = float32_squash_input_denormal(a, status);
3386 b = float32_squash_input_denormal(b, status);
158142c2
FB
3387
3388 aSig = extractFloat32Frac( a );
3389 aExp = extractFloat32Exp( a );
3390 aSign = extractFloat32Sign( a );
3391 bSig = extractFloat32Frac( b );
3392 bExp = extractFloat32Exp( b );
158142c2
FB
3393 if ( aExp == 0xFF ) {
3394 if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
ff32e16e 3395 return propagateFloat32NaN(a, b, status);
158142c2 3396 }
ff32e16e 3397 float_raise(float_flag_invalid, status);
af39bc8c 3398 return float32_default_nan(status);
158142c2
FB
3399 }
3400 if ( bExp == 0xFF ) {
ff32e16e
PM
3401 if (bSig) {
3402 return propagateFloat32NaN(a, b, status);
3403 }
158142c2
FB
3404 return a;
3405 }
3406 if ( bExp == 0 ) {
3407 if ( bSig == 0 ) {
ff32e16e 3408 float_raise(float_flag_invalid, status);
af39bc8c 3409 return float32_default_nan(status);
158142c2
FB
3410 }
3411 normalizeFloat32Subnormal( bSig, &bExp, &bSig );
3412 }
3413 if ( aExp == 0 ) {
3414 if ( aSig == 0 ) return a;
3415 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3416 }
3417 expDiff = aExp - bExp;
3418 aSig |= 0x00800000;
3419 bSig |= 0x00800000;
3420 if ( expDiff < 32 ) {
3421 aSig <<= 8;
3422 bSig <<= 8;
3423 if ( expDiff < 0 ) {
3424 if ( expDiff < -1 ) return a;
3425 aSig >>= 1;
3426 }
3427 q = ( bSig <= aSig );
3428 if ( q ) aSig -= bSig;
3429 if ( 0 < expDiff ) {
bb98fe42 3430 q = ( ( (uint64_t) aSig )<<32 ) / bSig;
158142c2
FB
3431 q >>= 32 - expDiff;
3432 bSig >>= 2;
3433 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3434 }
3435 else {
3436 aSig >>= 2;
3437 bSig >>= 2;
3438 }
3439 }
3440 else {
3441 if ( bSig <= aSig ) aSig -= bSig;
bb98fe42
AF
3442 aSig64 = ( (uint64_t) aSig )<<40;
3443 bSig64 = ( (uint64_t) bSig )<<40;
158142c2
FB
3444 expDiff -= 64;
3445 while ( 0 < expDiff ) {
3446 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
3447 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
3448 aSig64 = - ( ( bSig * q64 )<<38 );
3449 expDiff -= 62;
3450 }
3451 expDiff += 64;
3452 q64 = estimateDiv128To64( aSig64, 0, bSig64 );
3453 q64 = ( 2 < q64 ) ? q64 - 2 : 0;
3454 q = q64>>( 64 - expDiff );
3455 bSig <<= 6;
3456 aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
3457 }
3458 do {
3459 alternateASig = aSig;
3460 ++q;
3461 aSig -= bSig;
bb98fe42 3462 } while ( 0 <= (int32_t) aSig );
158142c2
FB
3463 sigMean = aSig + alternateASig;
3464 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3465 aSig = alternateASig;
3466 }
bb98fe42 3467 zSign = ( (int32_t) aSig < 0 );
158142c2 3468 if ( zSign ) aSig = - aSig;
ff32e16e 3469 return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
3470}
3471
369be8f6 3472
158142c2 3473
8229c991
AJ
3474/*----------------------------------------------------------------------------
3475| Returns the binary exponential of the single-precision floating-point value
3476| `a'. The operation is performed according to the IEC/IEEE Standard for
3477| Binary Floating-Point Arithmetic.
3478|
3479| Uses the following identities:
3480|
3481| 1. -------------------------------------------------------------------------
3482| x x*ln(2)
3483| 2 = e
3484|
3485| 2. -------------------------------------------------------------------------
3486| 2 3 4 5 n
3487| x x x x x x x
3488| e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
3489| 1! 2! 3! 4! 5! n!
3490*----------------------------------------------------------------------------*/
3491
3492static const float64 float32_exp2_coefficients[15] =
3493{
d5138cf4
PM
3494 const_float64( 0x3ff0000000000000ll ), /* 1 */
3495 const_float64( 0x3fe0000000000000ll ), /* 2 */
3496 const_float64( 0x3fc5555555555555ll ), /* 3 */
3497 const_float64( 0x3fa5555555555555ll ), /* 4 */
3498 const_float64( 0x3f81111111111111ll ), /* 5 */
3499 const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
3500 const_float64( 0x3f2a01a01a01a01all ), /* 7 */
3501 const_float64( 0x3efa01a01a01a01all ), /* 8 */
3502 const_float64( 0x3ec71de3a556c734ll ), /* 9 */
3503 const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
3504 const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
3505 const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
3506 const_float64( 0x3de6124613a86d09ll ), /* 13 */
3507 const_float64( 0x3da93974a8c07c9dll ), /* 14 */
3508 const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
8229c991
AJ
3509};
3510
e5a41ffa 3511float32 float32_exp2(float32 a, float_status *status)
8229c991
AJ
3512{
3513 flag aSign;
0c48262d 3514 int aExp;
bb98fe42 3515 uint32_t aSig;
8229c991
AJ
3516 float64 r, x, xn;
3517 int i;
ff32e16e 3518 a = float32_squash_input_denormal(a, status);
8229c991
AJ
3519
3520 aSig = extractFloat32Frac( a );
3521 aExp = extractFloat32Exp( a );
3522 aSign = extractFloat32Sign( a );
3523
3524 if ( aExp == 0xFF) {
ff32e16e
PM
3525 if (aSig) {
3526 return propagateFloat32NaN(a, float32_zero, status);
3527 }
8229c991
AJ
3528 return (aSign) ? float32_zero : a;
3529 }
3530 if (aExp == 0) {
3531 if (aSig == 0) return float32_one;
3532 }
3533
ff32e16e 3534 float_raise(float_flag_inexact, status);
8229c991
AJ
3535
3536 /* ******************************* */
3537 /* using float64 for approximation */
3538 /* ******************************* */
ff32e16e
PM
3539 x = float32_to_float64(a, status);
3540 x = float64_mul(x, float64_ln2, status);
8229c991
AJ
3541
3542 xn = x;
3543 r = float64_one;
3544 for (i = 0 ; i < 15 ; i++) {
3545 float64 f;
3546
ff32e16e
PM
3547 f = float64_mul(xn, float32_exp2_coefficients[i], status);
3548 r = float64_add(r, f, status);
8229c991 3549
ff32e16e 3550 xn = float64_mul(xn, x, status);
8229c991
AJ
3551 }
3552
3553 return float64_to_float32(r, status);
3554}
3555
374dfc33
AJ
3556/*----------------------------------------------------------------------------
3557| Returns the binary log of the single-precision floating-point value `a'.
3558| The operation is performed according to the IEC/IEEE Standard for Binary
3559| Floating-Point Arithmetic.
3560*----------------------------------------------------------------------------*/
e5a41ffa 3561float32 float32_log2(float32 a, float_status *status)
374dfc33
AJ
3562{
3563 flag aSign, zSign;
0c48262d 3564 int aExp;
bb98fe42 3565 uint32_t aSig, zSig, i;
374dfc33 3566
ff32e16e 3567 a = float32_squash_input_denormal(a, status);
374dfc33
AJ
3568 aSig = extractFloat32Frac( a );
3569 aExp = extractFloat32Exp( a );
3570 aSign = extractFloat32Sign( a );
3571
3572 if ( aExp == 0 ) {
3573 if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
3574 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3575 }
3576 if ( aSign ) {
ff32e16e 3577 float_raise(float_flag_invalid, status);
af39bc8c 3578 return float32_default_nan(status);
374dfc33
AJ
3579 }
3580 if ( aExp == 0xFF ) {
ff32e16e
PM
3581 if (aSig) {
3582 return propagateFloat32NaN(a, float32_zero, status);
3583 }
374dfc33
AJ
3584 return a;
3585 }
3586
3587 aExp -= 0x7F;
3588 aSig |= 0x00800000;
3589 zSign = aExp < 0;
3590 zSig = aExp << 23;
3591
3592 for (i = 1 << 22; i > 0; i >>= 1) {
bb98fe42 3593 aSig = ( (uint64_t)aSig * aSig ) >> 23;
374dfc33
AJ
3594 if ( aSig & 0x01000000 ) {
3595 aSig >>= 1;
3596 zSig |= i;
3597 }
3598 }
3599
3600 if ( zSign )
3601 zSig = -zSig;
3602
ff32e16e 3603 return normalizeRoundAndPackFloat32(zSign, 0x85, zSig, status);
374dfc33
AJ
3604}
3605
158142c2
FB
3606/*----------------------------------------------------------------------------
3607| Returns 1 if the single-precision floating-point value `a' is equal to
b689362d
AJ
3608| the corresponding value `b', and 0 otherwise. The invalid exception is
3609| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
3610| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3611*----------------------------------------------------------------------------*/
3612
e5a41ffa 3613int float32_eq(float32 a, float32 b, float_status *status)
158142c2 3614{
b689362d 3615 uint32_t av, bv;
ff32e16e
PM
3616 a = float32_squash_input_denormal(a, status);
3617 b = float32_squash_input_denormal(b, status);
158142c2
FB
3618
3619 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3620 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3621 ) {
ff32e16e 3622 float_raise(float_flag_invalid, status);
158142c2
FB
3623 return 0;
3624 }
b689362d
AJ
3625 av = float32_val(a);
3626 bv = float32_val(b);
3627 return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
3628}
3629
3630/*----------------------------------------------------------------------------
3631| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
3632| or equal to the corresponding value `b', and 0 otherwise. The invalid
3633| exception is raised if either operand is a NaN. The comparison is performed
3634| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
3635*----------------------------------------------------------------------------*/
3636
e5a41ffa 3637int float32_le(float32 a, float32 b, float_status *status)
158142c2
FB
3638{
3639 flag aSign, bSign;
bb98fe42 3640 uint32_t av, bv;
ff32e16e
PM
3641 a = float32_squash_input_denormal(a, status);
3642 b = float32_squash_input_denormal(b, status);
158142c2
FB
3643
3644 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3645 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3646 ) {
ff32e16e 3647 float_raise(float_flag_invalid, status);
158142c2
FB
3648 return 0;
3649 }
3650 aSign = extractFloat32Sign( a );
3651 bSign = extractFloat32Sign( b );
f090c9d4
PB
3652 av = float32_val(a);
3653 bv = float32_val(b);
bb98fe42 3654 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 3655 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
3656
3657}
3658
3659/*----------------------------------------------------------------------------
3660| Returns 1 if the single-precision floating-point value `a' is less than
f5a64251
AJ
3661| the corresponding value `b', and 0 otherwise. The invalid exception is
3662| raised if either operand is a NaN. The comparison is performed according
3663| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
3664*----------------------------------------------------------------------------*/
3665
e5a41ffa 3666int float32_lt(float32 a, float32 b, float_status *status)
158142c2
FB
3667{
3668 flag aSign, bSign;
bb98fe42 3669 uint32_t av, bv;
ff32e16e
PM
3670 a = float32_squash_input_denormal(a, status);
3671 b = float32_squash_input_denormal(b, status);
158142c2
FB
3672
3673 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3674 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3675 ) {
ff32e16e 3676 float_raise(float_flag_invalid, status);
158142c2
FB
3677 return 0;
3678 }
3679 aSign = extractFloat32Sign( a );
3680 bSign = extractFloat32Sign( b );
f090c9d4
PB
3681 av = float32_val(a);
3682 bv = float32_val(b);
bb98fe42 3683 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 3684 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
3685
3686}
3687
67b7861d
AJ
3688/*----------------------------------------------------------------------------
3689| Returns 1 if the single-precision floating-point values `a' and `b' cannot
f5a64251
AJ
3690| be compared, and 0 otherwise. The invalid exception is raised if either
3691| operand is a NaN. The comparison is performed according to the IEC/IEEE
3692| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
3693*----------------------------------------------------------------------------*/
3694
e5a41ffa 3695int float32_unordered(float32 a, float32 b, float_status *status)
67b7861d 3696{
ff32e16e
PM
3697 a = float32_squash_input_denormal(a, status);
3698 b = float32_squash_input_denormal(b, status);
67b7861d
AJ
3699
3700 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3701 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3702 ) {
ff32e16e 3703 float_raise(float_flag_invalid, status);
67b7861d
AJ
3704 return 1;
3705 }
3706 return 0;
3707}
b689362d 3708
158142c2
FB
3709/*----------------------------------------------------------------------------
3710| Returns 1 if the single-precision floating-point value `a' is equal to
f5a64251
AJ
3711| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
3712| exception. The comparison is performed according to the IEC/IEEE Standard
3713| for Binary Floating-Point Arithmetic.
158142c2
FB
3714*----------------------------------------------------------------------------*/
3715
e5a41ffa 3716int float32_eq_quiet(float32 a, float32 b, float_status *status)
158142c2 3717{
ff32e16e
PM
3718 a = float32_squash_input_denormal(a, status);
3719 b = float32_squash_input_denormal(b, status);
158142c2
FB
3720
3721 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3722 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3723 ) {
af39bc8c
AM
3724 if (float32_is_signaling_nan(a, status)
3725 || float32_is_signaling_nan(b, status)) {
ff32e16e 3726 float_raise(float_flag_invalid, status);
b689362d 3727 }
158142c2
FB
3728 return 0;
3729 }
b689362d
AJ
3730 return ( float32_val(a) == float32_val(b) ) ||
3731 ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
158142c2
FB
3732}
3733
3734/*----------------------------------------------------------------------------
3735| Returns 1 if the single-precision floating-point value `a' is less than or
3736| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
3737| cause an exception. Otherwise, the comparison is performed according to the
3738| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3739*----------------------------------------------------------------------------*/
3740
e5a41ffa 3741int float32_le_quiet(float32 a, float32 b, float_status *status)
158142c2
FB
3742{
3743 flag aSign, bSign;
bb98fe42 3744 uint32_t av, bv;
ff32e16e
PM
3745 a = float32_squash_input_denormal(a, status);
3746 b = float32_squash_input_denormal(b, status);
158142c2
FB
3747
3748 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3749 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3750 ) {
af39bc8c
AM
3751 if (float32_is_signaling_nan(a, status)
3752 || float32_is_signaling_nan(b, status)) {
ff32e16e 3753 float_raise(float_flag_invalid, status);
158142c2
FB
3754 }
3755 return 0;
3756 }
3757 aSign = extractFloat32Sign( a );
3758 bSign = extractFloat32Sign( b );
f090c9d4
PB
3759 av = float32_val(a);
3760 bv = float32_val(b);
bb98fe42 3761 if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 3762 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
3763
3764}
3765
3766/*----------------------------------------------------------------------------
3767| Returns 1 if the single-precision floating-point value `a' is less than
3768| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
3769| exception. Otherwise, the comparison is performed according to the IEC/IEEE
ab52f973 3770| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
3771*----------------------------------------------------------------------------*/
3772
ab52f973 3773int float32_lt_quiet(float32 a, float32 b, float_status *status)
158142c2 3774{
ab52f973
AB
3775 flag aSign, bSign;
3776 uint32_t av, bv;
3777 a = float32_squash_input_denormal(a, status);
3778 b = float32_squash_input_denormal(b, status);
158142c2 3779
ab52f973
AB
3780 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3781 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3782 ) {
3783 if (float32_is_signaling_nan(a, status)
3784 || float32_is_signaling_nan(b, status)) {
ff32e16e 3785 float_raise(float_flag_invalid, status);
158142c2 3786 }
ab52f973 3787 return 0;
158142c2 3788 }
ab52f973
AB
3789 aSign = extractFloat32Sign( a );
3790 bSign = extractFloat32Sign( b );
3791 av = float32_val(a);
3792 bv = float32_val(b);
3793 if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
3794 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
3795
3796}
3797
3798/*----------------------------------------------------------------------------
ab52f973
AB
3799| Returns 1 if the single-precision floating-point values `a' and `b' cannot
3800| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
3801| comparison is performed according to the IEC/IEEE Standard for Binary
3802| Floating-Point Arithmetic.
158142c2
FB
3803*----------------------------------------------------------------------------*/
3804
ab52f973 3805int float32_unordered_quiet(float32 a, float32 b, float_status *status)
158142c2 3806{
ab52f973
AB
3807 a = float32_squash_input_denormal(a, status);
3808 b = float32_squash_input_denormal(b, status);
158142c2 3809
ab52f973
AB
3810 if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
3811 || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
3812 ) {
3813 if (float32_is_signaling_nan(a, status)
3814 || float32_is_signaling_nan(b, status)) {
3815 float_raise(float_flag_invalid, status);
158142c2 3816 }
ab52f973 3817 return 1;
158142c2 3818 }
ab52f973 3819 return 0;
158142c2
FB
3820}
3821
210cbd49
AB
3822/*----------------------------------------------------------------------------
3823| If `a' is denormal and we are in flush-to-zero mode then set the
3824| input-denormal exception and return zero. Otherwise just return the value.
3825*----------------------------------------------------------------------------*/
3826float16 float16_squash_input_denormal(float16 a, float_status *status)
3827{
3828 if (status->flush_inputs_to_zero) {
3829 if (extractFloat16Exp(a) == 0 && extractFloat16Frac(a) != 0) {
3830 float_raise(float_flag_input_denormal, status);
3831 return make_float16(float16_val(a) & 0x8000);
3832 }
3833 }
3834 return a;
3835}
3836
158142c2
FB
3837/*----------------------------------------------------------------------------
3838| Returns the result of converting the double-precision floating-point value
3839| `a' to the extended double-precision floating-point format. The conversion
3840| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3841| Arithmetic.
3842*----------------------------------------------------------------------------*/
3843
e5a41ffa 3844floatx80 float64_to_floatx80(float64 a, float_status *status)
158142c2
FB
3845{
3846 flag aSign;
0c48262d 3847 int aExp;
bb98fe42 3848 uint64_t aSig;
158142c2 3849
ff32e16e 3850 a = float64_squash_input_denormal(a, status);
158142c2
FB
3851 aSig = extractFloat64Frac( a );
3852 aExp = extractFloat64Exp( a );
3853 aSign = extractFloat64Sign( a );
3854 if ( aExp == 0x7FF ) {
ff32e16e
PM
3855 if (aSig) {
3856 return commonNaNToFloatx80(float64ToCommonNaN(a, status), status);
3857 }
0f605c88
LV
3858 return packFloatx80(aSign,
3859 floatx80_infinity_high,
3860 floatx80_infinity_low);
158142c2
FB
3861 }
3862 if ( aExp == 0 ) {
3863 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3864 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3865 }
3866 return
3867 packFloatx80(
3868 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
3869
3870}
3871
158142c2
FB
3872/*----------------------------------------------------------------------------
3873| Returns the result of converting the double-precision floating-point value
3874| `a' to the quadruple-precision floating-point format. The conversion is
3875| performed according to the IEC/IEEE Standard for Binary Floating-Point
3876| Arithmetic.
3877*----------------------------------------------------------------------------*/
3878
e5a41ffa 3879float128 float64_to_float128(float64 a, float_status *status)
158142c2
FB
3880{
3881 flag aSign;
0c48262d 3882 int aExp;
bb98fe42 3883 uint64_t aSig, zSig0, zSig1;
158142c2 3884
ff32e16e 3885 a = float64_squash_input_denormal(a, status);
158142c2
FB
3886 aSig = extractFloat64Frac( a );
3887 aExp = extractFloat64Exp( a );
3888 aSign = extractFloat64Sign( a );
3889 if ( aExp == 0x7FF ) {
ff32e16e
PM
3890 if (aSig) {
3891 return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
3892 }
158142c2
FB
3893 return packFloat128( aSign, 0x7FFF, 0, 0 );
3894 }
3895 if ( aExp == 0 ) {
3896 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
3897 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3898 --aExp;
3899 }
3900 shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
3901 return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
3902
3903}
3904
158142c2
FB
3905
3906/*----------------------------------------------------------------------------
3907| Returns the remainder of the double-precision floating-point value `a'
3908| with respect to the corresponding value `b'. The operation is performed
3909| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3910*----------------------------------------------------------------------------*/
3911
e5a41ffa 3912float64 float64_rem(float64 a, float64 b, float_status *status)
158142c2 3913{
ed086f3d 3914 flag aSign, zSign;
0c48262d 3915 int aExp, bExp, expDiff;
bb98fe42
AF
3916 uint64_t aSig, bSig;
3917 uint64_t q, alternateASig;
3918 int64_t sigMean;
158142c2 3919
ff32e16e
PM
3920 a = float64_squash_input_denormal(a, status);
3921 b = float64_squash_input_denormal(b, status);
158142c2
FB
3922 aSig = extractFloat64Frac( a );
3923 aExp = extractFloat64Exp( a );
3924 aSign = extractFloat64Sign( a );
3925 bSig = extractFloat64Frac( b );
3926 bExp = extractFloat64Exp( b );
158142c2
FB
3927 if ( aExp == 0x7FF ) {
3928 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
ff32e16e 3929 return propagateFloat64NaN(a, b, status);
158142c2 3930 }
ff32e16e 3931 float_raise(float_flag_invalid, status);
af39bc8c 3932 return float64_default_nan(status);
158142c2
FB
3933 }
3934 if ( bExp == 0x7FF ) {
ff32e16e
PM
3935 if (bSig) {
3936 return propagateFloat64NaN(a, b, status);
3937 }
158142c2
FB
3938 return a;
3939 }
3940 if ( bExp == 0 ) {
3941 if ( bSig == 0 ) {
ff32e16e 3942 float_raise(float_flag_invalid, status);
af39bc8c 3943 return float64_default_nan(status);
158142c2
FB
3944 }
3945 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3946 }
3947 if ( aExp == 0 ) {
3948 if ( aSig == 0 ) return a;
3949 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3950 }
3951 expDiff = aExp - bExp;
3952 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
3953 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3954 if ( expDiff < 0 ) {
3955 if ( expDiff < -1 ) return a;
3956 aSig >>= 1;
3957 }
3958 q = ( bSig <= aSig );
3959 if ( q ) aSig -= bSig;
3960 expDiff -= 64;
3961 while ( 0 < expDiff ) {
3962 q = estimateDiv128To64( aSig, 0, bSig );
3963 q = ( 2 < q ) ? q - 2 : 0;
3964 aSig = - ( ( bSig>>2 ) * q );
3965 expDiff -= 62;
3966 }
3967 expDiff += 64;
3968 if ( 0 < expDiff ) {
3969 q = estimateDiv128To64( aSig, 0, bSig );
3970 q = ( 2 < q ) ? q - 2 : 0;
3971 q >>= 64 - expDiff;
3972 bSig >>= 2;
3973 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3974 }
3975 else {
3976 aSig >>= 2;
3977 bSig >>= 2;
3978 }
3979 do {
3980 alternateASig = aSig;
3981 ++q;
3982 aSig -= bSig;
bb98fe42 3983 } while ( 0 <= (int64_t) aSig );
158142c2
FB
3984 sigMean = aSig + alternateASig;
3985 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3986 aSig = alternateASig;
3987 }
bb98fe42 3988 zSign = ( (int64_t) aSig < 0 );
158142c2 3989 if ( zSign ) aSig = - aSig;
ff32e16e 3990 return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
158142c2
FB
3991
3992}
3993
374dfc33
AJ
3994/*----------------------------------------------------------------------------
3995| Returns the binary log of the double-precision floating-point value `a'.
3996| The operation is performed according to the IEC/IEEE Standard for Binary
3997| Floating-Point Arithmetic.
3998*----------------------------------------------------------------------------*/
e5a41ffa 3999float64 float64_log2(float64 a, float_status *status)
374dfc33
AJ
4000{
4001 flag aSign, zSign;
0c48262d 4002 int aExp;
bb98fe42 4003 uint64_t aSig, aSig0, aSig1, zSig, i;
ff32e16e 4004 a = float64_squash_input_denormal(a, status);
374dfc33
AJ
4005
4006 aSig = extractFloat64Frac( a );
4007 aExp = extractFloat64Exp( a );
4008 aSign = extractFloat64Sign( a );
4009
4010 if ( aExp == 0 ) {
4011 if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
4012 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
4013 }
4014 if ( aSign ) {
ff32e16e 4015 float_raise(float_flag_invalid, status);
af39bc8c 4016 return float64_default_nan(status);
374dfc33
AJ
4017 }
4018 if ( aExp == 0x7FF ) {
ff32e16e
PM
4019 if (aSig) {
4020 return propagateFloat64NaN(a, float64_zero, status);
4021 }
374dfc33
AJ
4022 return a;
4023 }
4024
4025 aExp -= 0x3FF;
4026 aSig |= LIT64( 0x0010000000000000 );
4027 zSign = aExp < 0;
bb98fe42 4028 zSig = (uint64_t)aExp << 52;
374dfc33
AJ
4029 for (i = 1LL << 51; i > 0; i >>= 1) {
4030 mul64To128( aSig, aSig, &aSig0, &aSig1 );
4031 aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
4032 if ( aSig & LIT64( 0x0020000000000000 ) ) {
4033 aSig >>= 1;
4034 zSig |= i;
4035 }
4036 }
4037
4038 if ( zSign )
4039 zSig = -zSig;
ff32e16e 4040 return normalizeRoundAndPackFloat64(zSign, 0x408, zSig, status);
374dfc33
AJ
4041}
4042
158142c2
FB
4043/*----------------------------------------------------------------------------
4044| Returns 1 if the double-precision floating-point value `a' is equal to the
b689362d
AJ
4045| corresponding value `b', and 0 otherwise. The invalid exception is raised
4046| if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
4047| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4048*----------------------------------------------------------------------------*/
4049
e5a41ffa 4050int float64_eq(float64 a, float64 b, float_status *status)
158142c2 4051{
bb98fe42 4052 uint64_t av, bv;
ff32e16e
PM
4053 a = float64_squash_input_denormal(a, status);
4054 b = float64_squash_input_denormal(b, status);
158142c2
FB
4055
4056 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4057 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4058 ) {
ff32e16e 4059 float_raise(float_flag_invalid, status);
158142c2
FB
4060 return 0;
4061 }
f090c9d4 4062 av = float64_val(a);
a1b91bb4 4063 bv = float64_val(b);
bb98fe42 4064 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
4065
4066}
4067
4068/*----------------------------------------------------------------------------
4069| Returns 1 if the double-precision floating-point value `a' is less than or
f5a64251
AJ
4070| equal to the corresponding value `b', and 0 otherwise. The invalid
4071| exception is raised if either operand is a NaN. The comparison is performed
4072| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4073*----------------------------------------------------------------------------*/
4074
e5a41ffa 4075int float64_le(float64 a, float64 b, float_status *status)
158142c2
FB
4076{
4077 flag aSign, bSign;
bb98fe42 4078 uint64_t av, bv;
ff32e16e
PM
4079 a = float64_squash_input_denormal(a, status);
4080 b = float64_squash_input_denormal(b, status);
158142c2
FB
4081
4082 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4083 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4084 ) {
ff32e16e 4085 float_raise(float_flag_invalid, status);
158142c2
FB
4086 return 0;
4087 }
4088 aSign = extractFloat64Sign( a );
4089 bSign = extractFloat64Sign( b );
f090c9d4 4090 av = float64_val(a);
a1b91bb4 4091 bv = float64_val(b);
bb98fe42 4092 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4093 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4094
4095}
4096
4097/*----------------------------------------------------------------------------
4098| Returns 1 if the double-precision floating-point value `a' is less than
f5a64251
AJ
4099| the corresponding value `b', and 0 otherwise. The invalid exception is
4100| raised if either operand is a NaN. The comparison is performed according
4101| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
4102*----------------------------------------------------------------------------*/
4103
e5a41ffa 4104int float64_lt(float64 a, float64 b, float_status *status)
158142c2
FB
4105{
4106 flag aSign, bSign;
bb98fe42 4107 uint64_t av, bv;
158142c2 4108
ff32e16e
PM
4109 a = float64_squash_input_denormal(a, status);
4110 b = float64_squash_input_denormal(b, status);
158142c2
FB
4111 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4112 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4113 ) {
ff32e16e 4114 float_raise(float_flag_invalid, status);
158142c2
FB
4115 return 0;
4116 }
4117 aSign = extractFloat64Sign( a );
4118 bSign = extractFloat64Sign( b );
f090c9d4 4119 av = float64_val(a);
a1b91bb4 4120 bv = float64_val(b);
bb98fe42 4121 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 4122 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4123
4124}
4125
67b7861d
AJ
4126/*----------------------------------------------------------------------------
4127| Returns 1 if the double-precision floating-point values `a' and `b' cannot
f5a64251
AJ
4128| be compared, and 0 otherwise. The invalid exception is raised if either
4129| operand is a NaN. The comparison is performed according to the IEC/IEEE
4130| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
4131*----------------------------------------------------------------------------*/
4132
e5a41ffa 4133int float64_unordered(float64 a, float64 b, float_status *status)
67b7861d 4134{
ff32e16e
PM
4135 a = float64_squash_input_denormal(a, status);
4136 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
4137
4138 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4139 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4140 ) {
ff32e16e 4141 float_raise(float_flag_invalid, status);
67b7861d
AJ
4142 return 1;
4143 }
4144 return 0;
4145}
4146
158142c2
FB
4147/*----------------------------------------------------------------------------
4148| Returns 1 if the double-precision floating-point value `a' is equal to the
f5a64251
AJ
4149| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4150| exception.The comparison is performed according to the IEC/IEEE Standard
4151| for Binary Floating-Point Arithmetic.
158142c2
FB
4152*----------------------------------------------------------------------------*/
4153
e5a41ffa 4154int float64_eq_quiet(float64 a, float64 b, float_status *status)
158142c2 4155{
bb98fe42 4156 uint64_t av, bv;
ff32e16e
PM
4157 a = float64_squash_input_denormal(a, status);
4158 b = float64_squash_input_denormal(b, status);
158142c2
FB
4159
4160 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4161 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4162 ) {
af39bc8c
AM
4163 if (float64_is_signaling_nan(a, status)
4164 || float64_is_signaling_nan(b, status)) {
ff32e16e 4165 float_raise(float_flag_invalid, status);
b689362d 4166 }
158142c2
FB
4167 return 0;
4168 }
f090c9d4 4169 av = float64_val(a);
a1b91bb4 4170 bv = float64_val(b);
bb98fe42 4171 return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
158142c2
FB
4172
4173}
4174
4175/*----------------------------------------------------------------------------
4176| Returns 1 if the double-precision floating-point value `a' is less than or
4177| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
4178| cause an exception. Otherwise, the comparison is performed according to the
4179| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4180*----------------------------------------------------------------------------*/
4181
e5a41ffa 4182int float64_le_quiet(float64 a, float64 b, float_status *status)
158142c2
FB
4183{
4184 flag aSign, bSign;
bb98fe42 4185 uint64_t av, bv;
ff32e16e
PM
4186 a = float64_squash_input_denormal(a, status);
4187 b = float64_squash_input_denormal(b, status);
158142c2
FB
4188
4189 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4190 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4191 ) {
af39bc8c
AM
4192 if (float64_is_signaling_nan(a, status)
4193 || float64_is_signaling_nan(b, status)) {
ff32e16e 4194 float_raise(float_flag_invalid, status);
158142c2
FB
4195 }
4196 return 0;
4197 }
4198 aSign = extractFloat64Sign( a );
4199 bSign = extractFloat64Sign( b );
f090c9d4 4200 av = float64_val(a);
a1b91bb4 4201 bv = float64_val(b);
bb98fe42 4202 if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
f090c9d4 4203 return ( av == bv ) || ( aSign ^ ( av < bv ) );
158142c2
FB
4204
4205}
4206
4207/*----------------------------------------------------------------------------
4208| Returns 1 if the double-precision floating-point value `a' is less than
4209| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4210| exception. Otherwise, the comparison is performed according to the IEC/IEEE
4211| Standard for Binary Floating-Point Arithmetic.
4212*----------------------------------------------------------------------------*/
4213
e5a41ffa 4214int float64_lt_quiet(float64 a, float64 b, float_status *status)
158142c2
FB
4215{
4216 flag aSign, bSign;
bb98fe42 4217 uint64_t av, bv;
ff32e16e
PM
4218 a = float64_squash_input_denormal(a, status);
4219 b = float64_squash_input_denormal(b, status);
158142c2
FB
4220
4221 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4222 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4223 ) {
af39bc8c
AM
4224 if (float64_is_signaling_nan(a, status)
4225 || float64_is_signaling_nan(b, status)) {
ff32e16e 4226 float_raise(float_flag_invalid, status);
158142c2
FB
4227 }
4228 return 0;
4229 }
4230 aSign = extractFloat64Sign( a );
4231 bSign = extractFloat64Sign( b );
f090c9d4 4232 av = float64_val(a);
a1b91bb4 4233 bv = float64_val(b);
bb98fe42 4234 if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
f090c9d4 4235 return ( av != bv ) && ( aSign ^ ( av < bv ) );
158142c2
FB
4236
4237}
4238
67b7861d
AJ
4239/*----------------------------------------------------------------------------
4240| Returns 1 if the double-precision floating-point values `a' and `b' cannot
4241| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
4242| comparison is performed according to the IEC/IEEE Standard for Binary
4243| Floating-Point Arithmetic.
4244*----------------------------------------------------------------------------*/
4245
e5a41ffa 4246int float64_unordered_quiet(float64 a, float64 b, float_status *status)
67b7861d 4247{
ff32e16e
PM
4248 a = float64_squash_input_denormal(a, status);
4249 b = float64_squash_input_denormal(b, status);
67b7861d
AJ
4250
4251 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
4252 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
4253 ) {
af39bc8c
AM
4254 if (float64_is_signaling_nan(a, status)
4255 || float64_is_signaling_nan(b, status)) {
ff32e16e 4256 float_raise(float_flag_invalid, status);
67b7861d
AJ
4257 }
4258 return 1;
4259 }
4260 return 0;
4261}
4262
158142c2
FB
4263/*----------------------------------------------------------------------------
4264| Returns the result of converting the extended double-precision floating-
4265| point value `a' to the 32-bit two's complement integer format. The
4266| conversion is performed according to the IEC/IEEE Standard for Binary
4267| Floating-Point Arithmetic---which means in particular that the conversion
4268| is rounded according to the current rounding mode. If `a' is a NaN, the
4269| largest positive integer is returned. Otherwise, if the conversion
4270| overflows, the largest integer with the same sign as `a' is returned.
4271*----------------------------------------------------------------------------*/
4272
f4014512 4273int32_t floatx80_to_int32(floatx80 a, float_status *status)
158142c2
FB
4274{
4275 flag aSign;
f4014512 4276 int32_t aExp, shiftCount;
bb98fe42 4277 uint64_t aSig;
158142c2 4278
d1eb8f2a
AD
4279 if (floatx80_invalid_encoding(a)) {
4280 float_raise(float_flag_invalid, status);
4281 return 1 << 31;
4282 }
158142c2
FB
4283 aSig = extractFloatx80Frac( a );
4284 aExp = extractFloatx80Exp( a );
4285 aSign = extractFloatx80Sign( a );
bb98fe42 4286 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
4287 shiftCount = 0x4037 - aExp;
4288 if ( shiftCount <= 0 ) shiftCount = 1;
4289 shift64RightJamming( aSig, shiftCount, &aSig );
ff32e16e 4290 return roundAndPackInt32(aSign, aSig, status);
158142c2
FB
4291
4292}
4293
4294/*----------------------------------------------------------------------------
4295| Returns the result of converting the extended double-precision floating-
4296| point value `a' to the 32-bit two's complement integer format. The
4297| conversion is performed according to the IEC/IEEE Standard for Binary
4298| Floating-Point Arithmetic, except that the conversion is always rounded
4299| toward zero. If `a' is a NaN, the largest positive integer is returned.
4300| Otherwise, if the conversion overflows, the largest integer with the same
4301| sign as `a' is returned.
4302*----------------------------------------------------------------------------*/
4303
f4014512 4304int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
158142c2
FB
4305{
4306 flag aSign;
f4014512 4307 int32_t aExp, shiftCount;
bb98fe42 4308 uint64_t aSig, savedASig;
b3a6a2e0 4309 int32_t z;
158142c2 4310
d1eb8f2a
AD
4311 if (floatx80_invalid_encoding(a)) {
4312 float_raise(float_flag_invalid, status);
4313 return 1 << 31;
4314 }
158142c2
FB
4315 aSig = extractFloatx80Frac( a );
4316 aExp = extractFloatx80Exp( a );
4317 aSign = extractFloatx80Sign( a );
4318 if ( 0x401E < aExp ) {
bb98fe42 4319 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
158142c2
FB
4320 goto invalid;
4321 }
4322 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
4323 if (aExp || aSig) {
4324 status->float_exception_flags |= float_flag_inexact;
4325 }
158142c2
FB
4326 return 0;
4327 }
4328 shiftCount = 0x403E - aExp;
4329 savedASig = aSig;
4330 aSig >>= shiftCount;
4331 z = aSig;
4332 if ( aSign ) z = - z;
4333 if ( ( z < 0 ) ^ aSign ) {
4334 invalid:
ff32e16e 4335 float_raise(float_flag_invalid, status);
bb98fe42 4336 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
4337 }
4338 if ( ( aSig<<shiftCount ) != savedASig ) {
a2f2d288 4339 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
4340 }
4341 return z;
4342
4343}
4344
4345/*----------------------------------------------------------------------------
4346| Returns the result of converting the extended double-precision floating-
4347| point value `a' to the 64-bit two's complement integer format. The
4348| conversion is performed according to the IEC/IEEE Standard for Binary
4349| Floating-Point Arithmetic---which means in particular that the conversion
4350| is rounded according to the current rounding mode. If `a' is a NaN,
4351| the largest positive integer is returned. Otherwise, if the conversion
4352| overflows, the largest integer with the same sign as `a' is returned.
4353*----------------------------------------------------------------------------*/
4354
f42c2224 4355int64_t floatx80_to_int64(floatx80 a, float_status *status)
158142c2
FB
4356{
4357 flag aSign;
f4014512 4358 int32_t aExp, shiftCount;
bb98fe42 4359 uint64_t aSig, aSigExtra;
158142c2 4360
d1eb8f2a
AD
4361 if (floatx80_invalid_encoding(a)) {
4362 float_raise(float_flag_invalid, status);
4363 return 1ULL << 63;
4364 }
158142c2
FB
4365 aSig = extractFloatx80Frac( a );
4366 aExp = extractFloatx80Exp( a );
4367 aSign = extractFloatx80Sign( a );
4368 shiftCount = 0x403E - aExp;
4369 if ( shiftCount <= 0 ) {
4370 if ( shiftCount ) {
ff32e16e 4371 float_raise(float_flag_invalid, status);
0f605c88 4372 if (!aSign || floatx80_is_any_nan(a)) {
158142c2
FB
4373 return LIT64( 0x7FFFFFFFFFFFFFFF );
4374 }
bb98fe42 4375 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
4376 }
4377 aSigExtra = 0;
4378 }
4379 else {
4380 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
4381 }
ff32e16e 4382 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
158142c2
FB
4383
4384}
4385
4386/*----------------------------------------------------------------------------
4387| Returns the result of converting the extended double-precision floating-
4388| point value `a' to the 64-bit two's complement integer format. The
4389| conversion is performed according to the IEC/IEEE Standard for Binary
4390| Floating-Point Arithmetic, except that the conversion is always rounded
4391| toward zero. If `a' is a NaN, the largest positive integer is returned.
4392| Otherwise, if the conversion overflows, the largest integer with the same
4393| sign as `a' is returned.
4394*----------------------------------------------------------------------------*/
4395
f42c2224 4396int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
158142c2
FB
4397{
4398 flag aSign;
f4014512 4399 int32_t aExp, shiftCount;
bb98fe42 4400 uint64_t aSig;
f42c2224 4401 int64_t z;
158142c2 4402
d1eb8f2a
AD
4403 if (floatx80_invalid_encoding(a)) {
4404 float_raise(float_flag_invalid, status);
4405 return 1ULL << 63;
4406 }
158142c2
FB
4407 aSig = extractFloatx80Frac( a );
4408 aExp = extractFloatx80Exp( a );
4409 aSign = extractFloatx80Sign( a );
4410 shiftCount = aExp - 0x403E;
4411 if ( 0 <= shiftCount ) {
4412 aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
4413 if ( ( a.high != 0xC03E ) || aSig ) {
ff32e16e 4414 float_raise(float_flag_invalid, status);
158142c2
FB
4415 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
4416 return LIT64( 0x7FFFFFFFFFFFFFFF );
4417 }
4418 }
bb98fe42 4419 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
4420 }
4421 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
4422 if (aExp | aSig) {
4423 status->float_exception_flags |= float_flag_inexact;
4424 }
158142c2
FB
4425 return 0;
4426 }
4427 z = aSig>>( - shiftCount );
bb98fe42 4428 if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
a2f2d288 4429 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
4430 }
4431 if ( aSign ) z = - z;
4432 return z;
4433
4434}
4435
4436/*----------------------------------------------------------------------------
4437| Returns the result of converting the extended double-precision floating-
4438| point value `a' to the single-precision floating-point format. The
4439| conversion is performed according to the IEC/IEEE Standard for Binary
4440| Floating-Point Arithmetic.
4441*----------------------------------------------------------------------------*/
4442
e5a41ffa 4443float32 floatx80_to_float32(floatx80 a, float_status *status)
158142c2
FB
4444{
4445 flag aSign;
f4014512 4446 int32_t aExp;
bb98fe42 4447 uint64_t aSig;
158142c2 4448
d1eb8f2a
AD
4449 if (floatx80_invalid_encoding(a)) {
4450 float_raise(float_flag_invalid, status);
4451 return float32_default_nan(status);
4452 }
158142c2
FB
4453 aSig = extractFloatx80Frac( a );
4454 aExp = extractFloatx80Exp( a );
4455 aSign = extractFloatx80Sign( a );
4456 if ( aExp == 0x7FFF ) {
bb98fe42 4457 if ( (uint64_t) ( aSig<<1 ) ) {
ff32e16e 4458 return commonNaNToFloat32(floatx80ToCommonNaN(a, status), status);
158142c2
FB
4459 }
4460 return packFloat32( aSign, 0xFF, 0 );
4461 }
4462 shift64RightJamming( aSig, 33, &aSig );
4463 if ( aExp || aSig ) aExp -= 0x3F81;
ff32e16e 4464 return roundAndPackFloat32(aSign, aExp, aSig, status);
158142c2
FB
4465
4466}
4467
4468/*----------------------------------------------------------------------------
4469| Returns the result of converting the extended double-precision floating-
4470| point value `a' to the double-precision floating-point format. The
4471| conversion is performed according to the IEC/IEEE Standard for Binary
4472| Floating-Point Arithmetic.
4473*----------------------------------------------------------------------------*/
4474
e5a41ffa 4475float64 floatx80_to_float64(floatx80 a, float_status *status)
158142c2
FB
4476{
4477 flag aSign;
f4014512 4478 int32_t aExp;
bb98fe42 4479 uint64_t aSig, zSig;
158142c2 4480
d1eb8f2a
AD
4481 if (floatx80_invalid_encoding(a)) {
4482 float_raise(float_flag_invalid, status);
4483 return float64_default_nan(status);
4484 }
158142c2
FB
4485 aSig = extractFloatx80Frac( a );
4486 aExp = extractFloatx80Exp( a );
4487 aSign = extractFloatx80Sign( a );
4488 if ( aExp == 0x7FFF ) {
bb98fe42 4489 if ( (uint64_t) ( aSig<<1 ) ) {
ff32e16e 4490 return commonNaNToFloat64(floatx80ToCommonNaN(a, status), status);
158142c2
FB
4491 }
4492 return packFloat64( aSign, 0x7FF, 0 );
4493 }
4494 shift64RightJamming( aSig, 1, &zSig );
4495 if ( aExp || aSig ) aExp -= 0x3C01;
ff32e16e 4496 return roundAndPackFloat64(aSign, aExp, zSig, status);
158142c2
FB
4497
4498}
4499
158142c2
FB
4500/*----------------------------------------------------------------------------
4501| Returns the result of converting the extended double-precision floating-
4502| point value `a' to the quadruple-precision floating-point format. The
4503| conversion is performed according to the IEC/IEEE Standard for Binary
4504| Floating-Point Arithmetic.
4505*----------------------------------------------------------------------------*/
4506
e5a41ffa 4507float128 floatx80_to_float128(floatx80 a, float_status *status)
158142c2
FB
4508{
4509 flag aSign;
0c48262d 4510 int aExp;
bb98fe42 4511 uint64_t aSig, zSig0, zSig1;
158142c2 4512
d1eb8f2a
AD
4513 if (floatx80_invalid_encoding(a)) {
4514 float_raise(float_flag_invalid, status);
4515 return float128_default_nan(status);
4516 }
158142c2
FB
4517 aSig = extractFloatx80Frac( a );
4518 aExp = extractFloatx80Exp( a );
4519 aSign = extractFloatx80Sign( a );
bb98fe42 4520 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
ff32e16e 4521 return commonNaNToFloat128(floatx80ToCommonNaN(a, status), status);
158142c2
FB
4522 }
4523 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
4524 return packFloat128( aSign, aExp, zSig0, zSig1 );
4525
4526}
4527
0f721292
LV
4528/*----------------------------------------------------------------------------
4529| Rounds the extended double-precision floating-point value `a'
4530| to the precision provided by floatx80_rounding_precision and returns the
4531| result as an extended double-precision floating-point value.
4532| The operation is performed according to the IEC/IEEE Standard for Binary
4533| Floating-Point Arithmetic.
4534*----------------------------------------------------------------------------*/
4535
4536floatx80 floatx80_round(floatx80 a, float_status *status)
4537{
4538 return roundAndPackFloatx80(status->floatx80_rounding_precision,
4539 extractFloatx80Sign(a),
4540 extractFloatx80Exp(a),
4541 extractFloatx80Frac(a), 0, status);
4542}
4543
158142c2
FB
4544/*----------------------------------------------------------------------------
4545| Rounds the extended double-precision floating-point value `a' to an integer,
4546| and returns the result as an extended quadruple-precision floating-point
4547| value. The operation is performed according to the IEC/IEEE Standard for
4548| Binary Floating-Point Arithmetic.
4549*----------------------------------------------------------------------------*/
4550
e5a41ffa 4551floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
158142c2
FB
4552{
4553 flag aSign;
f4014512 4554 int32_t aExp;
bb98fe42 4555 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
4556 floatx80 z;
4557
d1eb8f2a
AD
4558 if (floatx80_invalid_encoding(a)) {
4559 float_raise(float_flag_invalid, status);
4560 return floatx80_default_nan(status);
4561 }
158142c2
FB
4562 aExp = extractFloatx80Exp( a );
4563 if ( 0x403E <= aExp ) {
bb98fe42 4564 if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
ff32e16e 4565 return propagateFloatx80NaN(a, a, status);
158142c2
FB
4566 }
4567 return a;
4568 }
4569 if ( aExp < 0x3FFF ) {
4570 if ( ( aExp == 0 )
bb98fe42 4571 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
158142c2
FB
4572 return a;
4573 }
a2f2d288 4574 status->float_exception_flags |= float_flag_inexact;
158142c2 4575 aSign = extractFloatx80Sign( a );
a2f2d288 4576 switch (status->float_rounding_mode) {
158142c2 4577 case float_round_nearest_even:
bb98fe42 4578 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
158142c2
FB
4579 ) {
4580 return
4581 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
4582 }
4583 break;
f9288a76
PM
4584 case float_round_ties_away:
4585 if (aExp == 0x3FFE) {
4586 return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000));
4587 }
4588 break;
158142c2
FB
4589 case float_round_down:
4590 return
4591 aSign ?
4592 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
4593 : packFloatx80( 0, 0, 0 );
4594 case float_round_up:
4595 return
4596 aSign ? packFloatx80( 1, 0, 0 )
4597 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
4598 }
4599 return packFloatx80( aSign, 0, 0 );
4600 }
4601 lastBitMask = 1;
4602 lastBitMask <<= 0x403E - aExp;
4603 roundBitsMask = lastBitMask - 1;
4604 z = a;
a2f2d288 4605 switch (status->float_rounding_mode) {
dc355b76 4606 case float_round_nearest_even:
158142c2 4607 z.low += lastBitMask>>1;
dc355b76
PM
4608 if ((z.low & roundBitsMask) == 0) {
4609 z.low &= ~lastBitMask;
4610 }
4611 break;
f9288a76
PM
4612 case float_round_ties_away:
4613 z.low += lastBitMask >> 1;
4614 break;
dc355b76
PM
4615 case float_round_to_zero:
4616 break;
4617 case float_round_up:
4618 if (!extractFloatx80Sign(z)) {
4619 z.low += roundBitsMask;
4620 }
4621 break;
4622 case float_round_down:
4623 if (extractFloatx80Sign(z)) {
158142c2
FB
4624 z.low += roundBitsMask;
4625 }
dc355b76
PM
4626 break;
4627 default:
4628 abort();
158142c2
FB
4629 }
4630 z.low &= ~ roundBitsMask;
4631 if ( z.low == 0 ) {
4632 ++z.high;
4633 z.low = LIT64( 0x8000000000000000 );
4634 }
a2f2d288
PM
4635 if (z.low != a.low) {
4636 status->float_exception_flags |= float_flag_inexact;
4637 }
158142c2
FB
4638 return z;
4639
4640}
4641
4642/*----------------------------------------------------------------------------
4643| Returns the result of adding the absolute values of the extended double-
4644| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
4645| negated before being returned. `zSign' is ignored if the result is a NaN.
4646| The addition is performed according to the IEC/IEEE Standard for Binary
4647| Floating-Point Arithmetic.
4648*----------------------------------------------------------------------------*/
4649
e5a41ffa
PM
4650static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
4651 float_status *status)
158142c2 4652{
f4014512 4653 int32_t aExp, bExp, zExp;
bb98fe42 4654 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 4655 int32_t expDiff;
158142c2
FB
4656
4657 aSig = extractFloatx80Frac( a );
4658 aExp = extractFloatx80Exp( a );
4659 bSig = extractFloatx80Frac( b );
4660 bExp = extractFloatx80Exp( b );
4661 expDiff = aExp - bExp;
4662 if ( 0 < expDiff ) {
4663 if ( aExp == 0x7FFF ) {
ff32e16e
PM
4664 if ((uint64_t)(aSig << 1)) {
4665 return propagateFloatx80NaN(a, b, status);
4666 }
158142c2
FB
4667 return a;
4668 }
4669 if ( bExp == 0 ) --expDiff;
4670 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
4671 zExp = aExp;
4672 }
4673 else if ( expDiff < 0 ) {
4674 if ( bExp == 0x7FFF ) {
ff32e16e
PM
4675 if ((uint64_t)(bSig << 1)) {
4676 return propagateFloatx80NaN(a, b, status);
4677 }
0f605c88
LV
4678 return packFloatx80(zSign,
4679 floatx80_infinity_high,
4680 floatx80_infinity_low);
158142c2
FB
4681 }
4682 if ( aExp == 0 ) ++expDiff;
4683 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
4684 zExp = bExp;
4685 }
4686 else {
4687 if ( aExp == 0x7FFF ) {
bb98fe42 4688 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 4689 return propagateFloatx80NaN(a, b, status);
158142c2
FB
4690 }
4691 return a;
4692 }
4693 zSig1 = 0;
4694 zSig0 = aSig + bSig;
4695 if ( aExp == 0 ) {
4696 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
4697 goto roundAndPack;
4698 }
4699 zExp = aExp;
4700 goto shiftRight1;
4701 }
4702 zSig0 = aSig + bSig;
bb98fe42 4703 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
158142c2
FB
4704 shiftRight1:
4705 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
4706 zSig0 |= LIT64( 0x8000000000000000 );
4707 ++zExp;
4708 roundAndPack:
a2f2d288 4709 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 4710 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
4711}
4712
4713/*----------------------------------------------------------------------------
4714| Returns the result of subtracting the absolute values of the extended
4715| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
4716| difference is negated before being returned. `zSign' is ignored if the
4717| result is a NaN. The subtraction is performed according to the IEC/IEEE
4718| Standard for Binary Floating-Point Arithmetic.
4719*----------------------------------------------------------------------------*/
4720
e5a41ffa
PM
4721static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
4722 float_status *status)
158142c2 4723{
f4014512 4724 int32_t aExp, bExp, zExp;
bb98fe42 4725 uint64_t aSig, bSig, zSig0, zSig1;
f4014512 4726 int32_t expDiff;
158142c2
FB
4727
4728 aSig = extractFloatx80Frac( a );
4729 aExp = extractFloatx80Exp( a );
4730 bSig = extractFloatx80Frac( b );
4731 bExp = extractFloatx80Exp( b );
4732 expDiff = aExp - bExp;
4733 if ( 0 < expDiff ) goto aExpBigger;
4734 if ( expDiff < 0 ) goto bExpBigger;
4735 if ( aExp == 0x7FFF ) {
bb98fe42 4736 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
ff32e16e 4737 return propagateFloatx80NaN(a, b, status);
158142c2 4738 }
ff32e16e 4739 float_raise(float_flag_invalid, status);
af39bc8c 4740 return floatx80_default_nan(status);
158142c2
FB
4741 }
4742 if ( aExp == 0 ) {
4743 aExp = 1;
4744 bExp = 1;
4745 }
4746 zSig1 = 0;
4747 if ( bSig < aSig ) goto aBigger;
4748 if ( aSig < bSig ) goto bBigger;
a2f2d288 4749 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
158142c2
FB
4750 bExpBigger:
4751 if ( bExp == 0x7FFF ) {
ff32e16e
PM
4752 if ((uint64_t)(bSig << 1)) {
4753 return propagateFloatx80NaN(a, b, status);
4754 }
0f605c88
LV
4755 return packFloatx80(zSign ^ 1, floatx80_infinity_high,
4756 floatx80_infinity_low);
158142c2
FB
4757 }
4758 if ( aExp == 0 ) ++expDiff;
4759 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
4760 bBigger:
4761 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
4762 zExp = bExp;
4763 zSign ^= 1;
4764 goto normalizeRoundAndPack;
4765 aExpBigger:
4766 if ( aExp == 0x7FFF ) {
ff32e16e
PM
4767 if ((uint64_t)(aSig << 1)) {
4768 return propagateFloatx80NaN(a, b, status);
4769 }
158142c2
FB
4770 return a;
4771 }
4772 if ( bExp == 0 ) --expDiff;
4773 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
4774 aBigger:
4775 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
4776 zExp = aExp;
4777 normalizeRoundAndPack:
a2f2d288 4778 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 4779 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
4780}
4781
4782/*----------------------------------------------------------------------------
4783| Returns the result of adding the extended double-precision floating-point
4784| values `a' and `b'. The operation is performed according to the IEC/IEEE
4785| Standard for Binary Floating-Point Arithmetic.
4786*----------------------------------------------------------------------------*/
4787
e5a41ffa 4788floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
4789{
4790 flag aSign, bSign;
4791
d1eb8f2a
AD
4792 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
4793 float_raise(float_flag_invalid, status);
4794 return floatx80_default_nan(status);
4795 }
158142c2
FB
4796 aSign = extractFloatx80Sign( a );
4797 bSign = extractFloatx80Sign( b );
4798 if ( aSign == bSign ) {
ff32e16e 4799 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
4800 }
4801 else {
ff32e16e 4802 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
4803 }
4804
4805}
4806
4807/*----------------------------------------------------------------------------
4808| Returns the result of subtracting the extended double-precision floating-
4809| point values `a' and `b'. The operation is performed according to the
4810| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4811*----------------------------------------------------------------------------*/
4812
e5a41ffa 4813floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
4814{
4815 flag aSign, bSign;
4816
d1eb8f2a
AD
4817 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
4818 float_raise(float_flag_invalid, status);
4819 return floatx80_default_nan(status);
4820 }
158142c2
FB
4821 aSign = extractFloatx80Sign( a );
4822 bSign = extractFloatx80Sign( b );
4823 if ( aSign == bSign ) {
ff32e16e 4824 return subFloatx80Sigs(a, b, aSign, status);
158142c2
FB
4825 }
4826 else {
ff32e16e 4827 return addFloatx80Sigs(a, b, aSign, status);
158142c2
FB
4828 }
4829
4830}
4831
4832/*----------------------------------------------------------------------------
4833| Returns the result of multiplying the extended double-precision floating-
4834| point values `a' and `b'. The operation is performed according to the
4835| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4836*----------------------------------------------------------------------------*/
4837
e5a41ffa 4838floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
4839{
4840 flag aSign, bSign, zSign;
f4014512 4841 int32_t aExp, bExp, zExp;
bb98fe42 4842 uint64_t aSig, bSig, zSig0, zSig1;
158142c2 4843
d1eb8f2a
AD
4844 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
4845 float_raise(float_flag_invalid, status);
4846 return floatx80_default_nan(status);
4847 }
158142c2
FB
4848 aSig = extractFloatx80Frac( a );
4849 aExp = extractFloatx80Exp( a );
4850 aSign = extractFloatx80Sign( a );
4851 bSig = extractFloatx80Frac( b );
4852 bExp = extractFloatx80Exp( b );
4853 bSign = extractFloatx80Sign( b );
4854 zSign = aSign ^ bSign;
4855 if ( aExp == 0x7FFF ) {
bb98fe42
AF
4856 if ( (uint64_t) ( aSig<<1 )
4857 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 4858 return propagateFloatx80NaN(a, b, status);
158142c2
FB
4859 }
4860 if ( ( bExp | bSig ) == 0 ) goto invalid;
0f605c88
LV
4861 return packFloatx80(zSign, floatx80_infinity_high,
4862 floatx80_infinity_low);
158142c2
FB
4863 }
4864 if ( bExp == 0x7FFF ) {
ff32e16e
PM
4865 if ((uint64_t)(bSig << 1)) {
4866 return propagateFloatx80NaN(a, b, status);
4867 }
158142c2
FB
4868 if ( ( aExp | aSig ) == 0 ) {
4869 invalid:
ff32e16e 4870 float_raise(float_flag_invalid, status);
af39bc8c 4871 return floatx80_default_nan(status);
158142c2 4872 }
0f605c88
LV
4873 return packFloatx80(zSign, floatx80_infinity_high,
4874 floatx80_infinity_low);
158142c2
FB
4875 }
4876 if ( aExp == 0 ) {
4877 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
4878 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
4879 }
4880 if ( bExp == 0 ) {
4881 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
4882 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
4883 }
4884 zExp = aExp + bExp - 0x3FFE;
4885 mul64To128( aSig, bSig, &zSig0, &zSig1 );
bb98fe42 4886 if ( 0 < (int64_t) zSig0 ) {
158142c2
FB
4887 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
4888 --zExp;
4889 }
a2f2d288 4890 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 4891 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
4892}
4893
4894/*----------------------------------------------------------------------------
4895| Returns the result of dividing the extended double-precision floating-point
4896| value `a' by the corresponding value `b'. The operation is performed
4897| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4898*----------------------------------------------------------------------------*/
4899
e5a41ffa 4900floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
4901{
4902 flag aSign, bSign, zSign;
f4014512 4903 int32_t aExp, bExp, zExp;
bb98fe42
AF
4904 uint64_t aSig, bSig, zSig0, zSig1;
4905 uint64_t rem0, rem1, rem2, term0, term1, term2;
158142c2 4906
d1eb8f2a
AD
4907 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
4908 float_raise(float_flag_invalid, status);
4909 return floatx80_default_nan(status);
4910 }
158142c2
FB
4911 aSig = extractFloatx80Frac( a );
4912 aExp = extractFloatx80Exp( a );
4913 aSign = extractFloatx80Sign( a );
4914 bSig = extractFloatx80Frac( b );
4915 bExp = extractFloatx80Exp( b );
4916 bSign = extractFloatx80Sign( b );
4917 zSign = aSign ^ bSign;
4918 if ( aExp == 0x7FFF ) {
ff32e16e
PM
4919 if ((uint64_t)(aSig << 1)) {
4920 return propagateFloatx80NaN(a, b, status);
4921 }
158142c2 4922 if ( bExp == 0x7FFF ) {
ff32e16e
PM
4923 if ((uint64_t)(bSig << 1)) {
4924 return propagateFloatx80NaN(a, b, status);
4925 }
158142c2
FB
4926 goto invalid;
4927 }
0f605c88
LV
4928 return packFloatx80(zSign, floatx80_infinity_high,
4929 floatx80_infinity_low);
158142c2
FB
4930 }
4931 if ( bExp == 0x7FFF ) {
ff32e16e
PM
4932 if ((uint64_t)(bSig << 1)) {
4933 return propagateFloatx80NaN(a, b, status);
4934 }
158142c2
FB
4935 return packFloatx80( zSign, 0, 0 );
4936 }
4937 if ( bExp == 0 ) {
4938 if ( bSig == 0 ) {
4939 if ( ( aExp | aSig ) == 0 ) {
4940 invalid:
ff32e16e 4941 float_raise(float_flag_invalid, status);
af39bc8c 4942 return floatx80_default_nan(status);
158142c2 4943 }
ff32e16e 4944 float_raise(float_flag_divbyzero, status);
0f605c88
LV
4945 return packFloatx80(zSign, floatx80_infinity_high,
4946 floatx80_infinity_low);
158142c2
FB
4947 }
4948 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
4949 }
4950 if ( aExp == 0 ) {
4951 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
4952 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
4953 }
4954 zExp = aExp - bExp + 0x3FFE;
4955 rem1 = 0;
4956 if ( bSig <= aSig ) {
4957 shift128Right( aSig, 0, 1, &aSig, &rem1 );
4958 ++zExp;
4959 }
4960 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
4961 mul64To128( bSig, zSig0, &term0, &term1 );
4962 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
bb98fe42 4963 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
4964 --zSig0;
4965 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
4966 }
4967 zSig1 = estimateDiv128To64( rem1, 0, bSig );
bb98fe42 4968 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
158142c2
FB
4969 mul64To128( bSig, zSig1, &term1, &term2 );
4970 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
bb98fe42 4971 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
4972 --zSig1;
4973 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
4974 }
4975 zSig1 |= ( ( rem1 | rem2 ) != 0 );
4976 }
a2f2d288 4977 return roundAndPackFloatx80(status->floatx80_rounding_precision,
ff32e16e 4978 zSign, zExp, zSig0, zSig1, status);
158142c2
FB
4979}
4980
4981/*----------------------------------------------------------------------------
4982| Returns the remainder of the extended double-precision floating-point value
4983| `a' with respect to the corresponding value `b'. The operation is performed
4984| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4985*----------------------------------------------------------------------------*/
4986
e5a41ffa 4987floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
158142c2 4988{
ed086f3d 4989 flag aSign, zSign;
f4014512 4990 int32_t aExp, bExp, expDiff;
bb98fe42
AF
4991 uint64_t aSig0, aSig1, bSig;
4992 uint64_t q, term0, term1, alternateASig0, alternateASig1;
158142c2 4993
d1eb8f2a
AD
4994 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
4995 float_raise(float_flag_invalid, status);
4996 return floatx80_default_nan(status);
4997 }
158142c2
FB
4998 aSig0 = extractFloatx80Frac( a );
4999 aExp = extractFloatx80Exp( a );
5000 aSign = extractFloatx80Sign( a );
5001 bSig = extractFloatx80Frac( b );
5002 bExp = extractFloatx80Exp( b );
158142c2 5003 if ( aExp == 0x7FFF ) {
bb98fe42
AF
5004 if ( (uint64_t) ( aSig0<<1 )
5005 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
ff32e16e 5006 return propagateFloatx80NaN(a, b, status);
158142c2
FB
5007 }
5008 goto invalid;
5009 }
5010 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5011 if ((uint64_t)(bSig << 1)) {
5012 return propagateFloatx80NaN(a, b, status);
5013 }
158142c2
FB
5014 return a;
5015 }
5016 if ( bExp == 0 ) {
5017 if ( bSig == 0 ) {
5018 invalid:
ff32e16e 5019 float_raise(float_flag_invalid, status);
af39bc8c 5020 return floatx80_default_nan(status);
158142c2
FB
5021 }
5022 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
5023 }
5024 if ( aExp == 0 ) {
bb98fe42 5025 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
158142c2
FB
5026 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5027 }
5028 bSig |= LIT64( 0x8000000000000000 );
5029 zSign = aSign;
5030 expDiff = aExp - bExp;
5031 aSig1 = 0;
5032 if ( expDiff < 0 ) {
5033 if ( expDiff < -1 ) return a;
5034 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
5035 expDiff = 0;
5036 }
5037 q = ( bSig <= aSig0 );
5038 if ( q ) aSig0 -= bSig;
5039 expDiff -= 64;
5040 while ( 0 < expDiff ) {
5041 q = estimateDiv128To64( aSig0, aSig1, bSig );
5042 q = ( 2 < q ) ? q - 2 : 0;
5043 mul64To128( bSig, q, &term0, &term1 );
5044 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5045 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
5046 expDiff -= 62;
5047 }
5048 expDiff += 64;
5049 if ( 0 < expDiff ) {
5050 q = estimateDiv128To64( aSig0, aSig1, bSig );
5051 q = ( 2 < q ) ? q - 2 : 0;
5052 q >>= 64 - expDiff;
5053 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
5054 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5055 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
5056 while ( le128( term0, term1, aSig0, aSig1 ) ) {
5057 ++q;
5058 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
5059 }
5060 }
5061 else {
5062 term1 = 0;
5063 term0 = bSig;
5064 }
5065 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
5066 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
5067 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
5068 && ( q & 1 ) )
5069 ) {
5070 aSig0 = alternateASig0;
5071 aSig1 = alternateASig1;
5072 zSign = ! zSign;
5073 }
5074 return
5075 normalizeRoundAndPackFloatx80(
ff32e16e 5076 80, zSign, bExp + expDiff, aSig0, aSig1, status);
158142c2
FB
5077
5078}
5079
5080/*----------------------------------------------------------------------------
5081| Returns the square root of the extended double-precision floating-point
5082| value `a'. The operation is performed according to the IEC/IEEE Standard
5083| for Binary Floating-Point Arithmetic.
5084*----------------------------------------------------------------------------*/
5085
e5a41ffa 5086floatx80 floatx80_sqrt(floatx80 a, float_status *status)
158142c2
FB
5087{
5088 flag aSign;
f4014512 5089 int32_t aExp, zExp;
bb98fe42
AF
5090 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
5091 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2 5092
d1eb8f2a
AD
5093 if (floatx80_invalid_encoding(a)) {
5094 float_raise(float_flag_invalid, status);
5095 return floatx80_default_nan(status);
5096 }
158142c2
FB
5097 aSig0 = extractFloatx80Frac( a );
5098 aExp = extractFloatx80Exp( a );
5099 aSign = extractFloatx80Sign( a );
5100 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5101 if ((uint64_t)(aSig0 << 1)) {
5102 return propagateFloatx80NaN(a, a, status);
5103 }
158142c2
FB
5104 if ( ! aSign ) return a;
5105 goto invalid;
5106 }
5107 if ( aSign ) {
5108 if ( ( aExp | aSig0 ) == 0 ) return a;
5109 invalid:
ff32e16e 5110 float_raise(float_flag_invalid, status);
af39bc8c 5111 return floatx80_default_nan(status);
158142c2
FB
5112 }
5113 if ( aExp == 0 ) {
5114 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
5115 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
5116 }
5117 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
5118 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
5119 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
5120 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5121 doubleZSig0 = zSig0<<1;
5122 mul64To128( zSig0, zSig0, &term0, &term1 );
5123 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 5124 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
5125 --zSig0;
5126 doubleZSig0 -= 2;
5127 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5128 }
5129 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5130 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
5131 if ( zSig1 == 0 ) zSig1 = 1;
5132 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5133 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5134 mul64To128( zSig1, zSig1, &term2, &term3 );
5135 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 5136 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
5137 --zSig1;
5138 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5139 term3 |= 1;
5140 term2 |= doubleZSig0;
5141 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5142 }
5143 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5144 }
5145 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
5146 zSig0 |= doubleZSig0;
a2f2d288
PM
5147 return roundAndPackFloatx80(status->floatx80_rounding_precision,
5148 0, zExp, zSig0, zSig1, status);
158142c2
FB
5149}
5150
5151/*----------------------------------------------------------------------------
b689362d
AJ
5152| Returns 1 if the extended double-precision floating-point value `a' is equal
5153| to the corresponding value `b', and 0 otherwise. The invalid exception is
5154| raised if either operand is a NaN. Otherwise, the comparison is performed
5155| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5156*----------------------------------------------------------------------------*/
5157
e5a41ffa 5158int floatx80_eq(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5159{
5160
d1eb8f2a
AD
5161 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5162 || (extractFloatx80Exp(a) == 0x7FFF
5163 && (uint64_t) (extractFloatx80Frac(a) << 1))
5164 || (extractFloatx80Exp(b) == 0x7FFF
5165 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 5166 ) {
ff32e16e 5167 float_raise(float_flag_invalid, status);
158142c2
FB
5168 return 0;
5169 }
5170 return
5171 ( a.low == b.low )
5172 && ( ( a.high == b.high )
5173 || ( ( a.low == 0 )
bb98fe42 5174 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
5175 );
5176
5177}
5178
5179/*----------------------------------------------------------------------------
5180| Returns 1 if the extended double-precision floating-point value `a' is
5181| less than or equal to the corresponding value `b', and 0 otherwise. The
f5a64251
AJ
5182| invalid exception is raised if either operand is a NaN. The comparison is
5183| performed according to the IEC/IEEE Standard for Binary Floating-Point
5184| Arithmetic.
158142c2
FB
5185*----------------------------------------------------------------------------*/
5186
e5a41ffa 5187int floatx80_le(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5188{
5189 flag aSign, bSign;
5190
d1eb8f2a
AD
5191 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5192 || (extractFloatx80Exp(a) == 0x7FFF
5193 && (uint64_t) (extractFloatx80Frac(a) << 1))
5194 || (extractFloatx80Exp(b) == 0x7FFF
5195 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 5196 ) {
ff32e16e 5197 float_raise(float_flag_invalid, status);
158142c2
FB
5198 return 0;
5199 }
5200 aSign = extractFloatx80Sign( a );
5201 bSign = extractFloatx80Sign( b );
5202 if ( aSign != bSign ) {
5203 return
5204 aSign
bb98fe42 5205 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5206 == 0 );
5207 }
5208 return
5209 aSign ? le128( b.high, b.low, a.high, a.low )
5210 : le128( a.high, a.low, b.high, b.low );
5211
5212}
5213
5214/*----------------------------------------------------------------------------
5215| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
5216| less than the corresponding value `b', and 0 otherwise. The invalid
5217| exception is raised if either operand is a NaN. The comparison is performed
5218| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5219*----------------------------------------------------------------------------*/
5220
e5a41ffa 5221int floatx80_lt(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5222{
5223 flag aSign, bSign;
5224
d1eb8f2a
AD
5225 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5226 || (extractFloatx80Exp(a) == 0x7FFF
5227 && (uint64_t) (extractFloatx80Frac(a) << 1))
5228 || (extractFloatx80Exp(b) == 0x7FFF
5229 && (uint64_t) (extractFloatx80Frac(b) << 1))
158142c2 5230 ) {
ff32e16e 5231 float_raise(float_flag_invalid, status);
158142c2
FB
5232 return 0;
5233 }
5234 aSign = extractFloatx80Sign( a );
5235 bSign = extractFloatx80Sign( b );
5236 if ( aSign != bSign ) {
5237 return
5238 aSign
bb98fe42 5239 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5240 != 0 );
5241 }
5242 return
5243 aSign ? lt128( b.high, b.low, a.high, a.low )
5244 : lt128( a.high, a.low, b.high, b.low );
5245
5246}
5247
67b7861d
AJ
5248/*----------------------------------------------------------------------------
5249| Returns 1 if the extended double-precision floating-point values `a' and `b'
f5a64251
AJ
5250| cannot be compared, and 0 otherwise. The invalid exception is raised if
5251| either operand is a NaN. The comparison is performed according to the
5252| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
67b7861d 5253*----------------------------------------------------------------------------*/
e5a41ffa 5254int floatx80_unordered(floatx80 a, floatx80 b, float_status *status)
67b7861d 5255{
d1eb8f2a
AD
5256 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)
5257 || (extractFloatx80Exp(a) == 0x7FFF
5258 && (uint64_t) (extractFloatx80Frac(a) << 1))
5259 || (extractFloatx80Exp(b) == 0x7FFF
5260 && (uint64_t) (extractFloatx80Frac(b) << 1))
67b7861d 5261 ) {
ff32e16e 5262 float_raise(float_flag_invalid, status);
67b7861d
AJ
5263 return 1;
5264 }
5265 return 0;
5266}
5267
158142c2 5268/*----------------------------------------------------------------------------
b689362d 5269| Returns 1 if the extended double-precision floating-point value `a' is
f5a64251
AJ
5270| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
5271| cause an exception. The comparison is performed according to the IEC/IEEE
5272| Standard for Binary Floating-Point Arithmetic.
158142c2
FB
5273*----------------------------------------------------------------------------*/
5274
e5a41ffa 5275int floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5276{
5277
d1eb8f2a
AD
5278 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5279 float_raise(float_flag_invalid, status);
5280 return 0;
5281 }
158142c2 5282 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 5283 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 5284 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 5285 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 5286 ) {
af39bc8c
AM
5287 if (floatx80_is_signaling_nan(a, status)
5288 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5289 float_raise(float_flag_invalid, status);
b689362d 5290 }
158142c2
FB
5291 return 0;
5292 }
5293 return
5294 ( a.low == b.low )
5295 && ( ( a.high == b.high )
5296 || ( ( a.low == 0 )
bb98fe42 5297 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
5298 );
5299
5300}
5301
5302/*----------------------------------------------------------------------------
5303| Returns 1 if the extended double-precision floating-point value `a' is less
5304| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
5305| do not cause an exception. Otherwise, the comparison is performed according
5306| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5307*----------------------------------------------------------------------------*/
5308
e5a41ffa 5309int floatx80_le_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5310{
5311 flag aSign, bSign;
5312
d1eb8f2a
AD
5313 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5314 float_raise(float_flag_invalid, status);
5315 return 0;
5316 }
158142c2 5317 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 5318 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 5319 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 5320 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 5321 ) {
af39bc8c
AM
5322 if (floatx80_is_signaling_nan(a, status)
5323 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5324 float_raise(float_flag_invalid, status);
158142c2
FB
5325 }
5326 return 0;
5327 }
5328 aSign = extractFloatx80Sign( a );
5329 bSign = extractFloatx80Sign( b );
5330 if ( aSign != bSign ) {
5331 return
5332 aSign
bb98fe42 5333 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5334 == 0 );
5335 }
5336 return
5337 aSign ? le128( b.high, b.low, a.high, a.low )
5338 : le128( a.high, a.low, b.high, b.low );
5339
5340}
5341
5342/*----------------------------------------------------------------------------
5343| Returns 1 if the extended double-precision floating-point value `a' is less
5344| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
5345| an exception. Otherwise, the comparison is performed according to the
5346| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5347*----------------------------------------------------------------------------*/
5348
e5a41ffa 5349int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status)
158142c2
FB
5350{
5351 flag aSign, bSign;
5352
d1eb8f2a
AD
5353 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5354 float_raise(float_flag_invalid, status);
5355 return 0;
5356 }
158142c2 5357 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
bb98fe42 5358 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
158142c2 5359 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
bb98fe42 5360 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
158142c2 5361 ) {
af39bc8c
AM
5362 if (floatx80_is_signaling_nan(a, status)
5363 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5364 float_raise(float_flag_invalid, status);
158142c2
FB
5365 }
5366 return 0;
5367 }
5368 aSign = extractFloatx80Sign( a );
5369 bSign = extractFloatx80Sign( b );
5370 if ( aSign != bSign ) {
5371 return
5372 aSign
bb98fe42 5373 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
5374 != 0 );
5375 }
5376 return
5377 aSign ? lt128( b.high, b.low, a.high, a.low )
5378 : lt128( a.high, a.low, b.high, b.low );
5379
5380}
5381
67b7861d
AJ
5382/*----------------------------------------------------------------------------
5383| Returns 1 if the extended double-precision floating-point values `a' and `b'
5384| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception.
5385| The comparison is performed according to the IEC/IEEE Standard for Binary
5386| Floating-Point Arithmetic.
5387*----------------------------------------------------------------------------*/
e5a41ffa 5388int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status)
67b7861d 5389{
d1eb8f2a
AD
5390 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
5391 float_raise(float_flag_invalid, status);
5392 return 1;
5393 }
67b7861d
AJ
5394 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
5395 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
5396 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
5397 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
5398 ) {
af39bc8c
AM
5399 if (floatx80_is_signaling_nan(a, status)
5400 || floatx80_is_signaling_nan(b, status)) {
ff32e16e 5401 float_raise(float_flag_invalid, status);
67b7861d
AJ
5402 }
5403 return 1;
5404 }
5405 return 0;
5406}
5407
158142c2
FB
5408/*----------------------------------------------------------------------------
5409| Returns the result of converting the quadruple-precision floating-point
5410| value `a' to the 32-bit two's complement integer format. The conversion
5411| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5412| Arithmetic---which means in particular that the conversion is rounded
5413| according to the current rounding mode. If `a' is a NaN, the largest
5414| positive integer is returned. Otherwise, if the conversion overflows, the
5415| largest integer with the same sign as `a' is returned.
5416*----------------------------------------------------------------------------*/
5417
f4014512 5418int32_t float128_to_int32(float128 a, float_status *status)
158142c2
FB
5419{
5420 flag aSign;
f4014512 5421 int32_t aExp, shiftCount;
bb98fe42 5422 uint64_t aSig0, aSig1;
158142c2
FB
5423
5424 aSig1 = extractFloat128Frac1( a );
5425 aSig0 = extractFloat128Frac0( a );
5426 aExp = extractFloat128Exp( a );
5427 aSign = extractFloat128Sign( a );
5428 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
5429 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5430 aSig0 |= ( aSig1 != 0 );
5431 shiftCount = 0x4028 - aExp;
5432 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
ff32e16e 5433 return roundAndPackInt32(aSign, aSig0, status);
158142c2
FB
5434
5435}
5436
5437/*----------------------------------------------------------------------------
5438| Returns the result of converting the quadruple-precision floating-point
5439| value `a' to the 32-bit two's complement integer format. The conversion
5440| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5441| Arithmetic, except that the conversion is always rounded toward zero. If
5442| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
5443| conversion overflows, the largest integer with the same sign as `a' is
5444| returned.
5445*----------------------------------------------------------------------------*/
5446
f4014512 5447int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
158142c2
FB
5448{
5449 flag aSign;
f4014512 5450 int32_t aExp, shiftCount;
bb98fe42 5451 uint64_t aSig0, aSig1, savedASig;
b3a6a2e0 5452 int32_t z;
158142c2
FB
5453
5454 aSig1 = extractFloat128Frac1( a );
5455 aSig0 = extractFloat128Frac0( a );
5456 aExp = extractFloat128Exp( a );
5457 aSign = extractFloat128Sign( a );
5458 aSig0 |= ( aSig1 != 0 );
5459 if ( 0x401E < aExp ) {
5460 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
5461 goto invalid;
5462 }
5463 else if ( aExp < 0x3FFF ) {
a2f2d288
PM
5464 if (aExp || aSig0) {
5465 status->float_exception_flags |= float_flag_inexact;
5466 }
158142c2
FB
5467 return 0;
5468 }
5469 aSig0 |= LIT64( 0x0001000000000000 );
5470 shiftCount = 0x402F - aExp;
5471 savedASig = aSig0;
5472 aSig0 >>= shiftCount;
5473 z = aSig0;
5474 if ( aSign ) z = - z;
5475 if ( ( z < 0 ) ^ aSign ) {
5476 invalid:
ff32e16e 5477 float_raise(float_flag_invalid, status);
bb98fe42 5478 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
158142c2
FB
5479 }
5480 if ( ( aSig0<<shiftCount ) != savedASig ) {
a2f2d288 5481 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5482 }
5483 return z;
5484
5485}
5486
5487/*----------------------------------------------------------------------------
5488| Returns the result of converting the quadruple-precision floating-point
5489| value `a' to the 64-bit two's complement integer format. The conversion
5490| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5491| Arithmetic---which means in particular that the conversion is rounded
5492| according to the current rounding mode. If `a' is a NaN, the largest
5493| positive integer is returned. Otherwise, if the conversion overflows, the
5494| largest integer with the same sign as `a' is returned.
5495*----------------------------------------------------------------------------*/
5496
f42c2224 5497int64_t float128_to_int64(float128 a, float_status *status)
158142c2
FB
5498{
5499 flag aSign;
f4014512 5500 int32_t aExp, shiftCount;
bb98fe42 5501 uint64_t aSig0, aSig1;
158142c2
FB
5502
5503 aSig1 = extractFloat128Frac1( a );
5504 aSig0 = extractFloat128Frac0( a );
5505 aExp = extractFloat128Exp( a );
5506 aSign = extractFloat128Sign( a );
5507 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5508 shiftCount = 0x402F - aExp;
5509 if ( shiftCount <= 0 ) {
5510 if ( 0x403E < aExp ) {
ff32e16e 5511 float_raise(float_flag_invalid, status);
158142c2
FB
5512 if ( ! aSign
5513 || ( ( aExp == 0x7FFF )
5514 && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
5515 )
5516 ) {
5517 return LIT64( 0x7FFFFFFFFFFFFFFF );
5518 }
bb98fe42 5519 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
5520 }
5521 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
5522 }
5523 else {
5524 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
5525 }
ff32e16e 5526 return roundAndPackInt64(aSign, aSig0, aSig1, status);
158142c2
FB
5527
5528}
5529
5530/*----------------------------------------------------------------------------
5531| Returns the result of converting the quadruple-precision floating-point
5532| value `a' to the 64-bit two's complement integer format. The conversion
5533| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5534| Arithmetic, except that the conversion is always rounded toward zero.
5535| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
5536| the conversion overflows, the largest integer with the same sign as `a' is
5537| returned.
5538*----------------------------------------------------------------------------*/
5539
f42c2224 5540int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
158142c2
FB
5541{
5542 flag aSign;
f4014512 5543 int32_t aExp, shiftCount;
bb98fe42 5544 uint64_t aSig0, aSig1;
f42c2224 5545 int64_t z;
158142c2
FB
5546
5547 aSig1 = extractFloat128Frac1( a );
5548 aSig0 = extractFloat128Frac0( a );
5549 aExp = extractFloat128Exp( a );
5550 aSign = extractFloat128Sign( a );
5551 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
5552 shiftCount = aExp - 0x402F;
5553 if ( 0 < shiftCount ) {
5554 if ( 0x403E <= aExp ) {
5555 aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
5556 if ( ( a.high == LIT64( 0xC03E000000000000 ) )
5557 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
a2f2d288
PM
5558 if (aSig1) {
5559 status->float_exception_flags |= float_flag_inexact;
5560 }
158142c2
FB
5561 }
5562 else {
ff32e16e 5563 float_raise(float_flag_invalid, status);
158142c2
FB
5564 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
5565 return LIT64( 0x7FFFFFFFFFFFFFFF );
5566 }
5567 }
bb98fe42 5568 return (int64_t) LIT64( 0x8000000000000000 );
158142c2
FB
5569 }
5570 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
bb98fe42 5571 if ( (uint64_t) ( aSig1<<shiftCount ) ) {
a2f2d288 5572 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5573 }
5574 }
5575 else {
5576 if ( aExp < 0x3FFF ) {
5577 if ( aExp | aSig0 | aSig1 ) {
a2f2d288 5578 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5579 }
5580 return 0;
5581 }
5582 z = aSig0>>( - shiftCount );
5583 if ( aSig1
bb98fe42 5584 || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
a2f2d288 5585 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5586 }
5587 }
5588 if ( aSign ) z = - z;
5589 return z;
5590
5591}
5592
2e6d8568
BR
5593/*----------------------------------------------------------------------------
5594| Returns the result of converting the quadruple-precision floating-point value
5595| `a' to the 64-bit unsigned integer format. The conversion is
5596| performed according to the IEC/IEEE Standard for Binary Floating-Point
5597| Arithmetic---which means in particular that the conversion is rounded
5598| according to the current rounding mode. If `a' is a NaN, the largest
5599| positive integer is returned. If the conversion overflows, the
5600| largest unsigned integer is returned. If 'a' is negative, the value is
5601| rounded and zero is returned; negative values that do not round to zero
5602| will raise the inexact exception.
5603*----------------------------------------------------------------------------*/
5604
5605uint64_t float128_to_uint64(float128 a, float_status *status)
5606{
5607 flag aSign;
5608 int aExp;
5609 int shiftCount;
5610 uint64_t aSig0, aSig1;
5611
5612 aSig0 = extractFloat128Frac0(a);
5613 aSig1 = extractFloat128Frac1(a);
5614 aExp = extractFloat128Exp(a);
5615 aSign = extractFloat128Sign(a);
5616 if (aSign && (aExp > 0x3FFE)) {
5617 float_raise(float_flag_invalid, status);
5618 if (float128_is_any_nan(a)) {
5619 return LIT64(0xFFFFFFFFFFFFFFFF);
5620 } else {
5621 return 0;
5622 }
5623 }
5624 if (aExp) {
5625 aSig0 |= LIT64(0x0001000000000000);
5626 }
5627 shiftCount = 0x402F - aExp;
5628 if (shiftCount <= 0) {
5629 if (0x403E < aExp) {
5630 float_raise(float_flag_invalid, status);
5631 return LIT64(0xFFFFFFFFFFFFFFFF);
5632 }
5633 shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
5634 } else {
5635 shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
5636 }
5637 return roundAndPackUint64(aSign, aSig0, aSig1, status);
5638}
5639
5640uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
5641{
5642 uint64_t v;
5643 signed char current_rounding_mode = status->float_rounding_mode;
5644
5645 set_float_rounding_mode(float_round_to_zero, status);
5646 v = float128_to_uint64(a, status);
5647 set_float_rounding_mode(current_rounding_mode, status);
5648
5649 return v;
5650}
5651
158142c2
FB
5652/*----------------------------------------------------------------------------
5653| Returns the result of converting the quadruple-precision floating-point
fd425037
BR
5654| value `a' to the 32-bit unsigned integer format. The conversion
5655| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5656| Arithmetic except that the conversion is always rounded toward zero.
5657| If `a' is a NaN, the largest positive integer is returned. Otherwise,
5658| if the conversion overflows, the largest unsigned integer is returned.
5659| If 'a' is negative, the value is rounded and zero is returned; negative
5660| values that do not round to zero will raise the inexact exception.
5661*----------------------------------------------------------------------------*/
5662
5663uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
5664{
5665 uint64_t v;
5666 uint32_t res;
5667 int old_exc_flags = get_float_exception_flags(status);
5668
5669 v = float128_to_uint64_round_to_zero(a, status);
5670 if (v > 0xffffffff) {
5671 res = 0xffffffff;
5672 } else {
5673 return v;
5674 }
5675 set_float_exception_flags(old_exc_flags, status);
5676 float_raise(float_flag_invalid, status);
5677 return res;
5678}
5679
5680/*----------------------------------------------------------------------------
5681| Returns the result of converting the quadruple-precision floating-point
158142c2
FB
5682| value `a' to the single-precision floating-point format. The conversion
5683| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5684| Arithmetic.
5685*----------------------------------------------------------------------------*/
5686
e5a41ffa 5687float32 float128_to_float32(float128 a, float_status *status)
158142c2
FB
5688{
5689 flag aSign;
f4014512 5690 int32_t aExp;
bb98fe42
AF
5691 uint64_t aSig0, aSig1;
5692 uint32_t zSig;
158142c2
FB
5693
5694 aSig1 = extractFloat128Frac1( a );
5695 aSig0 = extractFloat128Frac0( a );
5696 aExp = extractFloat128Exp( a );
5697 aSign = extractFloat128Sign( a );
5698 if ( aExp == 0x7FFF ) {
5699 if ( aSig0 | aSig1 ) {
ff32e16e 5700 return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
158142c2
FB
5701 }
5702 return packFloat32( aSign, 0xFF, 0 );
5703 }
5704 aSig0 |= ( aSig1 != 0 );
5705 shift64RightJamming( aSig0, 18, &aSig0 );
5706 zSig = aSig0;
5707 if ( aExp || zSig ) {
5708 zSig |= 0x40000000;
5709 aExp -= 0x3F81;
5710 }
ff32e16e 5711 return roundAndPackFloat32(aSign, aExp, zSig, status);
158142c2
FB
5712
5713}
5714
5715/*----------------------------------------------------------------------------
5716| Returns the result of converting the quadruple-precision floating-point
5717| value `a' to the double-precision floating-point format. The conversion
5718| is performed according to the IEC/IEEE Standard for Binary Floating-Point
5719| Arithmetic.
5720*----------------------------------------------------------------------------*/
5721
e5a41ffa 5722float64 float128_to_float64(float128 a, float_status *status)
158142c2
FB
5723{
5724 flag aSign;
f4014512 5725 int32_t aExp;
bb98fe42 5726 uint64_t aSig0, aSig1;
158142c2
FB
5727
5728 aSig1 = extractFloat128Frac1( a );
5729 aSig0 = extractFloat128Frac0( a );
5730 aExp = extractFloat128Exp( a );
5731 aSign = extractFloat128Sign( a );
5732 if ( aExp == 0x7FFF ) {
5733 if ( aSig0 | aSig1 ) {
ff32e16e 5734 return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
158142c2
FB
5735 }
5736 return packFloat64( aSign, 0x7FF, 0 );
5737 }
5738 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
5739 aSig0 |= ( aSig1 != 0 );
5740 if ( aExp || aSig0 ) {
5741 aSig0 |= LIT64( 0x4000000000000000 );
5742 aExp -= 0x3C01;
5743 }
ff32e16e 5744 return roundAndPackFloat64(aSign, aExp, aSig0, status);
158142c2
FB
5745
5746}
5747
158142c2
FB
5748/*----------------------------------------------------------------------------
5749| Returns the result of converting the quadruple-precision floating-point
5750| value `a' to the extended double-precision floating-point format. The
5751| conversion is performed according to the IEC/IEEE Standard for Binary
5752| Floating-Point Arithmetic.
5753*----------------------------------------------------------------------------*/
5754
e5a41ffa 5755floatx80 float128_to_floatx80(float128 a, float_status *status)
158142c2
FB
5756{
5757 flag aSign;
f4014512 5758 int32_t aExp;
bb98fe42 5759 uint64_t aSig0, aSig1;
158142c2
FB
5760
5761 aSig1 = extractFloat128Frac1( a );
5762 aSig0 = extractFloat128Frac0( a );
5763 aExp = extractFloat128Exp( a );
5764 aSign = extractFloat128Sign( a );
5765 if ( aExp == 0x7FFF ) {
5766 if ( aSig0 | aSig1 ) {
ff32e16e 5767 return commonNaNToFloatx80(float128ToCommonNaN(a, status), status);
158142c2 5768 }
0f605c88
LV
5769 return packFloatx80(aSign, floatx80_infinity_high,
5770 floatx80_infinity_low);
158142c2
FB
5771 }
5772 if ( aExp == 0 ) {
5773 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
5774 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5775 }
5776 else {
5777 aSig0 |= LIT64( 0x0001000000000000 );
5778 }
5779 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
ff32e16e 5780 return roundAndPackFloatx80(80, aSign, aExp, aSig0, aSig1, status);
158142c2
FB
5781
5782}
5783
158142c2
FB
5784/*----------------------------------------------------------------------------
5785| Rounds the quadruple-precision floating-point value `a' to an integer, and
5786| returns the result as a quadruple-precision floating-point value. The
5787| operation is performed according to the IEC/IEEE Standard for Binary
5788| Floating-Point Arithmetic.
5789*----------------------------------------------------------------------------*/
5790
e5a41ffa 5791float128 float128_round_to_int(float128 a, float_status *status)
158142c2
FB
5792{
5793 flag aSign;
f4014512 5794 int32_t aExp;
bb98fe42 5795 uint64_t lastBitMask, roundBitsMask;
158142c2
FB
5796 float128 z;
5797
5798 aExp = extractFloat128Exp( a );
5799 if ( 0x402F <= aExp ) {
5800 if ( 0x406F <= aExp ) {
5801 if ( ( aExp == 0x7FFF )
5802 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
5803 ) {
ff32e16e 5804 return propagateFloat128NaN(a, a, status);
158142c2
FB
5805 }
5806 return a;
5807 }
5808 lastBitMask = 1;
5809 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
5810 roundBitsMask = lastBitMask - 1;
5811 z = a;
a2f2d288 5812 switch (status->float_rounding_mode) {
dc355b76 5813 case float_round_nearest_even:
158142c2
FB
5814 if ( lastBitMask ) {
5815 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
5816 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
5817 }
5818 else {
bb98fe42 5819 if ( (int64_t) z.low < 0 ) {
158142c2 5820 ++z.high;
bb98fe42 5821 if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
158142c2
FB
5822 }
5823 }
dc355b76 5824 break;
f9288a76
PM
5825 case float_round_ties_away:
5826 if (lastBitMask) {
5827 add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low);
5828 } else {
5829 if ((int64_t) z.low < 0) {
5830 ++z.high;
5831 }
5832 }
5833 break;
dc355b76
PM
5834 case float_round_to_zero:
5835 break;
5836 case float_round_up:
5837 if (!extractFloat128Sign(z)) {
5838 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
5839 }
5840 break;
5841 case float_round_down:
5842 if (extractFloat128Sign(z)) {
5843 add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low);
158142c2 5844 }
dc355b76
PM
5845 break;
5846 default:
5847 abort();
158142c2
FB
5848 }
5849 z.low &= ~ roundBitsMask;
5850 }
5851 else {
5852 if ( aExp < 0x3FFF ) {
bb98fe42 5853 if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
a2f2d288 5854 status->float_exception_flags |= float_flag_inexact;
158142c2 5855 aSign = extractFloat128Sign( a );
a2f2d288 5856 switch (status->float_rounding_mode) {
158142c2
FB
5857 case float_round_nearest_even:
5858 if ( ( aExp == 0x3FFE )
5859 && ( extractFloat128Frac0( a )
5860 | extractFloat128Frac1( a ) )
5861 ) {
5862 return packFloat128( aSign, 0x3FFF, 0, 0 );
5863 }
5864 break;
f9288a76
PM
5865 case float_round_ties_away:
5866 if (aExp == 0x3FFE) {
5867 return packFloat128(aSign, 0x3FFF, 0, 0);
5868 }
5869 break;
158142c2
FB
5870 case float_round_down:
5871 return
5872 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
5873 : packFloat128( 0, 0, 0, 0 );
5874 case float_round_up:
5875 return
5876 aSign ? packFloat128( 1, 0, 0, 0 )
5877 : packFloat128( 0, 0x3FFF, 0, 0 );
5878 }
5879 return packFloat128( aSign, 0, 0, 0 );
5880 }
5881 lastBitMask = 1;
5882 lastBitMask <<= 0x402F - aExp;
5883 roundBitsMask = lastBitMask - 1;
5884 z.low = 0;
5885 z.high = a.high;
a2f2d288 5886 switch (status->float_rounding_mode) {
dc355b76 5887 case float_round_nearest_even:
158142c2
FB
5888 z.high += lastBitMask>>1;
5889 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
5890 z.high &= ~ lastBitMask;
5891 }
dc355b76 5892 break;
f9288a76
PM
5893 case float_round_ties_away:
5894 z.high += lastBitMask>>1;
5895 break;
dc355b76
PM
5896 case float_round_to_zero:
5897 break;
5898 case float_round_up:
5899 if (!extractFloat128Sign(z)) {
158142c2
FB
5900 z.high |= ( a.low != 0 );
5901 z.high += roundBitsMask;
5902 }
dc355b76
PM
5903 break;
5904 case float_round_down:
5905 if (extractFloat128Sign(z)) {
5906 z.high |= (a.low != 0);
5907 z.high += roundBitsMask;
5908 }
5909 break;
5910 default:
5911 abort();
158142c2
FB
5912 }
5913 z.high &= ~ roundBitsMask;
5914 }
5915 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
a2f2d288 5916 status->float_exception_flags |= float_flag_inexact;
158142c2
FB
5917 }
5918 return z;
5919
5920}
5921
5922/*----------------------------------------------------------------------------
5923| Returns the result of adding the absolute values of the quadruple-precision
5924| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
5925| before being returned. `zSign' is ignored if the result is a NaN.
5926| The addition is performed according to the IEC/IEEE Standard for Binary
5927| Floating-Point Arithmetic.
5928*----------------------------------------------------------------------------*/
5929
e5a41ffa
PM
5930static float128 addFloat128Sigs(float128 a, float128 b, flag zSign,
5931 float_status *status)
158142c2 5932{
f4014512 5933 int32_t aExp, bExp, zExp;
bb98fe42 5934 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
f4014512 5935 int32_t expDiff;
158142c2
FB
5936
5937 aSig1 = extractFloat128Frac1( a );
5938 aSig0 = extractFloat128Frac0( a );
5939 aExp = extractFloat128Exp( a );
5940 bSig1 = extractFloat128Frac1( b );
5941 bSig0 = extractFloat128Frac0( b );
5942 bExp = extractFloat128Exp( b );
5943 expDiff = aExp - bExp;
5944 if ( 0 < expDiff ) {
5945 if ( aExp == 0x7FFF ) {
ff32e16e
PM
5946 if (aSig0 | aSig1) {
5947 return propagateFloat128NaN(a, b, status);
5948 }
158142c2
FB
5949 return a;
5950 }
5951 if ( bExp == 0 ) {
5952 --expDiff;
5953 }
5954 else {
5955 bSig0 |= LIT64( 0x0001000000000000 );
5956 }
5957 shift128ExtraRightJamming(
5958 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
5959 zExp = aExp;
5960 }
5961 else if ( expDiff < 0 ) {
5962 if ( bExp == 0x7FFF ) {
ff32e16e
PM
5963 if (bSig0 | bSig1) {
5964 return propagateFloat128NaN(a, b, status);
5965 }
158142c2
FB
5966 return packFloat128( zSign, 0x7FFF, 0, 0 );
5967 }
5968 if ( aExp == 0 ) {
5969 ++expDiff;
5970 }
5971 else {
5972 aSig0 |= LIT64( 0x0001000000000000 );
5973 }
5974 shift128ExtraRightJamming(
5975 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
5976 zExp = bExp;
5977 }
5978 else {
5979 if ( aExp == 0x7FFF ) {
5980 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 5981 return propagateFloat128NaN(a, b, status);
158142c2
FB
5982 }
5983 return a;
5984 }
5985 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
fe76d976 5986 if ( aExp == 0 ) {
a2f2d288 5987 if (status->flush_to_zero) {
e6afc87f 5988 if (zSig0 | zSig1) {
ff32e16e 5989 float_raise(float_flag_output_denormal, status);
e6afc87f
PM
5990 }
5991 return packFloat128(zSign, 0, 0, 0);
5992 }
fe76d976
PB
5993 return packFloat128( zSign, 0, zSig0, zSig1 );
5994 }
158142c2
FB
5995 zSig2 = 0;
5996 zSig0 |= LIT64( 0x0002000000000000 );
5997 zExp = aExp;
5998 goto shiftRight1;
5999 }
6000 aSig0 |= LIT64( 0x0001000000000000 );
6001 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6002 --zExp;
6003 if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
6004 ++zExp;
6005 shiftRight1:
6006 shift128ExtraRightJamming(
6007 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6008 roundAndPack:
ff32e16e 6009 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6010
6011}
6012
6013/*----------------------------------------------------------------------------
6014| Returns the result of subtracting the absolute values of the quadruple-
6015| precision floating-point values `a' and `b'. If `zSign' is 1, the
6016| difference is negated before being returned. `zSign' is ignored if the
6017| result is a NaN. The subtraction is performed according to the IEC/IEEE
6018| Standard for Binary Floating-Point Arithmetic.
6019*----------------------------------------------------------------------------*/
6020
e5a41ffa
PM
6021static float128 subFloat128Sigs(float128 a, float128 b, flag zSign,
6022 float_status *status)
158142c2 6023{
f4014512 6024 int32_t aExp, bExp, zExp;
bb98fe42 6025 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
f4014512 6026 int32_t expDiff;
158142c2
FB
6027
6028 aSig1 = extractFloat128Frac1( a );
6029 aSig0 = extractFloat128Frac0( a );
6030 aExp = extractFloat128Exp( a );
6031 bSig1 = extractFloat128Frac1( b );
6032 bSig0 = extractFloat128Frac0( b );
6033 bExp = extractFloat128Exp( b );
6034 expDiff = aExp - bExp;
6035 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
6036 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
6037 if ( 0 < expDiff ) goto aExpBigger;
6038 if ( expDiff < 0 ) goto bExpBigger;
6039 if ( aExp == 0x7FFF ) {
6040 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
ff32e16e 6041 return propagateFloat128NaN(a, b, status);
158142c2 6042 }
ff32e16e 6043 float_raise(float_flag_invalid, status);
af39bc8c 6044 return float128_default_nan(status);
158142c2
FB
6045 }
6046 if ( aExp == 0 ) {
6047 aExp = 1;
6048 bExp = 1;
6049 }
6050 if ( bSig0 < aSig0 ) goto aBigger;
6051 if ( aSig0 < bSig0 ) goto bBigger;
6052 if ( bSig1 < aSig1 ) goto aBigger;
6053 if ( aSig1 < bSig1 ) goto bBigger;
a2f2d288
PM
6054 return packFloat128(status->float_rounding_mode == float_round_down,
6055 0, 0, 0);
158142c2
FB
6056 bExpBigger:
6057 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6058 if (bSig0 | bSig1) {
6059 return propagateFloat128NaN(a, b, status);
6060 }
158142c2
FB
6061 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
6062 }
6063 if ( aExp == 0 ) {
6064 ++expDiff;
6065 }
6066 else {
6067 aSig0 |= LIT64( 0x4000000000000000 );
6068 }
6069 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6070 bSig0 |= LIT64( 0x4000000000000000 );
6071 bBigger:
6072 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
6073 zExp = bExp;
6074 zSign ^= 1;
6075 goto normalizeRoundAndPack;
6076 aExpBigger:
6077 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6078 if (aSig0 | aSig1) {
6079 return propagateFloat128NaN(a, b, status);
6080 }
158142c2
FB
6081 return a;
6082 }
6083 if ( bExp == 0 ) {
6084 --expDiff;
6085 }
6086 else {
6087 bSig0 |= LIT64( 0x4000000000000000 );
6088 }
6089 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
6090 aSig0 |= LIT64( 0x4000000000000000 );
6091 aBigger:
6092 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
6093 zExp = aExp;
6094 normalizeRoundAndPack:
6095 --zExp;
ff32e16e
PM
6096 return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
6097 status);
158142c2
FB
6098
6099}
6100
6101/*----------------------------------------------------------------------------
6102| Returns the result of adding the quadruple-precision floating-point values
6103| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
6104| for Binary Floating-Point Arithmetic.
6105*----------------------------------------------------------------------------*/
6106
e5a41ffa 6107float128 float128_add(float128 a, float128 b, float_status *status)
158142c2
FB
6108{
6109 flag aSign, bSign;
6110
6111 aSign = extractFloat128Sign( a );
6112 bSign = extractFloat128Sign( b );
6113 if ( aSign == bSign ) {
ff32e16e 6114 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
6115 }
6116 else {
ff32e16e 6117 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
6118 }
6119
6120}
6121
6122/*----------------------------------------------------------------------------
6123| Returns the result of subtracting the quadruple-precision floating-point
6124| values `a' and `b'. The operation is performed according to the IEC/IEEE
6125| Standard for Binary Floating-Point Arithmetic.
6126*----------------------------------------------------------------------------*/
6127
e5a41ffa 6128float128 float128_sub(float128 a, float128 b, float_status *status)
158142c2
FB
6129{
6130 flag aSign, bSign;
6131
6132 aSign = extractFloat128Sign( a );
6133 bSign = extractFloat128Sign( b );
6134 if ( aSign == bSign ) {
ff32e16e 6135 return subFloat128Sigs(a, b, aSign, status);
158142c2
FB
6136 }
6137 else {
ff32e16e 6138 return addFloat128Sigs(a, b, aSign, status);
158142c2
FB
6139 }
6140
6141}
6142
6143/*----------------------------------------------------------------------------
6144| Returns the result of multiplying the quadruple-precision floating-point
6145| values `a' and `b'. The operation is performed according to the IEC/IEEE
6146| Standard for Binary Floating-Point Arithmetic.
6147*----------------------------------------------------------------------------*/
6148
e5a41ffa 6149float128 float128_mul(float128 a, float128 b, float_status *status)
158142c2
FB
6150{
6151 flag aSign, bSign, zSign;
f4014512 6152 int32_t aExp, bExp, zExp;
bb98fe42 6153 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
158142c2
FB
6154
6155 aSig1 = extractFloat128Frac1( a );
6156 aSig0 = extractFloat128Frac0( a );
6157 aExp = extractFloat128Exp( a );
6158 aSign = extractFloat128Sign( a );
6159 bSig1 = extractFloat128Frac1( b );
6160 bSig0 = extractFloat128Frac0( b );
6161 bExp = extractFloat128Exp( b );
6162 bSign = extractFloat128Sign( b );
6163 zSign = aSign ^ bSign;
6164 if ( aExp == 0x7FFF ) {
6165 if ( ( aSig0 | aSig1 )
6166 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 6167 return propagateFloat128NaN(a, b, status);
158142c2
FB
6168 }
6169 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
6170 return packFloat128( zSign, 0x7FFF, 0, 0 );
6171 }
6172 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6173 if (bSig0 | bSig1) {
6174 return propagateFloat128NaN(a, b, status);
6175 }
158142c2
FB
6176 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6177 invalid:
ff32e16e 6178 float_raise(float_flag_invalid, status);
af39bc8c 6179 return float128_default_nan(status);
158142c2
FB
6180 }
6181 return packFloat128( zSign, 0x7FFF, 0, 0 );
6182 }
6183 if ( aExp == 0 ) {
6184 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6185 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6186 }
6187 if ( bExp == 0 ) {
6188 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6189 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6190 }
6191 zExp = aExp + bExp - 0x4000;
6192 aSig0 |= LIT64( 0x0001000000000000 );
6193 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
6194 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
6195 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
6196 zSig2 |= ( zSig3 != 0 );
6197 if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
6198 shift128ExtraRightJamming(
6199 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
6200 ++zExp;
6201 }
ff32e16e 6202 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6203
6204}
6205
6206/*----------------------------------------------------------------------------
6207| Returns the result of dividing the quadruple-precision floating-point value
6208| `a' by the corresponding value `b'. The operation is performed according to
6209| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6210*----------------------------------------------------------------------------*/
6211
e5a41ffa 6212float128 float128_div(float128 a, float128 b, float_status *status)
158142c2
FB
6213{
6214 flag aSign, bSign, zSign;
f4014512 6215 int32_t aExp, bExp, zExp;
bb98fe42
AF
6216 uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
6217 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
6218
6219 aSig1 = extractFloat128Frac1( a );
6220 aSig0 = extractFloat128Frac0( a );
6221 aExp = extractFloat128Exp( a );
6222 aSign = extractFloat128Sign( a );
6223 bSig1 = extractFloat128Frac1( b );
6224 bSig0 = extractFloat128Frac0( b );
6225 bExp = extractFloat128Exp( b );
6226 bSign = extractFloat128Sign( b );
6227 zSign = aSign ^ bSign;
6228 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6229 if (aSig0 | aSig1) {
6230 return propagateFloat128NaN(a, b, status);
6231 }
158142c2 6232 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6233 if (bSig0 | bSig1) {
6234 return propagateFloat128NaN(a, b, status);
6235 }
158142c2
FB
6236 goto invalid;
6237 }
6238 return packFloat128( zSign, 0x7FFF, 0, 0 );
6239 }
6240 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6241 if (bSig0 | bSig1) {
6242 return propagateFloat128NaN(a, b, status);
6243 }
158142c2
FB
6244 return packFloat128( zSign, 0, 0, 0 );
6245 }
6246 if ( bExp == 0 ) {
6247 if ( ( bSig0 | bSig1 ) == 0 ) {
6248 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
6249 invalid:
ff32e16e 6250 float_raise(float_flag_invalid, status);
af39bc8c 6251 return float128_default_nan(status);
158142c2 6252 }
ff32e16e 6253 float_raise(float_flag_divbyzero, status);
158142c2
FB
6254 return packFloat128( zSign, 0x7FFF, 0, 0 );
6255 }
6256 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6257 }
6258 if ( aExp == 0 ) {
6259 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
6260 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6261 }
6262 zExp = aExp - bExp + 0x3FFD;
6263 shortShift128Left(
6264 aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
6265 shortShift128Left(
6266 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6267 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
6268 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
6269 ++zExp;
6270 }
6271 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
6272 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
6273 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
bb98fe42 6274 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6275 --zSig0;
6276 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
6277 }
6278 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
6279 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
6280 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
6281 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6282 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6283 --zSig1;
6284 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
6285 }
6286 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6287 }
6288 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
ff32e16e 6289 return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6290
6291}
6292
6293/*----------------------------------------------------------------------------
6294| Returns the remainder of the quadruple-precision floating-point value `a'
6295| with respect to the corresponding value `b'. The operation is performed
6296| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6297*----------------------------------------------------------------------------*/
6298
e5a41ffa 6299float128 float128_rem(float128 a, float128 b, float_status *status)
158142c2 6300{
ed086f3d 6301 flag aSign, zSign;
f4014512 6302 int32_t aExp, bExp, expDiff;
bb98fe42
AF
6303 uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
6304 uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
6305 int64_t sigMean0;
158142c2
FB
6306
6307 aSig1 = extractFloat128Frac1( a );
6308 aSig0 = extractFloat128Frac0( a );
6309 aExp = extractFloat128Exp( a );
6310 aSign = extractFloat128Sign( a );
6311 bSig1 = extractFloat128Frac1( b );
6312 bSig0 = extractFloat128Frac0( b );
6313 bExp = extractFloat128Exp( b );
158142c2
FB
6314 if ( aExp == 0x7FFF ) {
6315 if ( ( aSig0 | aSig1 )
6316 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
ff32e16e 6317 return propagateFloat128NaN(a, b, status);
158142c2
FB
6318 }
6319 goto invalid;
6320 }
6321 if ( bExp == 0x7FFF ) {
ff32e16e
PM
6322 if (bSig0 | bSig1) {
6323 return propagateFloat128NaN(a, b, status);
6324 }
158142c2
FB
6325 return a;
6326 }
6327 if ( bExp == 0 ) {
6328 if ( ( bSig0 | bSig1 ) == 0 ) {
6329 invalid:
ff32e16e 6330 float_raise(float_flag_invalid, status);
af39bc8c 6331 return float128_default_nan(status);
158142c2
FB
6332 }
6333 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
6334 }
6335 if ( aExp == 0 ) {
6336 if ( ( aSig0 | aSig1 ) == 0 ) return a;
6337 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6338 }
6339 expDiff = aExp - bExp;
6340 if ( expDiff < -1 ) return a;
6341 shortShift128Left(
6342 aSig0 | LIT64( 0x0001000000000000 ),
6343 aSig1,
6344 15 - ( expDiff < 0 ),
6345 &aSig0,
6346 &aSig1
6347 );
6348 shortShift128Left(
6349 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
6350 q = le128( bSig0, bSig1, aSig0, aSig1 );
6351 if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
6352 expDiff -= 64;
6353 while ( 0 < expDiff ) {
6354 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6355 q = ( 4 < q ) ? q - 4 : 0;
6356 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6357 shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
6358 shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
6359 sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
6360 expDiff -= 61;
6361 }
6362 if ( -64 < expDiff ) {
6363 q = estimateDiv128To64( aSig0, aSig1, bSig0 );
6364 q = ( 4 < q ) ? q - 4 : 0;
6365 q >>= - expDiff;
6366 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6367 expDiff += 52;
6368 if ( expDiff < 0 ) {
6369 shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
6370 }
6371 else {
6372 shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
6373 }
6374 mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
6375 sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
6376 }
6377 else {
6378 shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
6379 shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
6380 }
6381 do {
6382 alternateASig0 = aSig0;
6383 alternateASig1 = aSig1;
6384 ++q;
6385 sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
bb98fe42 6386 } while ( 0 <= (int64_t) aSig0 );
158142c2 6387 add128(
bb98fe42 6388 aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
158142c2
FB
6389 if ( ( sigMean0 < 0 )
6390 || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
6391 aSig0 = alternateASig0;
6392 aSig1 = alternateASig1;
6393 }
bb98fe42 6394 zSign = ( (int64_t) aSig0 < 0 );
158142c2 6395 if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
ff32e16e
PM
6396 return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - 4, aSig0, aSig1,
6397 status);
158142c2
FB
6398}
6399
6400/*----------------------------------------------------------------------------
6401| Returns the square root of the quadruple-precision floating-point value `a'.
6402| The operation is performed according to the IEC/IEEE Standard for Binary
6403| Floating-Point Arithmetic.
6404*----------------------------------------------------------------------------*/
6405
e5a41ffa 6406float128 float128_sqrt(float128 a, float_status *status)
158142c2
FB
6407{
6408 flag aSign;
f4014512 6409 int32_t aExp, zExp;
bb98fe42
AF
6410 uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
6411 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
158142c2
FB
6412
6413 aSig1 = extractFloat128Frac1( a );
6414 aSig0 = extractFloat128Frac0( a );
6415 aExp = extractFloat128Exp( a );
6416 aSign = extractFloat128Sign( a );
6417 if ( aExp == 0x7FFF ) {
ff32e16e
PM
6418 if (aSig0 | aSig1) {
6419 return propagateFloat128NaN(a, a, status);
6420 }
158142c2
FB
6421 if ( ! aSign ) return a;
6422 goto invalid;
6423 }
6424 if ( aSign ) {
6425 if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
6426 invalid:
ff32e16e 6427 float_raise(float_flag_invalid, status);
af39bc8c 6428 return float128_default_nan(status);
158142c2
FB
6429 }
6430 if ( aExp == 0 ) {
6431 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
6432 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6433 }
6434 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
6435 aSig0 |= LIT64( 0x0001000000000000 );
6436 zSig0 = estimateSqrt32( aExp, aSig0>>17 );
6437 shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
6438 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
6439 doubleZSig0 = zSig0<<1;
6440 mul64To128( zSig0, zSig0, &term0, &term1 );
6441 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
bb98fe42 6442 while ( (int64_t) rem0 < 0 ) {
158142c2
FB
6443 --zSig0;
6444 doubleZSig0 -= 2;
6445 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
6446 }
6447 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
6448 if ( ( zSig1 & 0x1FFF ) <= 5 ) {
6449 if ( zSig1 == 0 ) zSig1 = 1;
6450 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6451 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
6452 mul64To128( zSig1, zSig1, &term2, &term3 );
6453 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
bb98fe42 6454 while ( (int64_t) rem1 < 0 ) {
158142c2
FB
6455 --zSig1;
6456 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
6457 term3 |= 1;
6458 term2 |= doubleZSig0;
6459 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
6460 }
6461 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
6462 }
6463 shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
ff32e16e 6464 return roundAndPackFloat128(0, zExp, zSig0, zSig1, zSig2, status);
158142c2
FB
6465
6466}
6467
6468/*----------------------------------------------------------------------------
6469| Returns 1 if the quadruple-precision floating-point value `a' is equal to
b689362d
AJ
6470| the corresponding value `b', and 0 otherwise. The invalid exception is
6471| raised if either operand is a NaN. Otherwise, the comparison is performed
158142c2
FB
6472| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6473*----------------------------------------------------------------------------*/
6474
e5a41ffa 6475int float128_eq(float128 a, float128 b, float_status *status)
158142c2
FB
6476{
6477
6478 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6479 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6480 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6481 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6482 ) {
ff32e16e 6483 float_raise(float_flag_invalid, status);
158142c2
FB
6484 return 0;
6485 }
6486 return
6487 ( a.low == b.low )
6488 && ( ( a.high == b.high )
6489 || ( ( a.low == 0 )
bb98fe42 6490 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6491 );
6492
6493}
6494
6495/*----------------------------------------------------------------------------
6496| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
6497| or equal to the corresponding value `b', and 0 otherwise. The invalid
6498| exception is raised if either operand is a NaN. The comparison is performed
6499| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6500*----------------------------------------------------------------------------*/
6501
e5a41ffa 6502int float128_le(float128 a, float128 b, float_status *status)
158142c2
FB
6503{
6504 flag aSign, bSign;
6505
6506 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6507 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6508 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6509 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6510 ) {
ff32e16e 6511 float_raise(float_flag_invalid, status);
158142c2
FB
6512 return 0;
6513 }
6514 aSign = extractFloat128Sign( a );
6515 bSign = extractFloat128Sign( b );
6516 if ( aSign != bSign ) {
6517 return
6518 aSign
bb98fe42 6519 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6520 == 0 );
6521 }
6522 return
6523 aSign ? le128( b.high, b.low, a.high, a.low )
6524 : le128( a.high, a.low, b.high, b.low );
6525
6526}
6527
6528/*----------------------------------------------------------------------------
6529| Returns 1 if the quadruple-precision floating-point value `a' is less than
f5a64251
AJ
6530| the corresponding value `b', and 0 otherwise. The invalid exception is
6531| raised if either operand is a NaN. The comparison is performed according
6532| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
158142c2
FB
6533*----------------------------------------------------------------------------*/
6534
e5a41ffa 6535int float128_lt(float128 a, float128 b, float_status *status)
158142c2
FB
6536{
6537 flag aSign, bSign;
6538
6539 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6540 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6541 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6542 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6543 ) {
ff32e16e 6544 float_raise(float_flag_invalid, status);
158142c2
FB
6545 return 0;
6546 }
6547 aSign = extractFloat128Sign( a );
6548 bSign = extractFloat128Sign( b );
6549 if ( aSign != bSign ) {
6550 return
6551 aSign
bb98fe42 6552 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6553 != 0 );
6554 }
6555 return
6556 aSign ? lt128( b.high, b.low, a.high, a.low )
6557 : lt128( a.high, a.low, b.high, b.low );
6558
6559}
6560
67b7861d
AJ
6561/*----------------------------------------------------------------------------
6562| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
f5a64251
AJ
6563| be compared, and 0 otherwise. The invalid exception is raised if either
6564| operand is a NaN. The comparison is performed according to the IEC/IEEE
6565| Standard for Binary Floating-Point Arithmetic.
67b7861d
AJ
6566*----------------------------------------------------------------------------*/
6567
e5a41ffa 6568int float128_unordered(float128 a, float128 b, float_status *status)
67b7861d
AJ
6569{
6570 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6571 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6572 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6573 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6574 ) {
ff32e16e 6575 float_raise(float_flag_invalid, status);
67b7861d
AJ
6576 return 1;
6577 }
6578 return 0;
6579}
6580
158142c2
FB
6581/*----------------------------------------------------------------------------
6582| Returns 1 if the quadruple-precision floating-point value `a' is equal to
f5a64251
AJ
6583| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
6584| exception. The comparison is performed according to the IEC/IEEE Standard
6585| for Binary Floating-Point Arithmetic.
158142c2
FB
6586*----------------------------------------------------------------------------*/
6587
e5a41ffa 6588int float128_eq_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
6589{
6590
6591 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6592 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6593 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6594 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6595 ) {
af39bc8c
AM
6596 if (float128_is_signaling_nan(a, status)
6597 || float128_is_signaling_nan(b, status)) {
ff32e16e 6598 float_raise(float_flag_invalid, status);
b689362d 6599 }
158142c2
FB
6600 return 0;
6601 }
6602 return
6603 ( a.low == b.low )
6604 && ( ( a.high == b.high )
6605 || ( ( a.low == 0 )
bb98fe42 6606 && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
158142c2
FB
6607 );
6608
6609}
6610
6611/*----------------------------------------------------------------------------
6612| Returns 1 if the quadruple-precision floating-point value `a' is less than
6613| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
6614| cause an exception. Otherwise, the comparison is performed according to the
6615| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6616*----------------------------------------------------------------------------*/
6617
e5a41ffa 6618int float128_le_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
6619{
6620 flag aSign, bSign;
6621
6622 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6623 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6624 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6625 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6626 ) {
af39bc8c
AM
6627 if (float128_is_signaling_nan(a, status)
6628 || float128_is_signaling_nan(b, status)) {
ff32e16e 6629 float_raise(float_flag_invalid, status);
158142c2
FB
6630 }
6631 return 0;
6632 }
6633 aSign = extractFloat128Sign( a );
6634 bSign = extractFloat128Sign( b );
6635 if ( aSign != bSign ) {
6636 return
6637 aSign
bb98fe42 6638 || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6639 == 0 );
6640 }
6641 return
6642 aSign ? le128( b.high, b.low, a.high, a.low )
6643 : le128( a.high, a.low, b.high, b.low );
6644
6645}
6646
6647/*----------------------------------------------------------------------------
6648| Returns 1 if the quadruple-precision floating-point value `a' is less than
6649| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
6650| exception. Otherwise, the comparison is performed according to the IEC/IEEE
6651| Standard for Binary Floating-Point Arithmetic.
6652*----------------------------------------------------------------------------*/
6653
e5a41ffa 6654int float128_lt_quiet(float128 a, float128 b, float_status *status)
158142c2
FB
6655{
6656 flag aSign, bSign;
6657
6658 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6659 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6660 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6661 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6662 ) {
af39bc8c
AM
6663 if (float128_is_signaling_nan(a, status)
6664 || float128_is_signaling_nan(b, status)) {
ff32e16e 6665 float_raise(float_flag_invalid, status);
158142c2
FB
6666 }
6667 return 0;
6668 }
6669 aSign = extractFloat128Sign( a );
6670 bSign = extractFloat128Sign( b );
6671 if ( aSign != bSign ) {
6672 return
6673 aSign
bb98fe42 6674 && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
158142c2
FB
6675 != 0 );
6676 }
6677 return
6678 aSign ? lt128( b.high, b.low, a.high, a.low )
6679 : lt128( a.high, a.low, b.high, b.low );
6680
6681}
6682
67b7861d
AJ
6683/*----------------------------------------------------------------------------
6684| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
6685| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
6686| comparison is performed according to the IEC/IEEE Standard for Binary
6687| Floating-Point Arithmetic.
6688*----------------------------------------------------------------------------*/
6689
e5a41ffa 6690int float128_unordered_quiet(float128 a, float128 b, float_status *status)
67b7861d
AJ
6691{
6692 if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
6693 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
6694 || ( ( extractFloat128Exp( b ) == 0x7FFF )
6695 && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
6696 ) {
af39bc8c
AM
6697 if (float128_is_signaling_nan(a, status)
6698 || float128_is_signaling_nan(b, status)) {
ff32e16e 6699 float_raise(float_flag_invalid, status);
67b7861d
AJ
6700 }
6701 return 1;
6702 }
6703 return 0;
6704}
6705
e5a41ffa
PM
6706static inline int floatx80_compare_internal(floatx80 a, floatx80 b,
6707 int is_quiet, float_status *status)
f6714d36
AJ
6708{
6709 flag aSign, bSign;
6710
d1eb8f2a
AD
6711 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
6712 float_raise(float_flag_invalid, status);
6713 return float_relation_unordered;
6714 }
f6714d36
AJ
6715 if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
6716 ( extractFloatx80Frac( a )<<1 ) ) ||
6717 ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
6718 ( extractFloatx80Frac( b )<<1 ) )) {
6719 if (!is_quiet ||
af39bc8c
AM
6720 floatx80_is_signaling_nan(a, status) ||
6721 floatx80_is_signaling_nan(b, status)) {
ff32e16e 6722 float_raise(float_flag_invalid, status);
f6714d36
AJ
6723 }
6724 return float_relation_unordered;
6725 }
6726 aSign = extractFloatx80Sign( a );
6727 bSign = extractFloatx80Sign( b );
6728 if ( aSign != bSign ) {
6729
6730 if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
6731 ( ( a.low | b.low ) == 0 ) ) {
6732 /* zero case */
6733 return float_relation_equal;
6734 } else {
6735 return 1 - (2 * aSign);
6736 }
6737 } else {
6738 if (a.low == b.low && a.high == b.high) {
6739 return float_relation_equal;
6740 } else {
6741 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6742 }
6743 }
6744}
6745
e5a41ffa 6746int floatx80_compare(floatx80 a, floatx80 b, float_status *status)
f6714d36 6747{
ff32e16e 6748 return floatx80_compare_internal(a, b, 0, status);
f6714d36
AJ
6749}
6750
e5a41ffa 6751int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status)
f6714d36 6752{
ff32e16e 6753 return floatx80_compare_internal(a, b, 1, status);
f6714d36
AJ
6754}
6755
e5a41ffa
PM
6756static inline int float128_compare_internal(float128 a, float128 b,
6757 int is_quiet, float_status *status)
1f587329
BS
6758{
6759 flag aSign, bSign;
6760
6761 if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
6762 ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
6763 ( ( extractFloat128Exp( b ) == 0x7fff ) &&
6764 ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
6765 if (!is_quiet ||
af39bc8c
AM
6766 float128_is_signaling_nan(a, status) ||
6767 float128_is_signaling_nan(b, status)) {
ff32e16e 6768 float_raise(float_flag_invalid, status);
1f587329
BS
6769 }
6770 return float_relation_unordered;
6771 }
6772 aSign = extractFloat128Sign( a );
6773 bSign = extractFloat128Sign( b );
6774 if ( aSign != bSign ) {
6775 if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
6776 /* zero case */
6777 return float_relation_equal;
6778 } else {
6779 return 1 - (2 * aSign);
6780 }
6781 } else {
6782 if (a.low == b.low && a.high == b.high) {
6783 return float_relation_equal;
6784 } else {
6785 return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
6786 }
6787 }
6788}
6789
e5a41ffa 6790int float128_compare(float128 a, float128 b, float_status *status)
1f587329 6791{
ff32e16e 6792 return float128_compare_internal(a, b, 0, status);
1f587329
BS
6793}
6794
e5a41ffa 6795int float128_compare_quiet(float128 a, float128 b, float_status *status)
1f587329 6796{
ff32e16e 6797 return float128_compare_internal(a, b, 1, status);
1f587329
BS
6798}
6799
e5a41ffa 6800floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
9ee6e8bb
PB
6801{
6802 flag aSign;
326b9e98 6803 int32_t aExp;
bb98fe42 6804 uint64_t aSig;
9ee6e8bb 6805
d1eb8f2a
AD
6806 if (floatx80_invalid_encoding(a)) {
6807 float_raise(float_flag_invalid, status);
6808 return floatx80_default_nan(status);
6809 }
9ee6e8bb
PB
6810 aSig = extractFloatx80Frac( a );
6811 aExp = extractFloatx80Exp( a );
6812 aSign = extractFloatx80Sign( a );
6813
326b9e98
AJ
6814 if ( aExp == 0x7FFF ) {
6815 if ( aSig<<1 ) {
ff32e16e 6816 return propagateFloatx80NaN(a, a, status);
326b9e98 6817 }
9ee6e8bb
PB
6818 return a;
6819 }
326b9e98 6820
3c85c37f
PM
6821 if (aExp == 0) {
6822 if (aSig == 0) {
6823 return a;
6824 }
6825 aExp++;
6826 }
69397542 6827
326b9e98
AJ
6828 if (n > 0x10000) {
6829 n = 0x10000;
6830 } else if (n < -0x10000) {
6831 n = -0x10000;
6832 }
6833
9ee6e8bb 6834 aExp += n;
a2f2d288
PM
6835 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
6836 aSign, aExp, aSig, 0, status);
9ee6e8bb 6837}
9ee6e8bb 6838
e5a41ffa 6839float128 float128_scalbn(float128 a, int n, float_status *status)
9ee6e8bb
PB
6840{
6841 flag aSign;
326b9e98 6842 int32_t aExp;
bb98fe42 6843 uint64_t aSig0, aSig1;
9ee6e8bb
PB
6844
6845 aSig1 = extractFloat128Frac1( a );
6846 aSig0 = extractFloat128Frac0( a );
6847 aExp = extractFloat128Exp( a );
6848 aSign = extractFloat128Sign( a );
6849 if ( aExp == 0x7FFF ) {
326b9e98 6850 if ( aSig0 | aSig1 ) {
ff32e16e 6851 return propagateFloat128NaN(a, a, status);
326b9e98 6852 }
9ee6e8bb
PB
6853 return a;
6854 }
3c85c37f 6855 if (aExp != 0) {
69397542 6856 aSig0 |= LIT64( 0x0001000000000000 );
3c85c37f 6857 } else if (aSig0 == 0 && aSig1 == 0) {
69397542 6858 return a;
3c85c37f
PM
6859 } else {
6860 aExp++;
6861 }
69397542 6862
326b9e98
AJ
6863 if (n > 0x10000) {
6864 n = 0x10000;
6865 } else if (n < -0x10000) {
6866 n = -0x10000;
6867 }
6868
69397542
PB
6869 aExp += n - 1;
6870 return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
ff32e16e 6871 , status);
9ee6e8bb
PB
6872
6873}
This page took 1.709274 seconds and 4 git commands to generate.